1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/sysmacros.h> 30 #include <sys/debug.h> 31 #include <sys/cmn_err.h> 32 33 #include <sys/stropts.h> 34 #include <sys/socket.h> 35 #include <sys/socketvar.h> 36 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/sockio.h> 40 #include <sys/kmem_impl.h> 41 42 #include <sys/strsubr.h> 43 #include <sys/strsun.h> 44 #include <sys/ddi.h> 45 #include <netinet/in.h> 46 #include <inet/ip.h> 47 48 #include <fs/sockfs/sockcommon.h> 49 50 #include <sys/socket_proto.h> 51 52 #include <fs/sockfs/socktpi_impl.h> 53 #include <fs/sockfs/sodirect.h> 54 #include <sys/tihdr.h> 55 #include <fs/sockfs/nl7c.h> 56 #include <inet/kssl/ksslapi.h> 57 58 59 extern int xnet_skip_checks; 60 extern int xnet_check_print; 61 62 static void so_queue_oob(sock_upper_handle_t, mblk_t *, size_t); 63 64 65 /*ARGSUSED*/ 66 int 67 so_accept_notsupp(struct sonode *lso, int fflag, 68 struct cred *cr, struct sonode **nsop) 69 { 70 return (EOPNOTSUPP); 71 } 72 73 /*ARGSUSED*/ 74 int 75 so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr) 76 { 77 return (EOPNOTSUPP); 78 } 79 80 /*ARGSUSED*/ 81 int 82 so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa, 83 socklen_t *len, struct cred *cr) 84 { 85 return (EOPNOTSUPP); 86 } 87 88 /*ARGSUSED*/ 89 int 90 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr, 91 socklen_t *addrlen, boolean_t accept, struct cred *cr) 92 { 93 return (EOPNOTSUPP); 94 } 95 96 /*ARGSUSED*/ 97 int 98 so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr) 99 { 100 return (EOPNOTSUPP); 101 } 102 103 /*ARGSUSED*/ 104 int 105 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag, 106 struct cred *cr, mblk_t **mpp) 107 { 108 return (EOPNOTSUPP); 109 } 110 111 /* 112 * Generic Socket Ops 113 */ 114 115 /* ARGSUSED */ 116 int 117 so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags) 118 { 119 return (socket_init_common(so, pso, flags, cr)); 120 } 121 122 int 123 so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 124 int flags, struct cred *cr) 125 { 126 int error; 127 128 SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr)); 129 130 ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD); 131 132 /* X/Open requires this check */ 133 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 134 if (xnet_check_print) { 135 printf("sockfs: X/Open bind state check " 136 "caused EINVAL\n"); 137 } 138 error = EINVAL; 139 goto done; 140 } 141 142 /* 143 * a bind to a NULL address is interpreted as unbind. So just 144 * do the downcall. 145 */ 146 if (name == NULL) 147 goto dobind; 148 149 switch (so->so_family) { 150 case AF_INET: 151 if ((size_t)namelen != sizeof (sin_t)) { 152 error = name->sa_family != so->so_family ? 153 EAFNOSUPPORT : EINVAL; 154 eprintsoline(so, error); 155 goto done; 156 } 157 158 if ((flags & _SOBIND_XPG4_2) && 159 (name->sa_family != so->so_family)) { 160 /* 161 * This check has to be made for X/Open 162 * sockets however application failures have 163 * been observed when it is applied to 164 * all sockets. 165 */ 166 error = EAFNOSUPPORT; 167 eprintsoline(so, error); 168 goto done; 169 } 170 /* 171 * Force a zero sa_family to match so_family. 172 * 173 * Some programs like inetd(1M) don't set the 174 * family field. Other programs leave 175 * sin_family set to garbage - SunOS 4.X does 176 * not check the family field on a bind. 177 * We use the family field that 178 * was passed in to the socket() call. 179 */ 180 name->sa_family = so->so_family; 181 break; 182 183 case AF_INET6: { 184 #ifdef DEBUG 185 sin6_t *sin6 = (sin6_t *)name; 186 #endif 187 if ((size_t)namelen != sizeof (sin6_t)) { 188 error = name->sa_family != so->so_family ? 189 EAFNOSUPPORT : EINVAL; 190 eprintsoline(so, error); 191 goto done; 192 } 193 194 if (name->sa_family != so->so_family) { 195 /* 196 * With IPv6 we require the family to match 197 * unlike in IPv4. 198 */ 199 error = EAFNOSUPPORT; 200 eprintsoline(so, error); 201 goto done; 202 } 203 #ifdef DEBUG 204 /* 205 * Verify that apps don't forget to clear 206 * sin6_scope_id etc 207 */ 208 if (sin6->sin6_scope_id != 0 && 209 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 210 zcmn_err(getzoneid(), CE_WARN, 211 "bind with uninitialized sin6_scope_id " 212 "(%d) on socket. Pid = %d\n", 213 (int)sin6->sin6_scope_id, 214 (int)curproc->p_pid); 215 } 216 if (sin6->__sin6_src_id != 0) { 217 zcmn_err(getzoneid(), CE_WARN, 218 "bind with uninitialized __sin6_src_id " 219 "(%d) on socket. Pid = %d\n", 220 (int)sin6->__sin6_src_id, 221 (int)curproc->p_pid); 222 } 223 #endif /* DEBUG */ 224 225 break; 226 } 227 default: 228 /* Just pass the request to the protocol */ 229 goto dobind; 230 } 231 232 /* 233 * First we check if either NCA or KSSL has been enabled for 234 * the requested address, and if so, we fall back to TPI. 235 * If neither of those two services are enabled, then we just 236 * pass the request to the protocol. 237 * 238 * Note that KSSL can only be enabled on a socket if NCA is NOT 239 * enabled for that socket, hence the else-statement below. 240 */ 241 if (nl7c_enabled && ((so->so_family == AF_INET || 242 so->so_family == AF_INET6) && 243 nl7c_lookup_addr(name, namelen) != NULL)) { 244 /* 245 * NL7C is not supported in non-global zones, 246 * we enforce this restriction here. 247 */ 248 if (so->so_zoneid == GLOBAL_ZONEID) { 249 /* NCA should be used, so fall back to TPI */ 250 error = so_tpi_fallback(so, cr); 251 SO_UNBLOCK_FALLBACK(so); 252 if (error) 253 return (error); 254 else 255 return (SOP_BIND(so, name, namelen, flags, cr)); 256 } 257 } else if (so->so_type == SOCK_STREAM) { 258 /* Check if KSSL has been configured for this address */ 259 kssl_ent_t ent; 260 kssl_endpt_type_t type; 261 struct T_bind_req bind_req; 262 mblk_t *mp; 263 264 /* 265 * TODO: Check with KSSL team if we could add a function call 266 * that only queries whether KSSL is enabled for the given 267 * address. 268 */ 269 bind_req.PRIM_type = T_BIND_REQ; 270 bind_req.ADDR_length = namelen; 271 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 272 mp = soallocproto2(&bind_req, sizeof (bind_req), 273 name, namelen, 0, _ALLOC_SLEEP, cr); 274 275 type = kssl_check_proxy(mp, so, &ent); 276 freemsg(mp); 277 278 if (type != KSSL_NO_PROXY) { 279 /* 280 * KSSL has been configured for this address, so 281 * we must fall back to TPI. 282 */ 283 kssl_release_ent(ent, so, type); 284 error = so_tpi_fallback(so, cr); 285 SO_UNBLOCK_FALLBACK(so); 286 if (error) 287 return (error); 288 else 289 return (SOP_BIND(so, name, namelen, flags, cr)); 290 } 291 } 292 293 dobind: 294 error = (*so->so_downcalls->sd_bind) 295 (so->so_proto_handle, name, namelen, cr); 296 done: 297 SO_UNBLOCK_FALLBACK(so); 298 299 return (error); 300 } 301 302 int 303 so_listen(struct sonode *so, int backlog, struct cred *cr) 304 { 305 int error = 0; 306 307 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 308 SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr)); 309 310 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle, backlog, 311 cr); 312 313 SO_UNBLOCK_FALLBACK(so); 314 315 return (error); 316 } 317 318 319 int 320 so_connect(struct sonode *so, const struct sockaddr *name, 321 socklen_t namelen, int fflag, int flags, struct cred *cr) 322 { 323 int error = 0; 324 sock_connid_t id; 325 326 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 327 SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr)); 328 329 /* 330 * If there is a pending error, return error 331 * This can happen if a non blocking operation caused an error. 332 */ 333 334 if (so->so_error != 0) { 335 mutex_enter(&so->so_lock); 336 error = sogeterr(so, B_TRUE); 337 mutex_exit(&so->so_lock); 338 if (error != 0) 339 goto done; 340 } 341 342 error = (*so->so_downcalls->sd_connect)(so->so_proto_handle, 343 name, namelen, &id, cr); 344 345 if (error == EINPROGRESS) 346 error = so_wait_connected(so, fflag & (FNONBLOCK|FNDELAY), id); 347 348 done: 349 SO_UNBLOCK_FALLBACK(so); 350 return (error); 351 } 352 353 /*ARGSUSED*/ 354 int 355 so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop) 356 { 357 int error = 0; 358 struct sonode *nso; 359 360 *nsop = NULL; 361 362 SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop)); 363 if ((so->so_state & SS_ACCEPTCONN) == 0) { 364 SO_UNBLOCK_FALLBACK(so); 365 return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ? 366 EOPNOTSUPP : EINVAL); 367 } 368 369 if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)), 370 &nso)) == 0) { 371 ASSERT(nso != NULL); 372 373 /* finish the accept */ 374 error = (*so->so_downcalls->sd_accept)(so->so_proto_handle, 375 nso->so_proto_handle, (sock_upper_handle_t)nso, cr); 376 if (error != 0) { 377 (void) socket_close(nso, 0, cr); 378 socket_destroy(nso); 379 } else { 380 *nsop = nso; 381 } 382 } 383 384 SO_UNBLOCK_FALLBACK(so); 385 return (error); 386 } 387 388 int 389 so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 390 struct cred *cr) 391 { 392 int error, flags; 393 boolean_t dontblock; 394 ssize_t orig_resid; 395 mblk_t *mp; 396 397 SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr)); 398 399 flags = msg->msg_flags; 400 error = 0; 401 dontblock = (flags & MSG_DONTWAIT) || 402 (uiop->uio_fmode & (FNONBLOCK|FNDELAY)); 403 404 if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) { 405 /* 406 * Old way of passing fd's is not supported 407 */ 408 SO_UNBLOCK_FALLBACK(so); 409 return (EOPNOTSUPP); 410 } 411 412 if ((so->so_mode & SM_ATOMIC) && 413 uiop->uio_resid > so->so_proto_props.sopp_maxpsz && 414 so->so_proto_props.sopp_maxpsz != -1) { 415 SO_UNBLOCK_FALLBACK(so); 416 return (EMSGSIZE); 417 } 418 419 /* 420 * For atomic sends we will only do one iteration. 421 */ 422 do { 423 if (so->so_state & SS_CANTSENDMORE) { 424 error = EPIPE; 425 break; 426 } 427 428 if (so->so_error != 0) { 429 mutex_enter(&so->so_lock); 430 error = sogeterr(so, B_TRUE); 431 mutex_exit(&so->so_lock); 432 if (error != 0) 433 break; 434 } 435 436 /* 437 * Send down OOB messages even if the send path is being 438 * flow controlled (assuming the protocol supports OOB data). 439 */ 440 if (flags & MSG_OOB) { 441 if ((so->so_mode & SM_EXDATA) == 0) { 442 error = EOPNOTSUPP; 443 break; 444 } 445 } else if (so->so_snd_qfull) { 446 /* 447 * Need to wait until the protocol is ready to receive 448 * more data for transmission. 449 */ 450 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 451 break; 452 } 453 454 /* 455 * Time to send data to the protocol. We either copy the 456 * data into mblks or pass the uio directly to the protocol. 457 * We decide what to do based on the available down calls. 458 */ 459 if (so->so_downcalls->sd_send_uio != NULL) { 460 error = (*so->so_downcalls->sd_send_uio) 461 (so->so_proto_handle, uiop, msg, cr); 462 if (error != 0) 463 break; 464 } else { 465 /* save the resid in case of failure */ 466 orig_resid = uiop->uio_resid; 467 468 if ((mp = socopyinuio(uiop, 469 so->so_proto_props.sopp_maxpsz, 470 so->so_proto_props.sopp_wroff, 471 so->so_proto_props.sopp_maxblk, 472 so->so_proto_props.sopp_tail, &error)) == NULL) { 473 break; 474 } 475 ASSERT(uiop->uio_resid >= 0); 476 477 error = (*so->so_downcalls->sd_send) 478 (so->so_proto_handle, mp, msg, cr); 479 if (error != 0) { 480 /* 481 * The send failed. We do not have to free the 482 * mblks, because that is the protocol's 483 * responsibility. However, uio_resid must 484 * remain accurate, so adjust that here. 485 */ 486 uiop->uio_resid = orig_resid; 487 break; 488 } 489 } 490 } while (uiop->uio_resid > 0); 491 492 SO_UNBLOCK_FALLBACK(so); 493 494 return (error); 495 } 496 497 int 498 so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 499 struct cred *cr, mblk_t **mpp) 500 { 501 int error; 502 boolean_t dontblock; 503 size_t size; 504 mblk_t *mp = *mpp; 505 506 SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp)); 507 508 error = 0; 509 dontblock = (msg->msg_flags & MSG_DONTWAIT) || 510 (fflag & (FNONBLOCK|FNDELAY)); 511 size = msgdsize(mp); 512 513 if ((so->so_mode & SM_SENDFILESUPP) == 0 || 514 so->so_downcalls->sd_send == NULL) { 515 SO_UNBLOCK_FALLBACK(so); 516 return (EOPNOTSUPP); 517 } 518 519 if ((so->so_mode & SM_ATOMIC) && 520 size > so->so_proto_props.sopp_maxpsz && 521 so->so_proto_props.sopp_maxpsz != -1) { 522 SO_UNBLOCK_FALLBACK(so); 523 return (EMSGSIZE); 524 } 525 526 while (mp != NULL) { 527 mblk_t *nmp, *last_mblk; 528 size_t mlen; 529 530 if (so->so_state & SS_CANTSENDMORE) { 531 error = EPIPE; 532 break; 533 } 534 if (so->so_error != 0) { 535 mutex_enter(&so->so_lock); 536 error = sogeterr(so, B_TRUE); 537 mutex_exit(&so->so_lock); 538 if (error != 0) 539 break; 540 } 541 if (so->so_snd_qfull) { 542 /* 543 * Need to wait until the protocol is ready to receive 544 * more data for transmission. 545 */ 546 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 547 break; 548 } 549 550 /* 551 * We only allow so_maxpsz of data to be sent down to 552 * the protocol at time. 553 */ 554 mlen = MBLKL(mp); 555 nmp = mp->b_cont; 556 last_mblk = mp; 557 while (nmp != NULL) { 558 mlen += MBLKL(nmp); 559 if (mlen > so->so_proto_props.sopp_maxpsz) { 560 last_mblk->b_cont = NULL; 561 break; 562 } 563 last_mblk = nmp; 564 nmp = nmp->b_cont; 565 } 566 567 error = (*so->so_downcalls->sd_send) 568 (so->so_proto_handle, mp, msg, cr); 569 if (error != 0) { 570 /* 571 * The send failed. The protocol will free the mblks 572 * that were sent down. Let the caller deal with the 573 * rest. 574 */ 575 *mpp = nmp; 576 break; 577 } 578 579 *mpp = mp = nmp; 580 } 581 582 SO_UNBLOCK_FALLBACK(so); 583 584 return (error); 585 } 586 587 int 588 so_shutdown(struct sonode *so, int how, struct cred *cr) 589 { 590 int error; 591 592 SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr)); 593 594 /* 595 * SunOS 4.X has no check for datagram sockets. 596 * 5.X checks that it is connected (ENOTCONN) 597 * X/Open requires that we check the connected state. 598 */ 599 if (!(so->so_state & SS_ISCONNECTED)) { 600 if (!xnet_skip_checks) { 601 error = ENOTCONN; 602 if (xnet_check_print) { 603 printf("sockfs: X/Open shutdown check " 604 "caused ENOTCONN\n"); 605 } 606 } 607 goto done; 608 } 609 610 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle, 611 how, cr)); 612 613 /* 614 * Protocol agreed to shutdown. We need to flush the 615 * receive buffer if the receive side is being shutdown. 616 */ 617 if (error == 0 && how != SHUT_WR) { 618 mutex_enter(&so->so_lock); 619 /* wait for active reader to finish */ 620 (void) so_lock_read(so, 0); 621 622 so_rcv_flush(so); 623 624 so_unlock_read(so); 625 mutex_exit(&so->so_lock); 626 } 627 628 done: 629 SO_UNBLOCK_FALLBACK(so); 630 return (error); 631 } 632 633 int 634 so_getsockname(struct sonode *so, struct sockaddr *addr, 635 socklen_t *addrlen, struct cred *cr) 636 { 637 int error; 638 639 SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr)); 640 641 error = (*so->so_downcalls->sd_getsockname) 642 (so->so_proto_handle, addr, addrlen, cr); 643 644 SO_UNBLOCK_FALLBACK(so); 645 return (error); 646 } 647 648 int 649 so_getpeername(struct sonode *so, struct sockaddr *addr, 650 socklen_t *addrlen, boolean_t accept, struct cred *cr) 651 { 652 int error; 653 654 SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr)); 655 656 if (accept) { 657 error = (*so->so_downcalls->sd_getpeername) 658 (so->so_proto_handle, addr, addrlen, cr); 659 } else if (!(so->so_state & SS_ISCONNECTED)) { 660 error = ENOTCONN; 661 } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 662 /* Added this check for X/Open */ 663 error = EINVAL; 664 if (xnet_check_print) { 665 printf("sockfs: X/Open getpeername check => EINVAL\n"); 666 } 667 } else { 668 error = (*so->so_downcalls->sd_getpeername) 669 (so->so_proto_handle, addr, addrlen, cr); 670 } 671 672 SO_UNBLOCK_FALLBACK(so); 673 return (error); 674 } 675 676 int 677 so_getsockopt(struct sonode *so, int level, int option_name, 678 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 679 { 680 int error = 0; 681 682 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 683 SO_BLOCK_FALLBACK(so, 684 SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr)); 685 686 error = socket_getopt_common(so, level, option_name, optval, optlenp, 687 flags); 688 if (error < 0) { 689 error = (*so->so_downcalls->sd_getsockopt) 690 (so->so_proto_handle, level, option_name, optval, optlenp, 691 cr); 692 if (error == ENOPROTOOPT) { 693 if (level == SOL_SOCKET) { 694 /* 695 * If a protocol does not support a particular 696 * socket option, set can fail (not allowed) 697 * but get can not fail. This is the previous 698 * sockfs bahvior. 699 */ 700 switch (option_name) { 701 case SO_LINGER: 702 if (*optlenp < (t_uscalar_t) 703 sizeof (struct linger)) { 704 error = EINVAL; 705 break; 706 } 707 error = 0; 708 bzero(optval, sizeof (struct linger)); 709 *optlenp = sizeof (struct linger); 710 break; 711 case SO_RCVTIMEO: 712 case SO_SNDTIMEO: 713 if (*optlenp < (t_uscalar_t) 714 sizeof (struct timeval)) { 715 error = EINVAL; 716 break; 717 } 718 error = 0; 719 bzero(optval, sizeof (struct timeval)); 720 *optlenp = sizeof (struct timeval); 721 break; 722 case SO_SND_BUFINFO: 723 if (*optlenp < (t_uscalar_t) 724 sizeof (struct so_snd_bufinfo)) { 725 error = EINVAL; 726 break; 727 } 728 error = 0; 729 bzero(optval, 730 sizeof (struct so_snd_bufinfo)); 731 *optlenp = 732 sizeof (struct so_snd_bufinfo); 733 break; 734 case SO_DEBUG: 735 case SO_REUSEADDR: 736 case SO_KEEPALIVE: 737 case SO_DONTROUTE: 738 case SO_BROADCAST: 739 case SO_USELOOPBACK: 740 case SO_OOBINLINE: 741 case SO_DGRAM_ERRIND: 742 case SO_SNDBUF: 743 case SO_RCVBUF: 744 error = 0; 745 *((int32_t *)optval) = 0; 746 *optlenp = sizeof (int32_t); 747 break; 748 default: 749 break; 750 } 751 } 752 } 753 } 754 755 SO_UNBLOCK_FALLBACK(so); 756 return (error); 757 } 758 759 int 760 so_setsockopt(struct sonode *so, int level, int option_name, 761 const void *optval, socklen_t optlen, struct cred *cr) 762 { 763 int error = 0; 764 struct timeval tl; 765 const void *opt = optval; 766 767 SO_BLOCK_FALLBACK(so, 768 SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr)); 769 770 /* X/Open requires this check */ 771 if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) { 772 SO_UNBLOCK_FALLBACK(so); 773 if (xnet_check_print) 774 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 775 return (EINVAL); 776 } 777 778 if (level == SOL_SOCKET) { 779 switch (option_name) { 780 case SO_RCVTIMEO: 781 case SO_SNDTIMEO: { 782 /* 783 * We pass down these two options to protocol in order 784 * to support some third part protocols which need to 785 * know them. For those protocols which don't care 786 * these two options, simply return 0. 787 */ 788 clock_t t_usec; 789 790 if (get_udatamodel() == DATAMODEL_NONE || 791 get_udatamodel() == DATAMODEL_NATIVE) { 792 if (optlen != sizeof (struct timeval)) { 793 error = EINVAL; 794 goto done; 795 } 796 bcopy((struct timeval *)optval, &tl, 797 sizeof (struct timeval)); 798 } else { 799 if (optlen != sizeof (struct timeval32)) { 800 error = EINVAL; 801 goto done; 802 } 803 TIMEVAL32_TO_TIMEVAL(&tl, 804 (struct timeval32 *)optval); 805 } 806 opt = &tl; 807 optlen = sizeof (tl); 808 t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 809 mutex_enter(&so->so_lock); 810 if (option_name == SO_RCVTIMEO) 811 so->so_rcvtimeo = drv_usectohz(t_usec); 812 else 813 so->so_sndtimeo = drv_usectohz(t_usec); 814 mutex_exit(&so->so_lock); 815 break; 816 } 817 case SO_RCVBUF: 818 /* 819 * XXX XPG 4.2 applications retrieve SO_RCVBUF from 820 * sockfs since the transport might adjust the value 821 * and not return exactly what was set by the 822 * application. 823 */ 824 so->so_xpg_rcvbuf = *(int32_t *)optval; 825 break; 826 } 827 } 828 error = (*so->so_downcalls->sd_setsockopt) 829 (so->so_proto_handle, level, option_name, opt, optlen, cr); 830 done: 831 SO_UNBLOCK_FALLBACK(so); 832 return (error); 833 } 834 835 int 836 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 837 struct cred *cr, int32_t *rvalp) 838 { 839 int error = 0; 840 841 SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 842 843 /* 844 * If there is a pending error, return error 845 * This can happen if a non blocking operation caused an error. 846 */ 847 if (so->so_error != 0) { 848 mutex_enter(&so->so_lock); 849 error = sogeterr(so, B_TRUE); 850 mutex_exit(&so->so_lock); 851 if (error != 0) 852 goto done; 853 } 854 855 /* 856 * calling strioc can result in the socket falling back to TPI, 857 * if that is supported. 858 */ 859 if ((error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 && 860 (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) { 861 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle, 862 cmd, arg, mode, rvalp, cr); 863 } 864 865 done: 866 SO_UNBLOCK_FALLBACK(so); 867 868 return (error); 869 } 870 871 int 872 so_poll(struct sonode *so, short events, int anyyet, short *reventsp, 873 struct pollhead **phpp) 874 { 875 int state = so->so_state; 876 *reventsp = 0; 877 878 /* 879 * In sockets the errors are represented as input/output events 880 */ 881 if (so->so_error != 0 && 882 ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) { 883 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events; 884 return (0); 885 } 886 887 /* 888 * If the socket is in a state where it can send data 889 * turn on POLLWRBAND and POLLOUT events. 890 */ 891 if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) { 892 /* 893 * out of band data is allowed even if the connection 894 * is flow controlled 895 */ 896 *reventsp |= POLLWRBAND & events; 897 if (!so->so_snd_qfull) { 898 /* 899 * As long as there is buffer to send data 900 * turn on POLLOUT events 901 */ 902 *reventsp |= POLLOUT & events; 903 } 904 } 905 906 /* 907 * Turn on POLLIN whenever there is data on the receive queue, 908 * or the socket is in a state where no more data will be received. 909 * Also, if the socket is accepting connections, flip the bit if 910 * there is something on the queue. 911 * 912 * We do an initial check for events without holding locks. However, 913 * if there are no event available, then we redo the check for POLLIN 914 * events under the lock. 915 */ 916 917 /* Pending connections */ 918 if (so->so_acceptq_len > 0) 919 *reventsp |= (POLLIN|POLLRDNORM) & events; 920 921 /* Data */ 922 /* so_downcalls is null for sctp */ 923 if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) { 924 *reventsp |= (*so->so_downcalls->sd_poll) 925 (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet, 926 CRED()) & events; 927 ASSERT((*reventsp & ~events) == 0); 928 /* do not recheck events */ 929 events &= ~SO_PROTO_POLLEV; 930 } else { 931 if (SO_HAVE_DATA(so)) 932 *reventsp |= (POLLIN|POLLRDNORM) & events; 933 934 /* Urgent data */ 935 if ((state & SS_OOBPEND) != 0) { 936 *reventsp |= (POLLRDBAND | POLLPRI) & events; 937 } 938 } 939 940 if (!*reventsp && !anyyet) { 941 /* Check for read events again, but this time under lock */ 942 if (events & (POLLIN|POLLRDNORM)) { 943 mutex_enter(&so->so_lock); 944 if (SO_HAVE_DATA(so) || so->so_acceptq_len > 0) { 945 mutex_exit(&so->so_lock); 946 *reventsp |= (POLLIN|POLLRDNORM) & events; 947 return (0); 948 } else { 949 so->so_pollev |= SO_POLLEV_IN; 950 mutex_exit(&so->so_lock); 951 } 952 } 953 *phpp = &so->so_poll_list; 954 } 955 return (0); 956 } 957 958 /* 959 * Generic Upcalls 960 */ 961 void 962 so_connected(sock_upper_handle_t sock_handle, sock_connid_t id, 963 cred_t *peer_cred, pid_t peer_cpid) 964 { 965 struct sonode *so = (struct sonode *)sock_handle; 966 967 mutex_enter(&so->so_lock); 968 ASSERT(so->so_proto_handle != NULL); 969 970 if (peer_cred != NULL) { 971 if (so->so_peercred != NULL) 972 crfree(so->so_peercred); 973 crhold(peer_cred); 974 so->so_peercred = peer_cred; 975 so->so_cpid = peer_cpid; 976 } 977 978 so->so_proto_connid = id; 979 soisconnected(so); 980 /* 981 * Wake ones who're waiting for conn to become established. 982 */ 983 so_notify_connected(so); 984 } 985 986 int 987 so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error) 988 { 989 struct sonode *so = (struct sonode *)sock_handle; 990 991 mutex_enter(&so->so_lock); 992 993 so->so_proto_connid = id; 994 soisdisconnected(so, error); 995 so_notify_disconnected(so, error); 996 997 return (0); 998 } 999 1000 void 1001 so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action, 1002 uintptr_t arg) 1003 { 1004 struct sonode *so = (struct sonode *)sock_handle; 1005 1006 switch (action) { 1007 case SOCK_OPCTL_SHUT_SEND: 1008 mutex_enter(&so->so_lock); 1009 socantsendmore(so); 1010 so_notify_disconnecting(so); 1011 break; 1012 case SOCK_OPCTL_SHUT_RECV: { 1013 mutex_enter(&so->so_lock); 1014 socantrcvmore(so); 1015 so_notify_eof(so); 1016 break; 1017 } 1018 case SOCK_OPCTL_ENAB_ACCEPT: 1019 mutex_enter(&so->so_lock); 1020 so->so_state |= SS_ACCEPTCONN; 1021 so->so_backlog = (unsigned int)arg; 1022 mutex_exit(&so->so_lock); 1023 break; 1024 default: 1025 ASSERT(0); 1026 break; 1027 } 1028 } 1029 1030 void 1031 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull) 1032 { 1033 struct sonode *so = (struct sonode *)sock_handle; 1034 1035 if (qfull) { 1036 so_snd_qfull(so); 1037 } else { 1038 so_snd_qnotfull(so); 1039 mutex_enter(&so->so_lock); 1040 so_notify_writable(so); 1041 } 1042 } 1043 1044 sock_upper_handle_t 1045 so_newconn(sock_upper_handle_t parenthandle, 1046 sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls, 1047 struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp) 1048 { 1049 struct sonode *so = (struct sonode *)parenthandle; 1050 struct sonode *nso; 1051 int error; 1052 1053 ASSERT(proto_handle != NULL); 1054 1055 if ((so->so_state & SS_ACCEPTCONN) == 0 || 1056 so->so_acceptq_len >= so->so_backlog) 1057 return (NULL); 1058 1059 nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP, 1060 &error); 1061 if (nso == NULL) 1062 return (NULL); 1063 1064 if (peer_cred != NULL) { 1065 crhold(peer_cred); 1066 nso->so_peercred = peer_cred; 1067 nso->so_cpid = peer_cpid; 1068 } 1069 1070 /* 1071 * The new socket (nso), proto_handle and sock_upcallsp are all 1072 * valid at this point. But as soon as nso is placed in the accept 1073 * queue that can no longer be assumed (since an accept() thread may 1074 * pull it off the queue and close the socket). 1075 */ 1076 *sock_upcallsp = &so_upcalls; 1077 1078 (void) so_acceptq_enqueue(so, nso); 1079 1080 mutex_enter(&so->so_lock); 1081 so_notify_newconn(so); 1082 1083 return ((sock_upper_handle_t)nso); 1084 } 1085 1086 void 1087 so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp) 1088 { 1089 struct sonode *so; 1090 1091 so = (struct sonode *)sock_handle; 1092 1093 mutex_enter(&so->so_lock); 1094 1095 if (soppp->sopp_flags & SOCKOPT_MAXBLK) 1096 so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk; 1097 if (soppp->sopp_flags & SOCKOPT_WROFF) 1098 so->so_proto_props.sopp_wroff = soppp->sopp_wroff; 1099 if (soppp->sopp_flags & SOCKOPT_TAIL) 1100 so->so_proto_props.sopp_tail = soppp->sopp_tail; 1101 if (soppp->sopp_flags & SOCKOPT_RCVHIWAT) 1102 so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat; 1103 if (soppp->sopp_flags & SOCKOPT_RCVLOWAT) 1104 so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat; 1105 if (soppp->sopp_flags & SOCKOPT_MAXPSZ) 1106 so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz; 1107 if (soppp->sopp_flags & SOCKOPT_MINPSZ) 1108 so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz; 1109 if (soppp->sopp_flags & SOCKOPT_ZCOPY) { 1110 if (soppp->sopp_zcopyflag & ZCVMSAFE) { 1111 so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE; 1112 so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE; 1113 } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) { 1114 so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE; 1115 so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE; 1116 } 1117 1118 if (soppp->sopp_zcopyflag & COPYCACHED) { 1119 so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED; 1120 } 1121 } 1122 if (soppp->sopp_flags & SOCKOPT_OOBINLINE) 1123 so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline; 1124 if (soppp->sopp_flags & SOCKOPT_RCVTIMER) 1125 so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer; 1126 if (soppp->sopp_flags & SOCKOPT_RCVTHRESH) 1127 so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh; 1128 if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN) 1129 so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen; 1130 if (soppp->sopp_flags & SOCKOPT_LOOPBACK) 1131 so->so_proto_props.sopp_loopback = soppp->sopp_loopback; 1132 1133 mutex_exit(&so->so_lock); 1134 1135 #ifdef DEBUG 1136 soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL | 1137 SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ | 1138 SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER | 1139 SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ | 1140 SOCKOPT_LOOPBACK); 1141 ASSERT(soppp->sopp_flags == 0); 1142 #endif 1143 } 1144 1145 /* ARGSUSED */ 1146 ssize_t 1147 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp, 1148 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp) 1149 { 1150 struct sonode *so = (struct sonode *)sock_handle; 1151 boolean_t force_push = B_TRUE; 1152 int space_left; 1153 sodirect_t *sodp = so->so_direct; 1154 1155 ASSERT(errorp != NULL); 1156 *errorp = 0; 1157 if (mp == NULL) { 1158 if (so->so_downcalls->sd_recv_uio != NULL) { 1159 mutex_enter(&so->so_lock); 1160 /* the notify functions will drop the lock */ 1161 if (flags & MSG_OOB) 1162 so_notify_oobdata(so, IS_SO_OOB_INLINE(so)); 1163 else 1164 so_notify_data(so, msg_size); 1165 return (0); 1166 } 1167 ASSERT(msg_size == 0); 1168 /* 1169 * recv space check 1170 */ 1171 mutex_enter(&so->so_lock); 1172 space_left = so->so_rcvbuf - so->so_rcv_queued; 1173 if (space_left <= 0) { 1174 so->so_flowctrld = B_TRUE; 1175 *errorp = ENOSPC; 1176 space_left = -1; 1177 } 1178 goto done_unlock; 1179 } 1180 1181 ASSERT(mp->b_next == NULL); 1182 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO); 1183 ASSERT(msg_size == msgdsize(mp)); 1184 1185 if (flags & MSG_OOB) { 1186 so_queue_oob(sock_handle, mp, msg_size); 1187 return (0); 1188 } 1189 1190 if (force_pushp != NULL) 1191 force_push = *force_pushp; 1192 1193 if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1194 /* The read pointer is not aligned correctly for TPI */ 1195 zcmn_err(getzoneid(), CE_WARN, 1196 "sockfs: Unaligned TPI message received. rptr = %p\n", 1197 (void *)mp->b_rptr); 1198 freemsg(mp); 1199 mutex_enter(&so->so_lock); 1200 if (sodp != NULL) 1201 SOD_UIOAFINI(sodp); 1202 mutex_exit(&so->so_lock); 1203 1204 return (so->so_rcvbuf - so->so_rcv_queued); 1205 } 1206 1207 mutex_enter(&so->so_lock); 1208 if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) { 1209 if (sodp != NULL) 1210 SOD_DISABLE(sodp); 1211 mutex_exit(&so->so_lock); 1212 *errorp = EOPNOTSUPP; 1213 return (-1); 1214 } 1215 if (so->so_state & SS_CANTRCVMORE) { 1216 freemsg(mp); 1217 if (sodp != NULL) 1218 SOD_DISABLE(sodp); 1219 mutex_exit(&so->so_lock); 1220 return (0); 1221 } 1222 1223 /* process the mblk via I/OAT if capable */ 1224 if (sodp != NULL && sodp->sod_enabled) { 1225 if (DB_TYPE(mp) == M_DATA) { 1226 sod_uioa_mblk_init(sodp, mp, msg_size); 1227 } else { 1228 SOD_UIOAFINI(sodp); 1229 } 1230 } 1231 1232 if (mp->b_next == NULL) { 1233 so_enqueue_msg(so, mp, msg_size); 1234 } else { 1235 do { 1236 mblk_t *nmp; 1237 1238 if ((nmp = mp->b_next) != NULL) { 1239 mp->b_next = NULL; 1240 } 1241 so_enqueue_msg(so, mp, msgdsize(mp)); 1242 mp = nmp; 1243 } while (mp != NULL); 1244 } 1245 1246 space_left = so->so_rcvbuf - so->so_rcv_queued; 1247 if (space_left <= 0) { 1248 so->so_flowctrld = B_TRUE; 1249 *errorp = ENOSPC; 1250 space_left = -1; 1251 } 1252 1253 if (force_push || so->so_rcv_queued >= so->so_rcv_thresh || 1254 so->so_rcv_queued >= so->so_rcv_wanted) { 1255 SOCKET_TIMER_CANCEL(so); 1256 /* 1257 * so_notify_data will release the lock 1258 */ 1259 so_notify_data(so, so->so_rcv_queued); 1260 1261 if (force_pushp != NULL) 1262 *force_pushp = B_TRUE; 1263 goto done; 1264 } else if (so->so_rcv_timer_tid == 0) { 1265 /* Make sure the recv push timer is running */ 1266 SOCKET_TIMER_START(so); 1267 } 1268 1269 done_unlock: 1270 mutex_exit(&so->so_lock); 1271 done: 1272 return (space_left); 1273 } 1274 1275 /* 1276 * Set the offset of where the oob data is relative to the bytes in 1277 * queued. Also generate SIGURG 1278 */ 1279 void 1280 so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset) 1281 { 1282 struct sonode *so; 1283 1284 ASSERT(offset >= 0); 1285 so = (struct sonode *)sock_handle; 1286 mutex_enter(&so->so_lock); 1287 if (so->so_direct != NULL) 1288 SOD_UIOAFINI(so->so_direct); 1289 1290 /* 1291 * New urgent data on the way so forget about any old 1292 * urgent data. 1293 */ 1294 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA); 1295 1296 /* 1297 * Record that urgent data is pending. 1298 */ 1299 so->so_state |= SS_OOBPEND; 1300 1301 if (so->so_oobmsg != NULL) { 1302 dprintso(so, 1, ("sock: discarding old oob\n")); 1303 freemsg(so->so_oobmsg); 1304 so->so_oobmsg = NULL; 1305 } 1306 1307 /* 1308 * set the offset where the urgent byte is 1309 */ 1310 so->so_oobmark = so->so_rcv_queued + offset; 1311 if (so->so_oobmark == 0) 1312 so->so_state |= SS_RCVATMARK; 1313 else 1314 so->so_state &= ~SS_RCVATMARK; 1315 1316 so_notify_oobsig(so); 1317 } 1318 1319 /* 1320 * Queue the OOB byte 1321 */ 1322 static void 1323 so_queue_oob(sock_upper_handle_t sock_handle, mblk_t *mp, size_t len) 1324 { 1325 struct sonode *so; 1326 1327 so = (struct sonode *)sock_handle; 1328 mutex_enter(&so->so_lock); 1329 if (so->so_direct != NULL) 1330 SOD_UIOAFINI(so->so_direct); 1331 1332 ASSERT(mp != NULL); 1333 if (!IS_SO_OOB_INLINE(so)) { 1334 so->so_oobmsg = mp; 1335 so->so_state |= SS_HAVEOOBDATA; 1336 } else { 1337 so_enqueue_msg(so, mp, len); 1338 } 1339 1340 so_notify_oobdata(so, IS_SO_OOB_INLINE(so)); 1341 } 1342 1343 int 1344 so_close(struct sonode *so, int flag, struct cred *cr) 1345 { 1346 int error; 1347 1348 error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr); 1349 1350 /* 1351 * At this point there will be no more upcalls from the protocol 1352 */ 1353 mutex_enter(&so->so_lock); 1354 1355 ASSERT(so_verify_oobstate(so)); 1356 1357 so_rcv_flush(so); 1358 mutex_exit(&so->so_lock); 1359 1360 return (error); 1361 } 1362 1363 void 1364 so_zcopy_notify(sock_upper_handle_t sock_handle) 1365 { 1366 struct sonode *so = (struct sonode *)sock_handle; 1367 1368 mutex_enter(&so->so_lock); 1369 so->so_copyflag |= STZCNOTIFY; 1370 cv_broadcast(&so->so_copy_cv); 1371 mutex_exit(&so->so_lock); 1372 } 1373 1374 void 1375 so_set_error(sock_upper_handle_t sock_handle, int error) 1376 { 1377 struct sonode *so = (struct sonode *)sock_handle; 1378 1379 mutex_enter(&so->so_lock); 1380 1381 soseterror(so, error); 1382 1383 so_notify_error(so); 1384 } 1385 1386 /* 1387 * so_recvmsg - read data from the socket 1388 * 1389 * There are two ways of obtaining data; either we ask the protocol to 1390 * copy directly into the supplied buffer, or we copy data from the 1391 * sonode's receive queue. The decision which one to use depends on 1392 * whether the protocol has a sd_recv_uio down call. 1393 */ 1394 int 1395 so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 1396 struct cred *cr) 1397 { 1398 rval_t rval; 1399 int flags = 0; 1400 t_uscalar_t controllen, namelen; 1401 int error = 0; 1402 int ret; 1403 mblk_t *mctlp = NULL; 1404 union T_primitives *tpr; 1405 void *control; 1406 ssize_t saved_resid; 1407 struct uio *suiop; 1408 1409 SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr)); 1410 1411 if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 1412 (so->so_mode & SM_CONNREQUIRED)) { 1413 SO_UNBLOCK_FALLBACK(so); 1414 return (ENOTCONN); 1415 } 1416 1417 if (msg->msg_flags & MSG_PEEK) 1418 msg->msg_flags &= ~MSG_WAITALL; 1419 1420 if (so->so_mode & SM_ATOMIC) 1421 msg->msg_flags |= MSG_TRUNC; 1422 1423 if (msg->msg_flags & MSG_OOB) { 1424 if ((so->so_mode & SM_EXDATA) == 0) { 1425 error = EOPNOTSUPP; 1426 } else if (so->so_downcalls->sd_recv_uio != NULL) { 1427 error = (*so->so_downcalls->sd_recv_uio) 1428 (so->so_proto_handle, uiop, msg, cr); 1429 } else { 1430 error = sorecvoob(so, msg, uiop, msg->msg_flags, 1431 IS_SO_OOB_INLINE(so)); 1432 } 1433 SO_UNBLOCK_FALLBACK(so); 1434 return (error); 1435 } 1436 1437 /* 1438 * If the protocol has the recv down call, then pass the request 1439 * down. 1440 */ 1441 if (so->so_downcalls->sd_recv_uio != NULL) { 1442 error = (*so->so_downcalls->sd_recv_uio) 1443 (so->so_proto_handle, uiop, msg, cr); 1444 SO_UNBLOCK_FALLBACK(so); 1445 return (error); 1446 } 1447 1448 /* 1449 * Reading data from the socket buffer 1450 */ 1451 flags = msg->msg_flags; 1452 msg->msg_flags = 0; 1453 1454 /* 1455 * Set msg_controllen and msg_namelen to zero here to make it 1456 * simpler in the cases that no control or name is returned. 1457 */ 1458 controllen = msg->msg_controllen; 1459 namelen = msg->msg_namelen; 1460 msg->msg_controllen = 0; 1461 msg->msg_namelen = 0; 1462 1463 mutex_enter(&so->so_lock); 1464 /* Set SOREADLOCKED */ 1465 error = so_lock_read_intr(so, 1466 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 1467 mutex_exit(&so->so_lock); 1468 if (error) { 1469 SO_UNBLOCK_FALLBACK(so); 1470 return (error); 1471 } 1472 1473 suiop = sod_rcv_init(so, flags, &uiop); 1474 retry: 1475 saved_resid = uiop->uio_resid; 1476 error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags); 1477 if (error != 0) { 1478 goto out; 1479 } 1480 /* 1481 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 1482 * For non-datagrams MOREDATA is used to set MSG_EOR. 1483 */ 1484 ASSERT(!(rval.r_val1 & MORECTL)); 1485 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 1486 msg->msg_flags |= MSG_TRUNC; 1487 if (mctlp == NULL) { 1488 dprintso(so, 1, ("so_recvmsg: got M_DATA\n")); 1489 1490 mutex_enter(&so->so_lock); 1491 /* Set MSG_EOR based on MOREDATA */ 1492 if (!(rval.r_val1 & MOREDATA)) { 1493 if (so->so_state & SS_SAVEDEOR) { 1494 msg->msg_flags |= MSG_EOR; 1495 so->so_state &= ~SS_SAVEDEOR; 1496 } 1497 } 1498 /* 1499 * If some data was received (i.e. not EOF) and the 1500 * read/recv* has not been satisfied wait for some more. 1501 */ 1502 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1503 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1504 mutex_exit(&so->so_lock); 1505 flags |= MSG_NOMARK; 1506 goto retry; 1507 } 1508 1509 goto out_locked; 1510 } 1511 /* so_queue_msg has already verified length and alignment */ 1512 tpr = (union T_primitives *)mctlp->b_rptr; 1513 dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type)); 1514 switch (tpr->type) { 1515 case T_DATA_IND: { 1516 /* 1517 * Set msg_flags to MSG_EOR based on 1518 * MORE_flag and MOREDATA. 1519 */ 1520 mutex_enter(&so->so_lock); 1521 so->so_state &= ~SS_SAVEDEOR; 1522 if (!(tpr->data_ind.MORE_flag & 1)) { 1523 if (!(rval.r_val1 & MOREDATA)) 1524 msg->msg_flags |= MSG_EOR; 1525 else 1526 so->so_state |= SS_SAVEDEOR; 1527 } 1528 freemsg(mctlp); 1529 /* 1530 * If some data was received (i.e. not EOF) and the 1531 * read/recv* has not been satisfied wait for some more. 1532 */ 1533 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1534 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1535 mutex_exit(&so->so_lock); 1536 flags |= MSG_NOMARK; 1537 goto retry; 1538 } 1539 goto out_locked; 1540 } 1541 case T_UNITDATA_IND: { 1542 void *addr; 1543 t_uscalar_t addrlen; 1544 void *abuf; 1545 t_uscalar_t optlen; 1546 void *opt; 1547 1548 if (namelen != 0) { 1549 /* Caller wants source address */ 1550 addrlen = tpr->unitdata_ind.SRC_length; 1551 addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset, 1552 addrlen, 1); 1553 if (addr == NULL) { 1554 freemsg(mctlp); 1555 error = EPROTO; 1556 eprintsoline(so, error); 1557 goto out; 1558 } 1559 ASSERT(so->so_family != AF_UNIX); 1560 } 1561 optlen = tpr->unitdata_ind.OPT_length; 1562 if (optlen != 0) { 1563 t_uscalar_t ncontrollen; 1564 1565 /* 1566 * Extract any source address option. 1567 * Determine how large cmsg buffer is needed. 1568 */ 1569 opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset, 1570 optlen, __TPI_ALIGN_SIZE); 1571 1572 if (opt == NULL) { 1573 freemsg(mctlp); 1574 error = EPROTO; 1575 eprintsoline(so, error); 1576 goto out; 1577 } 1578 if (so->so_family == AF_UNIX) 1579 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 1580 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1581 !(flags & MSG_XPG4_2)); 1582 if (controllen != 0) 1583 controllen = ncontrollen; 1584 else if (ncontrollen != 0) 1585 msg->msg_flags |= MSG_CTRUNC; 1586 } else { 1587 controllen = 0; 1588 } 1589 1590 if (namelen != 0) { 1591 /* 1592 * Return address to caller. 1593 * Caller handles truncation if length 1594 * exceeds msg_namelen. 1595 * NOTE: AF_UNIX NUL termination is ensured by 1596 * the sender's copyin_name(). 1597 */ 1598 abuf = kmem_alloc(addrlen, KM_SLEEP); 1599 1600 bcopy(addr, abuf, addrlen); 1601 msg->msg_name = abuf; 1602 msg->msg_namelen = addrlen; 1603 } 1604 1605 if (controllen != 0) { 1606 /* 1607 * Return control msg to caller. 1608 * Caller handles truncation if length 1609 * exceeds msg_controllen. 1610 */ 1611 control = kmem_zalloc(controllen, KM_SLEEP); 1612 1613 error = so_opt2cmsg(mctlp, opt, optlen, 1614 !(flags & MSG_XPG4_2), control, controllen); 1615 if (error) { 1616 freemsg(mctlp); 1617 if (msg->msg_namelen != 0) 1618 kmem_free(msg->msg_name, 1619 msg->msg_namelen); 1620 kmem_free(control, controllen); 1621 eprintsoline(so, error); 1622 goto out; 1623 } 1624 msg->msg_control = control; 1625 msg->msg_controllen = controllen; 1626 } 1627 1628 freemsg(mctlp); 1629 goto out; 1630 } 1631 case T_OPTDATA_IND: { 1632 struct T_optdata_req *tdr; 1633 void *opt; 1634 t_uscalar_t optlen; 1635 1636 tdr = (struct T_optdata_req *)mctlp->b_rptr; 1637 optlen = tdr->OPT_length; 1638 if (optlen != 0) { 1639 t_uscalar_t ncontrollen; 1640 /* 1641 * Determine how large cmsg buffer is needed. 1642 */ 1643 opt = sogetoff(mctlp, 1644 tpr->optdata_ind.OPT_offset, optlen, 1645 __TPI_ALIGN_SIZE); 1646 1647 if (opt == NULL) { 1648 freemsg(mctlp); 1649 error = EPROTO; 1650 eprintsoline(so, error); 1651 goto out; 1652 } 1653 1654 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1655 !(flags & MSG_XPG4_2)); 1656 if (controllen != 0) 1657 controllen = ncontrollen; 1658 else if (ncontrollen != 0) 1659 msg->msg_flags |= MSG_CTRUNC; 1660 } else { 1661 controllen = 0; 1662 } 1663 1664 if (controllen != 0) { 1665 /* 1666 * Return control msg to caller. 1667 * Caller handles truncation if length 1668 * exceeds msg_controllen. 1669 */ 1670 control = kmem_zalloc(controllen, KM_SLEEP); 1671 1672 error = so_opt2cmsg(mctlp, opt, optlen, 1673 !(flags & MSG_XPG4_2), control, controllen); 1674 if (error) { 1675 freemsg(mctlp); 1676 kmem_free(control, controllen); 1677 eprintsoline(so, error); 1678 goto out; 1679 } 1680 msg->msg_control = control; 1681 msg->msg_controllen = controllen; 1682 } 1683 1684 /* 1685 * Set msg_flags to MSG_EOR based on 1686 * DATA_flag and MOREDATA. 1687 */ 1688 mutex_enter(&so->so_lock); 1689 so->so_state &= ~SS_SAVEDEOR; 1690 if (!(tpr->data_ind.MORE_flag & 1)) { 1691 if (!(rval.r_val1 & MOREDATA)) 1692 msg->msg_flags |= MSG_EOR; 1693 else 1694 so->so_state |= SS_SAVEDEOR; 1695 } 1696 freemsg(mctlp); 1697 /* 1698 * If some data was received (i.e. not EOF) and the 1699 * read/recv* has not been satisfied wait for some more. 1700 * Not possible to wait if control info was received. 1701 */ 1702 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1703 controllen == 0 && 1704 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1705 mutex_exit(&so->so_lock); 1706 flags |= MSG_NOMARK; 1707 goto retry; 1708 } 1709 goto out_locked; 1710 } 1711 default: 1712 cmn_err(CE_CONT, "so_recvmsg bad type %x \n", 1713 tpr->type); 1714 freemsg(mctlp); 1715 error = EPROTO; 1716 ASSERT(0); 1717 } 1718 out: 1719 mutex_enter(&so->so_lock); 1720 out_locked: 1721 ret = sod_rcv_done(so, suiop, uiop); 1722 if (ret != 0 && error == 0) 1723 error = ret; 1724 1725 so_unlock_read(so); /* Clear SOREADLOCKED */ 1726 mutex_exit(&so->so_lock); 1727 1728 SO_UNBLOCK_FALLBACK(so); 1729 1730 return (error); 1731 } 1732 1733 sonodeops_t so_sonodeops = { 1734 so_init, /* sop_init */ 1735 so_accept, /* sop_accept */ 1736 so_bind, /* sop_bind */ 1737 so_listen, /* sop_listen */ 1738 so_connect, /* sop_connect */ 1739 so_recvmsg, /* sop_recvmsg */ 1740 so_sendmsg, /* sop_sendmsg */ 1741 so_sendmblk, /* sop_sendmblk */ 1742 so_getpeername, /* sop_getpeername */ 1743 so_getsockname, /* sop_getsockname */ 1744 so_shutdown, /* sop_shutdown */ 1745 so_getsockopt, /* sop_getsockopt */ 1746 so_setsockopt, /* sop_setsockopt */ 1747 so_ioctl, /* sop_ioctl */ 1748 so_poll, /* sop_poll */ 1749 so_close, /* sop_close */ 1750 }; 1751 1752 sock_upcalls_t so_upcalls = { 1753 so_newconn, 1754 so_connected, 1755 so_disconnected, 1756 so_opctl, 1757 so_queue_msg, 1758 so_set_prop, 1759 so_txq_full, 1760 so_signal_oob, 1761 so_zcopy_notify, 1762 so_set_error 1763 }; 1764