1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "@(#)sockcommon_sops.c 1.1 07/06/14 SMI" 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/sysmacros.h> 33 #include <sys/debug.h> 34 #include <sys/cmn_err.h> 35 36 #include <sys/stropts.h> 37 #include <sys/socket.h> 38 #include <sys/socketvar.h> 39 40 #define _SUN_TPI_VERSION 2 41 #include <sys/tihdr.h> 42 #include <sys/sockio.h> 43 #include <sys/sodirect.h> 44 #include <sys/kmem_impl.h> 45 46 #include <sys/strsubr.h> 47 #include <sys/strsun.h> 48 #include <sys/ddi.h> 49 #include <netinet/in.h> 50 #include <inet/ip.h> 51 52 #include <fs/sockfs/sockcommon.h> 53 54 #include <sys/socket_proto.h> 55 56 #include <fs/sockfs/socktpi_impl.h> 57 #include <sys/tihdr.h> 58 #include <fs/sockfs/nl7c.h> 59 #include <inet/kssl/ksslapi.h> 60 61 62 extern int xnet_skip_checks; 63 extern int xnet_check_print; 64 65 static void so_queue_oob(sock_upper_handle_t, mblk_t *, size_t); 66 67 68 /*ARGSUSED*/ 69 int 70 so_accept_notsupp(struct sonode *lso, int fflag, 71 
struct cred *cr, struct sonode **nsop) 72 { 73 return (EOPNOTSUPP); 74 } 75 76 /*ARGSUSED*/ 77 int 78 so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr) 79 { 80 return (EOPNOTSUPP); 81 } 82 83 /*ARGSUSED*/ 84 int 85 so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa, 86 socklen_t *len, struct cred *cr) 87 { 88 return (EOPNOTSUPP); 89 } 90 91 /*ARGSUSED*/ 92 int 93 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr, 94 socklen_t *addrlen, boolean_t accept, struct cred *cr) 95 { 96 return (EOPNOTSUPP); 97 } 98 99 /*ARGSUSED*/ 100 int 101 so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr) 102 { 103 return (EOPNOTSUPP); 104 } 105 106 /*ARGSUSED*/ 107 int 108 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag, 109 struct cred *cr, mblk_t **mpp) 110 { 111 return (EOPNOTSUPP); 112 } 113 114 /* 115 * Generic Socket Ops 116 */ 117 118 /* ARGSUSED */ 119 int 120 so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags) 121 { 122 return (socket_init_common(so, pso, flags, cr)); 123 } 124 125 int 126 so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 127 int flags, struct cred *cr) 128 { 129 int error; 130 131 SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr)); 132 133 ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD); 134 135 /* X/Open requires this check */ 136 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 137 if (xnet_check_print) { 138 printf("sockfs: X/Open bind state check " 139 "caused EINVAL\n"); 140 } 141 error = EINVAL; 142 goto done; 143 } 144 145 /* 146 * a bind to a NULL address is interpreted as unbind. So just 147 * do the downcall. 148 */ 149 if (name == NULL) 150 goto dobind; 151 152 switch (so->so_family) { 153 case AF_INET: 154 if ((size_t)namelen != sizeof (sin_t)) { 155 error = name->sa_family != so->so_family ? 
156 EAFNOSUPPORT : EINVAL; 157 eprintsoline(so, error); 158 goto done; 159 } 160 161 if ((flags & _SOBIND_XPG4_2) && 162 (name->sa_family != so->so_family)) { 163 /* 164 * This check has to be made for X/Open 165 * sockets however application failures have 166 * been observed when it is applied to 167 * all sockets. 168 */ 169 error = EAFNOSUPPORT; 170 eprintsoline(so, error); 171 goto done; 172 } 173 /* 174 * Force a zero sa_family to match so_family. 175 * 176 * Some programs like inetd(1M) don't set the 177 * family field. Other programs leave 178 * sin_family set to garbage - SunOS 4.X does 179 * not check the family field on a bind. 180 * We use the family field that 181 * was passed in to the socket() call. 182 */ 183 name->sa_family = so->so_family; 184 break; 185 186 case AF_INET6: { 187 #ifdef DEBUG 188 sin6_t *sin6 = (sin6_t *)name; 189 #endif 190 if ((size_t)namelen != sizeof (sin6_t)) { 191 error = name->sa_family != so->so_family ? 192 EAFNOSUPPORT : EINVAL; 193 eprintsoline(so, error); 194 goto done; 195 } 196 197 if (name->sa_family != so->so_family) { 198 /* 199 * With IPv6 we require the family to match 200 * unlike in IPv4. 201 */ 202 error = EAFNOSUPPORT; 203 eprintsoline(so, error); 204 goto done; 205 } 206 #ifdef DEBUG 207 /* 208 * Verify that apps don't forget to clear 209 * sin6_scope_id etc 210 */ 211 if (sin6->sin6_scope_id != 0 && 212 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 213 zcmn_err(getzoneid(), CE_WARN, 214 "bind with uninitialized sin6_scope_id " 215 "(%d) on socket. Pid = %d\n", 216 (int)sin6->sin6_scope_id, 217 (int)curproc->p_pid); 218 } 219 if (sin6->__sin6_src_id != 0) { 220 zcmn_err(getzoneid(), CE_WARN, 221 "bind with uninitialized __sin6_src_id " 222 "(%d) on socket. 
Pid = %d\n", 223 (int)sin6->__sin6_src_id, 224 (int)curproc->p_pid); 225 } 226 #endif /* DEBUG */ 227 228 break; 229 } 230 default: 231 /* Just pass the request to the protocol */ 232 goto dobind; 233 } 234 235 /* 236 * First we check if either NCA or KSSL has been enabled for 237 * the requested address, and if so, we fall back to TPI. 238 * If neither of those two services are enabled, then we just 239 * pass the request to the protocol. 240 * 241 * Note that KSSL can only be enabled on a socket if NCA is NOT 242 * enabled for that socket, hence the else-statement below. 243 */ 244 if (nl7c_enabled && ((so->so_family == AF_INET || 245 so->so_family == AF_INET6) && 246 nl7c_lookup_addr(name, namelen) != NULL)) { 247 /* 248 * NL7C is not supported in non-global zones, 249 * we enforce this restriction here. 250 */ 251 if (so->so_zoneid == GLOBAL_ZONEID) { 252 /* NCA should be used, so fall back to TPI */ 253 error = so_tpi_fallback(so, cr); 254 SO_UNBLOCK_FALLBACK(so); 255 if (error) 256 return (error); 257 else 258 return (SOP_BIND(so, name, namelen, flags, cr)); 259 } 260 } else if (so->so_type == SOCK_STREAM) { 261 /* Check if KSSL has been configured for this address */ 262 kssl_ent_t ent; 263 kssl_endpt_type_t type; 264 struct T_bind_req bind_req; 265 mblk_t *mp; 266 267 /* 268 * TODO: Check with KSSL team if we could add a function call 269 * that only queries whether KSSL is enabled for the given 270 * address. 271 */ 272 bind_req.PRIM_type = T_BIND_REQ; 273 bind_req.ADDR_length = namelen; 274 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 275 mp = soallocproto2(&bind_req, sizeof (bind_req), 276 name, namelen, 0, _ALLOC_SLEEP); 277 278 type = kssl_check_proxy(mp, so, &ent); 279 freemsg(mp); 280 281 if (type != KSSL_NO_PROXY) { 282 /* 283 * KSSL has been configured for this address, so 284 * we must fall back to TPI. 
285 */ 286 kssl_release_ent(ent, so, type); 287 error = so_tpi_fallback(so, cr); 288 SO_UNBLOCK_FALLBACK(so); 289 if (error) 290 return (error); 291 else 292 return (SOP_BIND(so, name, namelen, flags, cr)); 293 } 294 } 295 296 dobind: 297 error = (*so->so_downcalls->sd_bind) 298 (so->so_proto_handle, name, namelen, cr); 299 done: 300 SO_UNBLOCK_FALLBACK(so); 301 302 return (error); 303 } 304 305 int 306 so_listen(struct sonode *so, int backlog, struct cred *cr) 307 { 308 int error = 0; 309 310 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 311 SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr)); 312 313 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle, backlog, 314 cr); 315 316 SO_UNBLOCK_FALLBACK(so); 317 318 return (error); 319 } 320 321 322 int 323 so_connect(struct sonode *so, const struct sockaddr *name, 324 socklen_t namelen, int fflag, int flags, struct cred *cr) 325 { 326 int error = 0; 327 sock_connid_t id; 328 329 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 330 SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr)); 331 332 /* 333 * If there is a pending error, return error 334 * This can happen if a non blocking operation caused an error. 
335 */ 336 337 if (so->so_error != 0) { 338 mutex_enter(&so->so_lock); 339 error = sogeterr(so, B_TRUE); 340 mutex_exit(&so->so_lock); 341 if (error != 0) 342 goto done; 343 } 344 345 error = (*so->so_downcalls->sd_connect)(so->so_proto_handle, 346 name, namelen, &id, cr); 347 348 if (error == EINPROGRESS) 349 error = so_wait_connected(so, fflag & (FNONBLOCK|FNDELAY), id); 350 351 done: 352 SO_UNBLOCK_FALLBACK(so); 353 return (error); 354 } 355 356 /*ARGSUSED*/ 357 int 358 so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop) 359 { 360 int error = 0; 361 struct sonode *nso; 362 363 *nsop = NULL; 364 365 SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop)); 366 if ((so->so_state & SS_ACCEPTCONN) == 0) { 367 SO_UNBLOCK_FALLBACK(so); 368 return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ? 369 EOPNOTSUPP : EINVAL); 370 } 371 372 if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)), 373 &nso)) == 0) { 374 ASSERT(nso != NULL); 375 376 /* finish the accept */ 377 error = (*so->so_downcalls->sd_accept)(so->so_proto_handle, 378 nso->so_proto_handle, (sock_upper_handle_t)nso, cr); 379 if (error != 0) { 380 (void) socket_close(nso, 0, cr); 381 socket_destroy(nso); 382 } else { 383 *nsop = nso; 384 } 385 } 386 387 SO_UNBLOCK_FALLBACK(so); 388 return (error); 389 } 390 391 int 392 so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 393 struct cred *cr) 394 { 395 int error, flags; 396 boolean_t dontblock; 397 ssize_t orig_resid; 398 mblk_t *mp; 399 400 SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr)); 401 402 flags = msg->msg_flags; 403 error = 0; 404 dontblock = (flags & MSG_DONTWAIT) || 405 (uiop->uio_fmode & (FNONBLOCK|FNDELAY)); 406 407 if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) { 408 /* 409 * Old way of passing fd's is not supported 410 */ 411 SO_UNBLOCK_FALLBACK(so); 412 return (EOPNOTSUPP); 413 } 414 415 if ((so->so_mode & SM_ATOMIC) && 416 uiop->uio_resid > 
so->so_proto_props.sopp_maxpsz && 417 so->so_proto_props.sopp_maxpsz != -1) { 418 SO_UNBLOCK_FALLBACK(so); 419 return (EMSGSIZE); 420 } 421 422 /* 423 * For atomic sends we will only do one iteration. 424 */ 425 do { 426 if (so->so_state & SS_CANTSENDMORE) { 427 error = EPIPE; 428 break; 429 } 430 431 if (so->so_error != 0) { 432 mutex_enter(&so->so_lock); 433 error = sogeterr(so, B_TRUE); 434 mutex_exit(&so->so_lock); 435 if (error != 0) 436 break; 437 } 438 439 /* 440 * Send down OOB messages even if the send path is being 441 * flow controlled (assuming the protocol supports OOB data). 442 */ 443 if (flags & MSG_OOB) { 444 if ((so->so_mode & SM_EXDATA) == 0) { 445 error = EOPNOTSUPP; 446 break; 447 } 448 } else if (so->so_snd_qfull) { 449 /* 450 * Need to wait until the protocol is ready to receive 451 * more data for transmission. 452 */ 453 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 454 break; 455 } 456 457 /* 458 * Time to send data to the protocol. We either copy the 459 * data into mblks or pass the uio directly to the protocol. 460 * We decide what to do based on the available down calls. 461 */ 462 if (so->so_downcalls->sd_send_uio != NULL) { 463 error = (*so->so_downcalls->sd_send_uio) 464 (so->so_proto_handle, uiop, msg, cr); 465 if (error != 0) 466 break; 467 } else { 468 /* save the resid in case of failure */ 469 orig_resid = uiop->uio_resid; 470 471 if ((mp = socopyinuio(uiop, 472 so->so_proto_props.sopp_maxpsz, 473 so->so_proto_props.sopp_wroff, 474 so->so_proto_props.sopp_maxblk, 475 so->so_proto_props.sopp_tail, &error)) == NULL) { 476 break; 477 } 478 ASSERT(uiop->uio_resid >= 0); 479 480 error = (*so->so_downcalls->sd_send) 481 (so->so_proto_handle, mp, msg, cr); 482 if (error != 0) { 483 /* 484 * The send failed. We do not have to free the 485 * mblks, because that is the protocol's 486 * responsibility. However, uio_resid must 487 * remain accurate, so adjust that here. 
488 */ 489 uiop->uio_resid = orig_resid; 490 break; 491 } 492 } 493 } while (uiop->uio_resid > 0); 494 495 SO_UNBLOCK_FALLBACK(so); 496 497 return (error); 498 } 499 500 int 501 so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 502 struct cred *cr, mblk_t **mpp) 503 { 504 int error; 505 boolean_t dontblock; 506 size_t size; 507 mblk_t *mp = *mpp; 508 509 SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp)); 510 511 error = 0; 512 dontblock = (msg->msg_flags & MSG_DONTWAIT) || 513 (fflag & (FNONBLOCK|FNDELAY)); 514 size = msgdsize(mp); 515 516 if ((so->so_mode & SM_SENDFILESUPP) == 0 || 517 so->so_downcalls->sd_send == NULL) { 518 SO_UNBLOCK_FALLBACK(so); 519 return (EOPNOTSUPP); 520 } 521 522 if ((so->so_mode & SM_ATOMIC) && 523 size > so->so_proto_props.sopp_maxpsz && 524 so->so_proto_props.sopp_maxpsz != -1) { 525 SO_UNBLOCK_FALLBACK(so); 526 return (EMSGSIZE); 527 } 528 529 while (mp != NULL) { 530 mblk_t *nmp, *last_mblk; 531 size_t mlen; 532 533 if (so->so_state & SS_CANTSENDMORE) { 534 error = EPIPE; 535 break; 536 } 537 if (so->so_error != 0) { 538 mutex_enter(&so->so_lock); 539 error = sogeterr(so, B_TRUE); 540 mutex_exit(&so->so_lock); 541 if (error != 0) 542 break; 543 } 544 if (so->so_snd_qfull) { 545 /* 546 * Need to wait until the protocol is ready to receive 547 * more data for transmission. 548 */ 549 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 550 break; 551 } 552 553 /* 554 * We only allow so_maxpsz of data to be sent down to 555 * the protocol at time. 556 */ 557 mlen = MBLKL(mp); 558 nmp = mp->b_cont; 559 last_mblk = mp; 560 while (nmp != NULL) { 561 mlen += MBLKL(nmp); 562 if (mlen > so->so_proto_props.sopp_maxpsz) { 563 last_mblk->b_cont = NULL; 564 break; 565 } 566 last_mblk = nmp; 567 nmp = nmp->b_cont; 568 } 569 570 error = (*so->so_downcalls->sd_send) 571 (so->so_proto_handle, mp, msg, cr); 572 if (error != 0) { 573 /* 574 * The send failed. The protocol will free the mblks 575 * that were sent down. 
Let the caller deal with the 576 * rest. 577 */ 578 *mpp = nmp; 579 break; 580 } 581 582 *mpp = mp = nmp; 583 } 584 585 SO_UNBLOCK_FALLBACK(so); 586 587 return (error); 588 } 589 590 int 591 so_shutdown(struct sonode *so, int how, struct cred *cr) 592 { 593 int error; 594 595 SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr)); 596 597 /* 598 * SunOS 4.X has no check for datagram sockets. 599 * 5.X checks that it is connected (ENOTCONN) 600 * X/Open requires that we check the connected state. 601 */ 602 if (!(so->so_state & SS_ISCONNECTED)) { 603 if (!xnet_skip_checks) { 604 error = ENOTCONN; 605 if (xnet_check_print) { 606 printf("sockfs: X/Open shutdown check " 607 "caused ENOTCONN\n"); 608 } 609 } 610 goto done; 611 } 612 613 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle, 614 how, cr)); 615 616 /* 617 * Protocol agreed to shutdown. We need to flush the 618 * receive buffer if the receive side is being shutdown. 619 */ 620 if (error == 0 && how != SHUT_WR) { 621 mutex_enter(&so->so_lock); 622 /* wait for active reader to finish */ 623 (void) so_lock_read(so, 0); 624 625 so_rcv_flush(so); 626 627 so_unlock_read(so); 628 mutex_exit(&so->so_lock); 629 } 630 631 done: 632 SO_UNBLOCK_FALLBACK(so); 633 return (error); 634 } 635 636 int 637 so_getsockname(struct sonode *so, struct sockaddr *addr, 638 socklen_t *addrlen, struct cred *cr) 639 { 640 int error; 641 642 SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr)); 643 644 error = (*so->so_downcalls->sd_getsockname) 645 (so->so_proto_handle, addr, addrlen, cr); 646 647 SO_UNBLOCK_FALLBACK(so); 648 return (error); 649 } 650 651 int 652 so_getpeername(struct sonode *so, struct sockaddr *addr, 653 socklen_t *addrlen, boolean_t accept, struct cred *cr) 654 { 655 int error; 656 657 SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr)); 658 659 if (accept) { 660 error = (*so->so_downcalls->sd_getpeername) 661 (so->so_proto_handle, addr, addrlen, cr); 662 } else if (!(so->so_state & 
SS_ISCONNECTED)) { 663 error = ENOTCONN; 664 } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 665 /* Added this check for X/Open */ 666 error = EINVAL; 667 if (xnet_check_print) { 668 printf("sockfs: X/Open getpeername check => EINVAL\n"); 669 } 670 } else { 671 error = (*so->so_downcalls->sd_getpeername) 672 (so->so_proto_handle, addr, addrlen, cr); 673 } 674 675 SO_UNBLOCK_FALLBACK(so); 676 return (error); 677 } 678 679 int 680 so_getsockopt(struct sonode *so, int level, int option_name, 681 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 682 { 683 int error = 0; 684 685 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 686 SO_BLOCK_FALLBACK(so, 687 SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr)); 688 689 error = socket_getopt_common(so, level, option_name, optval, optlenp, 690 flags); 691 if (error < 0) { 692 error = (*so->so_downcalls->sd_getsockopt) 693 (so->so_proto_handle, level, option_name, optval, optlenp, 694 cr); 695 if (error == ENOPROTOOPT) { 696 if (level == SOL_SOCKET) { 697 /* 698 * If a protocol does not support a particular 699 * socket option, set can fail (not allowed) 700 * but get can not fail. This is the previous 701 * sockfs bahvior. 
702 */ 703 switch (option_name) { 704 case SO_LINGER: 705 if (*optlenp < (t_uscalar_t) 706 sizeof (struct linger)) { 707 error = EINVAL; 708 break; 709 } 710 error = 0; 711 bzero(optval, sizeof (struct linger)); 712 *optlenp = sizeof (struct linger); 713 break; 714 case SO_RCVTIMEO: 715 case SO_SNDTIMEO: 716 if (*optlenp < (t_uscalar_t) 717 sizeof (struct timeval)) { 718 error = EINVAL; 719 break; 720 } 721 error = 0; 722 bzero(optval, sizeof (struct timeval)); 723 *optlenp = sizeof (struct timeval); 724 break; 725 case SO_SND_BUFINFO: 726 if (*optlenp < (t_uscalar_t) 727 sizeof (struct so_snd_bufinfo)) { 728 error = EINVAL; 729 break; 730 } 731 error = 0; 732 bzero(optval, 733 sizeof (struct so_snd_bufinfo)); 734 *optlenp = 735 sizeof (struct so_snd_bufinfo); 736 break; 737 case SO_DEBUG: 738 case SO_REUSEADDR: 739 case SO_KEEPALIVE: 740 case SO_DONTROUTE: 741 case SO_BROADCAST: 742 case SO_USELOOPBACK: 743 case SO_OOBINLINE: 744 case SO_DGRAM_ERRIND: 745 case SO_SNDBUF: 746 case SO_RCVBUF: 747 error = 0; 748 *((int32_t *)optval) = 0; 749 *optlenp = sizeof (int32_t); 750 break; 751 default: 752 break; 753 } 754 } 755 } 756 } 757 758 SO_UNBLOCK_FALLBACK(so); 759 return (error); 760 } 761 762 int 763 so_setsockopt(struct sonode *so, int level, int option_name, 764 const void *optval, socklen_t optlen, struct cred *cr) 765 { 766 int error = 0; 767 struct timeval tl; 768 const void *opt = optval; 769 770 SO_BLOCK_FALLBACK(so, 771 SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr)); 772 773 /* X/Open requires this check */ 774 if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) { 775 SO_UNBLOCK_FALLBACK(so); 776 if (xnet_check_print) 777 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 778 return (EINVAL); 779 } 780 781 if (level == SOL_SOCKET) { 782 switch (option_name) { 783 case SO_RCVTIMEO: 784 case SO_SNDTIMEO: { 785 /* 786 * We pass down these two options to protocol in order 787 * to support some third part protocols which need to 788 * know 
them. For those protocols which don't care 789 * these two options, simply return 0. 790 */ 791 clock_t t_usec; 792 793 if (get_udatamodel() == DATAMODEL_NONE || 794 get_udatamodel() == DATAMODEL_NATIVE) { 795 if (optlen != sizeof (struct timeval)) { 796 error = EINVAL; 797 goto done; 798 } 799 bcopy((struct timeval *)optval, &tl, 800 sizeof (struct timeval)); 801 } else { 802 if (optlen != sizeof (struct timeval32)) { 803 error = EINVAL; 804 goto done; 805 } 806 TIMEVAL32_TO_TIMEVAL(&tl, 807 (struct timeval32 *)optval); 808 } 809 opt = &tl; 810 optlen = sizeof (tl); 811 t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 812 mutex_enter(&so->so_lock); 813 if (option_name == SO_RCVTIMEO) 814 so->so_rcvtimeo = drv_usectohz(t_usec); 815 else 816 so->so_sndtimeo = drv_usectohz(t_usec); 817 mutex_exit(&so->so_lock); 818 break; 819 } 820 case SO_RCVBUF: 821 /* 822 * XXX XPG 4.2 applications retrieve SO_RCVBUF from 823 * sockfs since the transport might adjust the value 824 * and not return exactly what was set by the 825 * application. 826 */ 827 so->so_xpg_rcvbuf = *(int32_t *)optval; 828 break; 829 } 830 } 831 error = (*so->so_downcalls->sd_setsockopt) 832 (so->so_proto_handle, level, option_name, opt, optlen, cr); 833 done: 834 SO_UNBLOCK_FALLBACK(so); 835 return (error); 836 } 837 838 int 839 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 840 struct cred *cr, int32_t *rvalp) 841 { 842 int error = 0; 843 844 SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 845 846 /* 847 * If there is a pending error, return error 848 * This can happen if a non blocking operation caused an error. 849 */ 850 if (so->so_error != 0) { 851 mutex_enter(&so->so_lock); 852 error = sogeterr(so, B_TRUE); 853 mutex_exit(&so->so_lock); 854 if (error != 0) 855 goto done; 856 } 857 858 /* 859 * calling strioc can result in the socket falling back to TPI, 860 * if that is supported. 
861 */ 862 if ((error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 && 863 (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) { 864 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle, 865 cmd, arg, mode, rvalp, cr); 866 } 867 868 done: 869 SO_UNBLOCK_FALLBACK(so); 870 871 return (error); 872 } 873 874 int 875 so_poll(struct sonode *so, short events, int anyyet, short *reventsp, 876 struct pollhead **phpp) 877 { 878 int state = so->so_state; 879 *reventsp = 0; 880 881 if (so->so_error != 0 && 882 ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) { 883 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events; 884 return (0); 885 } 886 887 /* 888 * As long as there is buffer to send data, and the socket is 889 * in a state where it can send data (i.e., connected for 890 * connection oriented protocols), then turn on POLLOUT events 891 */ 892 if (!so->so_snd_qfull && ((so->so_mode & SM_CONNREQUIRED) == 0 || 893 state & SS_ISCONNECTED)) { 894 *reventsp |= POLLOUT & events; 895 } 896 897 /* 898 * Turn on POLLIN whenever there is data on the receive queue, 899 * or the socket is in a state where no more data will be received. 900 * Also, if the socket is accepting connections, flip the bit if 901 * there is something on the queue. 902 * 903 * We do an initial check for events without holding locks. However, 904 * if there are no event available, then we redo the check for POLLIN 905 * events under the lock. 
906 */ 907 908 /* Pending connections */ 909 if (so->so_acceptq_len > 0) 910 *reventsp |= (POLLIN|POLLRDNORM) & events; 911 912 /* Data */ 913 /* so_downcalls is null for sctp */ 914 if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) { 915 *reventsp |= (*so->so_downcalls->sd_poll) 916 (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet, 917 CRED()) & events; 918 ASSERT((*reventsp & ~events) == 0); 919 /* do not recheck events */ 920 events &= ~SO_PROTO_POLLEV; 921 } else { 922 if (SO_HAVE_DATA(so)) 923 *reventsp |= (POLLIN|POLLRDNORM) & events; 924 925 /* Urgent data */ 926 if ((state & SS_OOBPEND) != 0) 927 *reventsp |= (POLLRDBAND) & events; 928 } 929 930 if (!*reventsp && !anyyet) { 931 /* Check for read events again, but this time under lock */ 932 if (events & (POLLIN|POLLRDNORM)) { 933 mutex_enter(&so->so_lock); 934 if (SO_HAVE_DATA(so) || so->so_acceptq_len > 0) { 935 mutex_exit(&so->so_lock); 936 *reventsp |= (POLLIN|POLLRDNORM) & events; 937 return (0); 938 } else { 939 so->so_pollev |= SO_POLLEV_IN; 940 mutex_exit(&so->so_lock); 941 } 942 } 943 *phpp = &so->so_poll_list; 944 } 945 return (0); 946 } 947 948 /* 949 * Generic Upcalls 950 */ 951 void 952 so_connected(sock_upper_handle_t sock_handle, sock_connid_t id, 953 cred_t *peer_cred, pid_t peer_cpid) 954 { 955 struct sonode *so = (struct sonode *)sock_handle; 956 957 mutex_enter(&so->so_lock); 958 ASSERT(so->so_proto_handle != NULL); 959 960 if (peer_cred != NULL) { 961 if (so->so_peercred != NULL) 962 crfree(so->so_peercred); 963 crhold(peer_cred); 964 so->so_peercred = peer_cred; 965 so->so_cpid = peer_cpid; 966 } 967 968 so->so_proto_connid = id; 969 soisconnected(so); 970 /* 971 * Wake ones who're waiting for conn to become established. 
972 */ 973 so_notify_connected(so); 974 } 975 976 int 977 so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error) 978 { 979 struct sonode *so = (struct sonode *)sock_handle; 980 981 mutex_enter(&so->so_lock); 982 983 so->so_proto_connid = id; 984 soisdisconnected(so, error); 985 so_notify_disconnected(so, error); 986 987 return (0); 988 } 989 990 void 991 so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action, 992 uintptr_t arg) 993 { 994 struct sonode *so = (struct sonode *)sock_handle; 995 996 switch (action) { 997 case SOCK_OPCTL_SHUT_SEND: 998 mutex_enter(&so->so_lock); 999 socantsendmore(so); 1000 so_notify_disconnecting(so); 1001 break; 1002 case SOCK_OPCTL_SHUT_RECV: { 1003 mutex_enter(&so->so_lock); 1004 socantrcvmore(so); 1005 so_notify_eof(so); 1006 break; 1007 } 1008 case SOCK_OPCTL_ENAB_ACCEPT: 1009 mutex_enter(&so->so_lock); 1010 so->so_state |= SS_ACCEPTCONN; 1011 so->so_backlog = (unsigned int)arg; 1012 mutex_exit(&so->so_lock); 1013 break; 1014 default: 1015 ASSERT(0); 1016 break; 1017 } 1018 } 1019 1020 void 1021 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull) 1022 { 1023 struct sonode *so = (struct sonode *)sock_handle; 1024 1025 if (qfull) { 1026 so_snd_qfull(so); 1027 } else { 1028 so_snd_qnotfull(so); 1029 mutex_enter(&so->so_lock); 1030 so_notify_writable(so); 1031 } 1032 } 1033 1034 sock_upper_handle_t 1035 so_newconn(sock_upper_handle_t parenthandle, 1036 sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls, 1037 struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp) 1038 { 1039 struct sonode *so = (struct sonode *)parenthandle; 1040 struct sonode *nso; 1041 int error; 1042 1043 ASSERT(proto_handle != NULL); 1044 1045 if ((so->so_state & SS_ACCEPTCONN) == 0 || 1046 so->so_acceptq_len >= so->so_backlog) 1047 return (NULL); 1048 1049 nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP, 1050 &error); 1051 if (nso == NULL) 1052 return (NULL); 1053 
1054 if (peer_cred != NULL) { 1055 crhold(peer_cred); 1056 nso->so_peercred = peer_cred; 1057 nso->so_cpid = peer_cpid; 1058 } 1059 1060 (void) so_acceptq_enqueue(so, nso); 1061 mutex_enter(&so->so_lock); 1062 so_notify_newconn(so); 1063 1064 *sock_upcallsp = &so_upcalls; 1065 1066 return ((sock_upper_handle_t)nso); 1067 } 1068 1069 void 1070 so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp) 1071 { 1072 struct sonode *so; 1073 1074 so = (struct sonode *)sock_handle; 1075 1076 mutex_enter(&so->so_lock); 1077 1078 if (soppp->sopp_flags & SOCKOPT_MAXBLK) 1079 so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk; 1080 if (soppp->sopp_flags & SOCKOPT_WROFF) 1081 so->so_proto_props.sopp_wroff = soppp->sopp_wroff; 1082 if (soppp->sopp_flags & SOCKOPT_TAIL) 1083 so->so_proto_props.sopp_tail = soppp->sopp_tail; 1084 if (soppp->sopp_flags & SOCKOPT_RCVHIWAT) 1085 so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat; 1086 if (soppp->sopp_flags & SOCKOPT_RCVLOWAT) 1087 so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat; 1088 if (soppp->sopp_flags & SOCKOPT_MAXPSZ) 1089 so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz; 1090 if (soppp->sopp_flags & SOCKOPT_MINPSZ) 1091 so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz; 1092 if (soppp->sopp_flags & SOCKOPT_ZCOPY) { 1093 if (soppp->sopp_zcopyflag & ZCVMSAFE) { 1094 so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE; 1095 so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE; 1096 } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) { 1097 so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE; 1098 so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE; 1099 } 1100 1101 if (soppp->sopp_zcopyflag & COPYCACHED) { 1102 so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED; 1103 } 1104 } 1105 if (soppp->sopp_flags & SOCKOPT_OOBINLINE) 1106 so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline; 1107 if (soppp->sopp_flags & SOCKOPT_RCVTIMER) 1108 so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer; 1109 if 
(soppp->sopp_flags & SOCKOPT_RCVTHRESH) 1110 so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh; 1111 if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN) 1112 so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen; 1113 1114 mutex_exit(&so->so_lock); 1115 1116 #ifdef DEBUG 1117 soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL | 1118 SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ | 1119 SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER | 1120 SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ); 1121 ASSERT(soppp->sopp_flags == 0); 1122 #endif 1123 } 1124 1125 /* ARGSUSED */ 1126 ssize_t 1127 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp, 1128 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp) 1129 { 1130 struct sonode *so = (struct sonode *)sock_handle; 1131 boolean_t force_push = B_TRUE; 1132 int space_left; 1133 sodirect_t *sodp = so->so_direct; 1134 1135 ASSERT(errorp != NULL); 1136 *errorp = 0; 1137 if (mp == NULL) { 1138 if (msg_size > 0) { 1139 ASSERT(so->so_downcalls->sd_recv_uio != NULL); 1140 mutex_enter(&so->so_lock); 1141 /* the notify functions will drop the lock */ 1142 if (flags & MSG_OOB) 1143 so_notify_oobdata(so, IS_SO_OOB_INLINE(so)); 1144 else 1145 so_notify_data(so, msg_size); 1146 return (0); 1147 } 1148 /* 1149 * recv space check 1150 */ 1151 mutex_enter(&so->so_lock); 1152 space_left = so->so_rcvbuf - so->so_rcv_queued; 1153 if (space_left <= 0) { 1154 so->so_flowctrld = B_TRUE; 1155 *errorp = ENOSPC; 1156 space_left = -1; 1157 } 1158 goto done_unlock; 1159 } 1160 1161 ASSERT(mp->b_next == NULL); 1162 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO); 1163 ASSERT(msg_size == msgdsize(mp)); 1164 1165 if (flags & MSG_OOB) { 1166 so_queue_oob(sock_handle, mp, msg_size); 1167 return (0); 1168 } 1169 1170 if (force_pushp != NULL) 1171 force_push = *force_pushp; 1172 1173 if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1174 /* The read pointer is not aligned 
correctly for TPI */ 1175 zcmn_err(getzoneid(), CE_WARN, 1176 "sockfs: Unaligned TPI message received. rptr = %p\n", 1177 (void *)mp->b_rptr); 1178 freemsg(mp); 1179 mutex_enter(sodp->sod_lockp); 1180 SOD_UIOAFINI(sodp); 1181 mutex_exit(sodp->sod_lockp); 1182 1183 return (so->so_rcvbuf - so->so_rcv_queued); 1184 } 1185 1186 mutex_enter(&so->so_lock); 1187 if (so->so_state & (SS_FALLBACK_PENDING | SS_FALLBACK_COMP)) { 1188 SOD_DISABLE(sodp); 1189 mutex_exit(&so->so_lock); 1190 *errorp = EOPNOTSUPP; 1191 return (-1); 1192 } 1193 if (so->so_state & SS_CANTRCVMORE) { 1194 freemsg(mp); 1195 SOD_DISABLE(sodp); 1196 mutex_exit(&so->so_lock); 1197 return (0); 1198 } 1199 1200 /* process the mblk via I/OAT if capable */ 1201 if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) { 1202 if (DB_TYPE(mp) == M_DATA) { 1203 (void) sod_uioa_mblk_init(sodp, mp, msg_size); 1204 } else { 1205 SOD_UIOAFINI(sodp); 1206 } 1207 } 1208 1209 if (mp->b_next == NULL) { 1210 so_enqueue_msg(so, mp, msg_size); 1211 } else { 1212 do { 1213 mblk_t *nmp; 1214 1215 if ((nmp = mp->b_next) != NULL) { 1216 mp->b_next = NULL; 1217 } 1218 so_enqueue_msg(so, mp, msgdsize(mp)); 1219 mp = nmp; 1220 } while (mp != NULL); 1221 } 1222 1223 space_left = so->so_rcvbuf - so->so_rcv_queued; 1224 if (space_left <= 0) { 1225 so->so_flowctrld = B_TRUE; 1226 *errorp = ENOSPC; 1227 space_left = -1; 1228 } 1229 1230 if (force_push || so->so_rcv_queued >= so->so_rcv_thresh || 1231 so->so_rcv_queued >= so->so_rcv_wanted || 1232 (sodp != NULL && so->so_rcv_queued >= sodp->sod_want)) { 1233 SOCKET_TIMER_CANCEL(so); 1234 /* 1235 * so_notify_data will release the lock 1236 */ 1237 so_notify_data(so, so->so_rcv_queued); 1238 1239 if (force_pushp != NULL) 1240 *force_pushp = B_TRUE; 1241 goto done; 1242 } else if (so->so_rcv_timer_tid == 0) { 1243 /* Make sure the recv push timer is running */ 1244 SOCKET_TIMER_START(so); 1245 } 1246 1247 done_unlock: 1248 mutex_exit(&so->so_lock); 1249 done: 1250 return (space_left); 1251 } 

/*
 * so_signal_oob - protocol upcall announcing that urgent (out-of-band)
 * data is on the way.
 *
 * Records the mark: so_oobmark is set to `offset' bytes past the data
 * currently queued (offset >= 0), any stale OOB state/message from a
 * previous urgent byte is discarded, and SS_OOBPEND is raised.  Finally
 * so_notify_oobsig() is called to deliver SIGURG/urgent-poll events.
 *
 * NOTE(review): there is no mutex_exit() here — by the convention used
 * throughout this file ("the notify functions will drop the lock"),
 * so_notify_oobsig() is expected to release so->so_lock; confirm.
 */
void
so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
{
	struct sonode *so;

	ASSERT(offset >= 0);
	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	/* Abort any in-progress uioa direct-receive state. */
	SOD_UIOAFINI(so->so_direct);

	/*
	 * New urgent data on the way so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

	/*
	 * Record that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;

	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}

	/*
	 * set the offset where the urgent byte is
	 */
	so->so_oobmark = so->so_rcv_queued + offset;
	if (so->so_oobmark == 0)
		so->so_state |= SS_RCVATMARK;
	else
		so->so_state &= ~SS_RCVATMARK;

	so_notify_oobsig(so);
}

/*
 * Queue the OOB byte
 *
 * Called from so_queue_msg() when the protocol delivers the urgent byte
 * itself (MSG_OOB).  If the socket is not in OOB-inline mode the mblk is
 * stashed in so_oobmsg for retrieval via recv(MSG_OOB); otherwise it is
 * enqueued in the normal receive stream.
 *
 * NOTE(review): so_oobmsg is overwritten unconditionally here — this
 * relies on so_signal_oob() having freed/cleared any previous so_oobmsg
 * first; a direct call without the preceding signal would leak an mblk.
 * Also, per the file-wide convention, so_notify_oobdata() is expected to
 * drop so->so_lock (no mutex_exit() here).
 */
static void
so_queue_oob(sock_upper_handle_t sock_handle, mblk_t *mp, size_t len)
{
	struct sonode *so;

	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	SOD_UIOAFINI(so->so_direct);

	ASSERT(mp != NULL);
	if (!IS_SO_OOB_INLINE(so)) {
		so->so_oobmsg = mp;
		so->so_state |= SS_HAVEOOBDATA;
	} else {
		so_enqueue_msg(so, mp, len);
	}

	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
}

/*
 * so_close - generic sop_close entry point.
 *
 * Pushes the close down to the protocol first; once sd_close returns the
 * protocol will make no further upcalls, so it is safe to verify OOB
 * state and flush the receive queue under so_lock.  Returns the
 * protocol's error code.
 */
int
so_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);

	/*
	 * At this point there will be no more upcalls from the protocol
	 */
	mutex_enter(&so->so_lock);

	ASSERT(so_verify_oobstate(so));

	so_rcv_flush(so);
	mutex_exit(&so->so_lock);

	return (error);
}

/*
 * so_zcopy_notify - protocol upcall signalling zero-copy completion.
 * Sets STZCNOTIFY and wakes any thread blocked on so_copy_cv.
 */
void
so_zcopy_notify(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	so->so_copyflag |= STZCNOTIFY;
	cv_broadcast(&so->so_copy_cv);
	mutex_exit(&so->so_lock);
}

/*
 * so_set_error - protocol upcall recording an asynchronous socket error.
 *
 * NOTE(review): no mutex_exit() here — by the convention used elsewhere
 * in this file, so_notify_error() is expected to release so->so_lock;
 * confirm against its definition.
 */
void
so_set_error(sock_upper_handle_t sock_handle, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	soseterror(so, error);

	so_notify_error(so);
}

/*
 * so_recvmsg - read data from the socket
 *
 * There are two ways of obtaining data; either we ask the protocol to
 * copy directly into the supplied buffer, or we copy data from the
 * sonode's receive queue. The decision which one to use depends on
 * whether the protocol has a sd_recv_uio down call.
 *
 * Returns 0 on success or an errno (ENOTCONN when a connection is
 * required but absent, EOPNOTSUPP for unsupported MSG_OOB, EPROTO on
 * malformed TPI messages, or whatever the downcalls report).
 */
int
so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	rval_t rval;
	int flags = 0;
	t_uscalar_t controllen, namelen;
	int error = 0;
	int ret;
	mblk_t *mctlp = NULL;		/* M_PROTO control part, if any */
	union T_primitives *tpr;
	void *control;
	ssize_t saved_resid;		/* detects progress for MSG_WAITALL */
	struct uio *suiop;

	/* Serialize against an in-progress fallback to a STREAMS socket. */
	SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));

	if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		SO_UNBLOCK_FALLBACK(so);
		return (ENOTCONN);
	}

	/* MSG_PEEK and MSG_WAITALL are mutually exclusive; PEEK wins. */
	if (msg->msg_flags & MSG_PEEK)
		msg->msg_flags &= ~MSG_WAITALL;

	/* Atomic (datagram) sockets may truncate; advertise via MSG_TRUNC. */
	if (so->so_mode & SM_ATOMIC)
		msg->msg_flags |= MSG_TRUNC;

	if (msg->msg_flags & MSG_OOB) {
		if ((so->so_mode & SM_EXDATA) == 0) {
			/* Protocol does not support out-of-band data. */
			error = EOPNOTSUPP;
		} else if (so->so_downcalls->sd_recv_uio != NULL) {
			/* Protocol buffers its own data; let it copy out. */
			error = (*so->so_downcalls->sd_recv_uio)
			    (so->so_proto_handle, uiop, msg, cr);
		} else {
			/* OOB byte lives in so_oobmsg / the receive queue. */
			error = sorecvoob(so, msg, uiop, msg->msg_flags,
			    IS_SO_OOB_INLINE(so));
		}

		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * If the protocol has the recv down call, then pass the request
	 * down.
	 */
	if (so->so_downcalls->sd_recv_uio != NULL) {
		error = (*so->so_downcalls->sd_recv_uio)
		    (so->so_proto_handle, uiop, msg, cr);
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * Reading data from the socket buffer
	 */
	flags = msg->msg_flags;
	msg->msg_flags = 0;

	/*
	 * Set msg_controllen and msg_namelen to zero here to make it
	 * simpler in the cases that no control or name is returned.
	 */
	controllen = msg->msg_controllen;
	namelen = msg->msg_namelen;
	msg->msg_controllen = 0;
	msg->msg_namelen = 0;

	mutex_enter(&so->so_lock);
	/* Set SOREADLOCKED */
	error = so_lock_read_intr(so,
	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
	mutex_exit(&so->so_lock);
	if (error) {
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * Set up sodirect/uioa receive if applicable; sod_rcv_init may
	 * substitute uiop and returns the saved uio for sod_rcv_done.
	 */
	suiop = sod_rcv_init(so, flags, &uiop);
retry:
	saved_resid = uiop->uio_resid;
	error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
	if (error != 0) {
		goto out;
	}
	/*
	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
	 * For non-datagrams MOREDATA is used to set MSG_EOR.
	 */
	ASSERT(!(rval.r_val1 & MORECTL));
	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
		msg->msg_flags |= MSG_TRUNC;
	if (mctlp == NULL) {
		/* Plain data, no TPI control part. */
		dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));

		mutex_enter(&so->so_lock);
		/* Set MSG_EOR based on MOREDATA */
		if (!(rval.r_val1 & MOREDATA)) {
			if (so->so_state & SS_SAVEDEOR) {
				msg->msg_flags |= MSG_EOR;
				so->so_state &= ~SS_SAVEDEOR;
			}
		}
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			goto retry;
		}

		goto out_locked;
	}
	/* strsock_proto has already verified length and alignment */
	tpr = (union T_primitives *)mctlp->b_rptr;
	dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
	switch (tpr->type) {
	case T_DATA_IND: {
		/*
		 * Set msg_flags to MSG_EOR based on
		 * MORE_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			/*
			 * Record ends here; if the data itself was split
			 * (MOREDATA), remember the EOR for the next read.
			 */
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			goto retry;
		}
		goto out_locked;
	}
	case T_UNITDATA_IND: {
		void *addr;
		t_uscalar_t addrlen;
		void *abuf;
		t_uscalar_t optlen;
		void *opt;

		if (namelen != 0) {
			/* Caller wants source address */
			addrlen = tpr->unitdata_ind.SRC_length;
			addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
			    addrlen, 1);
			if (addr == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			/* AF_UNIX carries its address in the options below. */
			ASSERT(so->so_family != AF_UNIX);
		}
		optlen = tpr->unitdata_ind.OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Extract any source address option.
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			if (so->so_family == AF_UNIX)
				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			/*
			 * Only build a cmsg buffer if the caller asked for
			 * control data; otherwise flag the truncation.
			 */
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (namelen != 0) {
			/*
			 * Return address to caller.
			 * Caller handles truncation if length
			 * exceeds msg_namelen.
			 * NOTE: AF_UNIX NUL termination is ensured by
			 * the sender's copyin_name().
			 */
			abuf = kmem_alloc(addrlen, KM_SLEEP);

			bcopy(addr, abuf, addrlen);
			msg->msg_name = abuf;
			msg->msg_namelen = addrlen;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				/*
				 * NOTE(review): msg_name is freed here but
				 * msg->msg_name/msg_namelen are not reset,
				 * leaving a dangling pointer in *msg on this
				 * error path — verify the caller never
				 * touches msg_name when an error is
				 * returned.
				 */
				if (msg->msg_namelen != 0)
					kmem_free(msg->msg_name,
					    msg->msg_namelen);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		freemsg(mctlp);
		goto out;
	}
	case T_OPTDATA_IND: {
		/*
		 * NOTE(review): the control part is an indication but is
		 * cast to struct T_optdata_req to read OPT_length — the
		 * req/ind layouts presumably agree for this field; confirm
		 * against <sys/tihdr.h>.
		 */
		struct T_optdata_req *tdr;
		void *opt;
		t_uscalar_t optlen;

		tdr = (struct T_optdata_req *)mctlp->b_rptr;
		optlen = tdr->OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;
			/*
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp,
			    tpr->optdata_ind.OPT_offset, optlen,
			    __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}

			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * DATA_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 * Not possible to wait if control info was received.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    controllen == 0 &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			goto retry;
		}
		goto out_locked;
	}
	default:
		/* Unexpected TPI primitive from the protocol. */
		cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
		    tpr->type);
		freemsg(mctlp);
		error = EPROTO;
		ASSERT(0);
		/* FALLTHROUGH to the common exit on non-DEBUG builds */
	}
out:
	mutex_enter(&so->so_lock);
out_locked:
	/* The sod_lockp pointers to the sonode so_lock */
	ret = sod_rcv_done(so, suiop, uiop);
	if (ret != 0 && error == 0)
		error = ret;

	so_unlock_read(so);	/* Clear SOREADLOCKED */
	mutex_exit(&so->so_lock);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

/*
 * Socket-operations vector wiring the generic so_* implementations in
 * this file (plus the so_accept/so_connect/... entry points defined
 * elsewhere in sockfs) into the sonodeops interface.
 */
sonodeops_t so_sonodeops = {
	so_init,		/* sop_init */
	so_accept,		/* sop_accept */
	so_bind,		/* sop_bind */
	so_listen,		/* sop_listen */
	so_connect,		/* sop_connect */
	so_recvmsg,		/* sop_recvmsg */
	so_sendmsg,		/* sop_sendmsg */
	so_sendmblk,		/* sop_sendmblk */
	so_getpeername,		/* sop_getpeername */
	so_getsockname,		/* sop_getsockname */
	so_shutdown,		/* sop_shutdown */
	so_getsockopt,		/* sop_getsockopt */
	so_setsockopt,		/* sop_setsockopt */
	so_ioctl,		/* sop_ioctl */
	so_poll,		/* sop_poll */
	so_close,		/* sop_close */
};

/*
 * Upcall vector handed to the protocol so it can deliver events and data
 * back into sockfs (see sock_upcalls_t for the slot meanings).
 */
sock_upcalls_t so_upcalls = {
	so_newconn,
	so_connected,
	so_disconnected,
	so_opctl,
	so_queue_msg,
	so_set_prop,
	so_txq_full,
	so_signal_oob,
	so_zcopy_notify,
	so_set_error
};