/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
 * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2022 Garrett D'Amore
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>

#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/sockio.h>
#include <sys/kmem_impl.h>

#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/ddi.h>
#include <netinet/in.h>
#include <inet/ip.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/sockfilter_impl.h>

#include <sys/socket_proto.h>

#include <fs/sockfs/socktpi_impl.h>
#include <fs/sockfs/sodirect.h>

extern int xnet_skip_checks;
extern int xnet_check_print;

static void so_queue_oob(struct sonode *, mblk_t *, size_t);


/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag,
    struct cred *cr, struct sonode **nsop)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
    socklen_t *len, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	return (EOPNOTSUPP);
}

/*
 * Generic Socket Ops
 */

/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
	return (socket_init_common(so, pso, flags, cr));
}

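/*
 * Bind the socket to the given local address. After family-specific
 * sanity checks, the request is offered to any socket filters and then
 * passed to the protocol through the sd_bind downcall; a NULL name is
 * treated as a request to unbind. As an illustrative sketch, a caller
 * typically reaches this path via bind(3SOCKET):
 *
 *	struct sockaddr_in sin;
 *
 *	bzero(&sin, sizeof (sin));
 *	sin.sin_family = AF_INET;
 *	sin.sin_port = htons(port);
 *	(void) bind(fd, (struct sockaddr *)&sin, sizeof (sin));
 */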
int
so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));

	ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);

	/* X/Open requires this check */
	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		if (xnet_check_print) {
			printf("sockfs: X/Open bind state check "
			    "caused EINVAL\n");
		}
		error = EINVAL;
		goto done;
	}

	/*
	 * A bind to a NULL address is interpreted as an unbind, so just
	 * do the downcall.
	 */
	if (name == NULL)
		goto dobind;

	switch (so->so_family) {
	case AF_INET:
		if ((size_t)namelen != sizeof (sin_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if ((flags & _SOBIND_XPG4_2) &&
		    (name->sa_family != so->so_family)) {
			/*
			 * This check has to be made for X/Open sockets;
			 * however, application failures have been
			 * observed when it is applied to all sockets.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
		/*
		 * Force a zero sa_family to match so_family.
		 *
		 * Some programs like inetd(8) don't set the
		 * family field. Other programs leave
		 * sin_family set to garbage - SunOS 4.X does
		 * not check the family field on a bind.
		 * We use the family field that
		 * was passed in to the socket() call.
		 */
		name->sa_family = so->so_family;
		break;

	case AF_INET6: {
#ifdef DEBUG
		sin6_t *sin6 = (sin6_t *)name;
#endif
		if ((size_t)namelen != sizeof (sin6_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if (name->sa_family != so->so_family) {
			/*
			 * With IPv6 we require the family to match,
			 * unlike in IPv4.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
#ifdef DEBUG
		/*
		 * Verify that apps don't forget to clear
		 * sin6_scope_id, etc.
		 */
		if (sin6->sin6_scope_id != 0 &&
		    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized sin6_scope_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->sin6_scope_id,
			    (int)curproc->p_pid);
		}
		if (sin6->__sin6_src_id != 0) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized __sin6_src_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->__sin6_src_id,
			    (int)curproc->p_pid);
		}
#endif /* DEBUG */

		break;
	}
	default:
		/* Just pass the request to the protocol */
		goto dobind;
	}

dobind:
	if (so->so_filter_active == 0 ||
	    (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_bind)
		    (so->so_proto_handle, name, namelen, cr);
	}
done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_listen(struct sonode *so, int backlog, struct cred *cr)
{
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));

	if ((so)->so_filter_active == 0 ||
	    (error = sof_filter_listen(so, &backlog, cr)) < 0)
		error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
		    backlog, cr);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

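/*
 * Initiate a connection to the given address. If the protocol connects
 * asynchronously, the sd_connect downcall returns EINPROGRESS and we
 * wait in so_wait_connected() for the connection id to be acknowledged;
 * for a non-blocking caller (FNONBLOCK/FNDELAY) that wait is expected
 * to bail out early, giving the usual in-progress connect semantics.
 */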
int
so_connect(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int fflag, int flags, struct cred *cr)
{
	int error = 0;
	sock_connid_t id;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));

	/*
	 * If there is a pending error, return it. This can happen if a
	 * non-blocking operation caused an error.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_connect(so, (struct sockaddr *)name,
	    &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
		    name, namelen, &id, cr);

		if (error == EINPROGRESS)
			error = so_wait_connected(so,
			    fflag & (FNONBLOCK|FNDELAY), id);
	}
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

/*ARGSUSED*/
int
so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
{
	int error = 0;
	struct sonode *nso;

	*nsop = NULL;

	SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
	if ((so->so_state & SS_ACCEPTCONN) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
		    EOPNOTSUPP : EINVAL);
	}

	if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
	    &nso)) == 0) {
		ASSERT(nso != NULL);

		/* finish the accept */
		if ((so->so_filter_active > 0 &&
		    (error = sof_filter_accept(nso, cr)) > 0) ||
		    (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
		    nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
			(void) socket_close(nso, 0, cr);
			socket_destroy(nso);
		} else {
			*nsop = nso;
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

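/*
 * Send data from the caller's uio. On atomic (datagram-style) sockets
 * the whole message must fit within the protocol's sopp_maxpsz; as an
 * illustrative example, sending a 70000-byte datagram over a transport
 * that advertises a 65535-byte maxpsz fails up front with EMSGSIZE
 * rather than being split across multiple transmissions.
 */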
int
so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	int error, flags;
	boolean_t dontblock;
	ssize_t orig_resid;
	mblk_t *mp;

	SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));

	flags = msg->msg_flags;
	error = 0;
	dontblock = (flags & MSG_DONTWAIT) ||
	    (uiop->uio_fmode & (FNONBLOCK|FNDELAY));

	if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
		/*
		 * Old way of passing fd's is not supported
		 */
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	if ((so->so_mode & SM_ATOMIC) &&
	    uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

	/*
	 * For atomic sends we will only do one iteration.
	 */
	do {
		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}

		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}

		/*
		 * Send down OOB messages even if the send path is being
		 * flow controlled (assuming the protocol supports OOB data).
		 */
		if (flags & MSG_OOB) {
			if ((so->so_mode & SM_EXDATA) == 0) {
				error = EOPNOTSUPP;
				break;
			}
		} else if (SO_SND_FLOWCTRLD(so)) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * Time to send data to the protocol. We either copy the
		 * data into mblks or pass the uio directly to the protocol.
		 * We decide what to do based on the available down calls.
		 */
		if (so->so_downcalls->sd_send_uio != NULL) {
			error = (*so->so_downcalls->sd_send_uio)
			    (so->so_proto_handle, uiop, msg, cr);
			if (error != 0)
				break;
		} else {
			/* save the resid in case of failure */
			orig_resid = uiop->uio_resid;

			if ((mp = socopyinuio(uiop,
			    so->so_proto_props.sopp_maxpsz,
			    so->so_proto_props.sopp_wroff,
			    so->so_proto_props.sopp_maxblk,
			    so->so_proto_props.sopp_tail, &error)) == NULL) {
				break;
			}
			ASSERT(uiop->uio_resid >= 0);

			if (so->so_filter_active > 0 &&
			    ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
			    &error)) == NULL)) {
				if (error != 0)
					break;
				continue;
			}
			error = (*so->so_downcalls->sd_send)
			    (so->so_proto_handle, mp, msg, cr);
			if (error != 0) {
				/*
				 * The send failed. We do not have to free the
				 * mblks, because that is the protocol's
				 * responsibility. However, uio_resid must
				 * remain accurate, so adjust that here.
				 */
				uiop->uio_resid = orig_resid;
				break;
			}
		}
	} while (uiop->uio_resid > 0);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

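/*
 * Send a chain of mblks down to the protocol, at most sopp_maxpsz
 * bytes per sd_send call. As an illustrative example, a chain of three
 * 1000-byte mblks with a maxpsz of 1460 goes down in three separate
 * calls of 1000 bytes each, since appending the next mblk to a chunk
 * would exceed the limit; note that a single mblk larger than maxpsz
 * is still sent whole, as the chunking never splits an individual
 * mblk.
 */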
int
so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
    boolean_t fil_inject)
{
	int error;
	boolean_t dontblock;
	size_t size;
	mblk_t *mp = *mpp;

	if (so->so_downcalls->sd_send == NULL)
		return (EOPNOTSUPP);

	error = 0;
	dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
	    (fflag & (FNONBLOCK|FNDELAY));
	size = msgdsize(mp);

	if ((so->so_mode & SM_ATOMIC) &&
	    size > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

	while (mp != NULL) {
		mblk_t *nmp, *last_mblk;
		size_t mlen;

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}
		/* Socket filters are not flow controlled */
		if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * We only allow so_maxpsz of data to be sent down to
		 * the protocol at a time.
		 */
		mlen = MBLKL(mp);
		nmp = mp->b_cont;
		last_mblk = mp;
		while (nmp != NULL) {
			mlen += MBLKL(nmp);
			if (mlen > so->so_proto_props.sopp_maxpsz) {
				last_mblk->b_cont = NULL;
				break;
			}
			last_mblk = nmp;
			nmp = nmp->b_cont;
		}

		if (so->so_filter_active > 0 &&
		    (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
		    cr, &error)) == NULL) {
			*mpp = mp = nmp;
			if (error != 0)
				break;
			continue;
		}
		error = (*so->so_downcalls->sd_send)
		    (so->so_proto_handle, mp, msg, cr);
		if (error != 0) {
			/*
			 * The send failed. The protocol will free the mblks
			 * that were sent down. Let the caller deal with the
			 * rest.
			 */
			*mpp = nmp;
			break;
		}

		*mpp = mp = nmp;
	}
	/* Let the filter know whether the protocol is flow controlled */
	if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
		error = ENOSPC;

	return (error);
}

int
so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));

	error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
	    B_FALSE);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_shutdown(struct sonode *so, int how, struct cred *cr)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));

	/*
	 * SunOS 4.X has no check for datagram sockets.
	 * 5.X checks that it is connected (ENOTCONN).
	 * X/Open requires that we check the connected state.
	 */
	if (!(so->so_state & SS_ISCONNECTED)) {
		if (!xnet_skip_checks) {
			error = ENOTCONN;
			if (xnet_check_print) {
				printf("sockfs: X/Open shutdown check "
				    "caused ENOTCONN\n");
			}
		}
		goto done;
	}

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_shutdown(so, &how, cr)) < 0)
		error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
		    how, cr));

	/*
	 * The protocol agreed to shut down. We need to flush the
	 * receive buffer if the receive side is being shut down.
	 */
	if (error == 0 && how != SHUT_WR) {
		mutex_enter(&so->so_lock);
		/* wait for active reader to finish */
		(void) so_lock_read(so, 0);

		so_rcv_flush(so);

		so_unlock_read(so);
		mutex_exit(&so->so_lock);
	}

done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_getsockname(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
		error = (*so->so_downcalls->sd_getsockname)
		    (so->so_proto_handle, addr, addrlen, cr);

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_getpeername(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));

	if (accept) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	} else if (!(so->so_state & SS_ISCONNECTED)) {
		error = ENOTCONN;
	} else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		/* Added this check for X/Open */
		error = EINVAL;
		if (xnet_check_print) {
			printf("sockfs: X/Open getpeername check => EINVAL\n");
		}
	} else if (so->so_filter_active == 0 ||
	    (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

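/*
 * getsockopt: the query is offered first to the filter stack and then
 * to sockfs itself (socket_getopt_common()); only when both decline,
 * by returning a negative value, is the protocol's sd_getsockopt
 * downcall invoked.
 */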
int
so_getsockopt(struct sonode *so, int level, int option_name,
    void *optval, socklen_t *optlenp, int flags, struct cred *cr)
{
	int error = 0;

	if (level == SOL_FILTER)
		return (sof_getsockopt(so, option_name, optval, optlenp, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));

	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_getsockopt(so, level, option_name, optval,
	    optlenp, cr)) < 0) &&
	    (error = socket_getopt_common(so, level, option_name, optval,
	    optlenp, flags)) < 0) {
		error = (*so->so_downcalls->sd_getsockopt)
		    (so->so_proto_handle, level, option_name, optval, optlenp,
		    cr);
		if (error == ENOPROTOOPT) {
			if (level == SOL_SOCKET) {
				/*
				 * If a protocol does not support a particular
				 * socket option, set can fail (not allowed)
				 * but get cannot fail. This is the previous
				 * sockfs behavior.
				 */
				switch (option_name) {
				case SO_LINGER:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct linger)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct linger));
					*optlenp = sizeof (struct linger);
					break;
				case SO_RCVTIMEO:
				case SO_SNDTIMEO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct timeval)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct timeval));
					*optlenp = sizeof (struct timeval);
					break;
				case SO_SND_BUFINFO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct so_snd_bufinfo)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval,
					    sizeof (struct so_snd_bufinfo));
					*optlenp =
					    sizeof (struct so_snd_bufinfo);
					break;
				case SO_DEBUG:
				case SO_REUSEADDR:
				case SO_KEEPALIVE:
				case SO_DONTROUTE:
				case SO_BROADCAST:
				case SO_USELOOPBACK:
				case SO_OOBINLINE:
				case SO_DGRAM_ERRIND:
				case SO_SNDBUF:
				case SO_RCVBUF:
					error = 0;
					*((int32_t *)optval) = 0;
					*optlenp = sizeof (int32_t);
					break;
				default:
					break;
				}
			}
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

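/*
 * setsockopt: note that SO_RCVTIMEO and SO_SNDTIMEO are also latched
 * locally in clock ticks, so e.g. (illustrative) a struct timeval of
 * { 1, 500000 } is stored as drv_usectohz(1500000) ticks in
 * so_rcvtimeo, which is what sockfs uses when it enforces the receive
 * timeout itself.
 */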
int
so_setsockopt(struct sonode *so, int level, int option_name,
    const void *optval, socklen_t optlen, struct cred *cr)
{
	int error = 0;
	struct timeval tl;
	const void *opt = optval;

	if (level == SOL_FILTER)
		return (sof_setsockopt(so, option_name, optval, optlen, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));

	/* X/Open requires this check */
	if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
		SO_UNBLOCK_FALLBACK(so);
		if (xnet_check_print)
			printf("sockfs: X/Open setsockopt check => EINVAL\n");
		return (EINVAL);
	}

	if (so->so_filter_active > 0 &&
	    (error = sof_filter_setsockopt(so, level, option_name,
	    (void *)optval, &optlen, cr)) >= 0)
		goto done;

	if (level == SOL_SOCKET) {
		switch (option_name) {
		case SO_RCVTIMEO:
		case SO_SNDTIMEO: {
			/*
			 * We pass these two options down to the protocol in
			 * order to support third-party protocols that need
			 * to know them. Protocols that don't care about
			 * these two options simply return 0.
			 */
			clock_t t_usec;

			if (get_udatamodel() == DATAMODEL_NONE ||
			    get_udatamodel() == DATAMODEL_NATIVE) {
				if (optlen != sizeof (struct timeval)) {
					error = EINVAL;
					goto done;
				}
				bcopy((struct timeval *)optval, &tl,
				    sizeof (struct timeval));
			} else {
				if (optlen != sizeof (struct timeval32)) {
					error = EINVAL;
					goto done;
				}
				TIMEVAL32_TO_TIMEVAL(&tl,
				    (struct timeval32 *)optval);
			}
			opt = &tl;
			optlen = sizeof (tl);
			t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
			mutex_enter(&so->so_lock);
			if (option_name == SO_RCVTIMEO)
				so->so_rcvtimeo = drv_usectohz(t_usec);
			else
				so->so_sndtimeo = drv_usectohz(t_usec);
			mutex_exit(&so->so_lock);
			break;
		}
		case SO_RCVBUF:
			/*
			 * XXX XPG 4.2 applications retrieve SO_RCVBUF from
			 * sockfs since the transport might adjust the value
			 * and not return exactly what was set by the
			 * application.
			 */
			so->so_xpg_rcvbuf = *(int32_t *)optval;
			break;
		}
	}
	error = (*so->so_downcalls->sd_setsockopt)
	    (so->so_proto_handle, level, option_name, opt, optlen, cr);
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));

	/*
	 * If there is a pending error, return it. This can happen if a
	 * non-blocking operation caused an error.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	/*
	 * Calling strioc can result in the socket falling back to TPI,
	 * if that is supported.
	 */
	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_ioctl(so, cmd, arg, mode,
	    rvalp, cr)) < 0) &&
	    (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
	    (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
		error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
		    cmd, arg, mode, rvalp, cr);
	}

done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

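/*
 * Poll entry point. A pending socket error is reported as both input
 * and output events. When no events are ready (or edge-triggered
 * POLLET polling is in effect), the socket's pollhead is handed back
 * through *phpp so the caller can be woken by a later notification.
 */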
int
so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int state = so->so_state, mask;
	*reventsp = 0;

	/*
	 * In sockets the errors are represented as input/output events.
	 */
	if (so->so_error != 0 &&
	    ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
		return (0);
	}

	/*
	 * If the socket is in a state where it can send data,
	 * turn on POLLWRBAND and POLLOUT events.
	 */
	if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
		/*
		 * Out-of-band data is allowed even if the connection
		 * is flow controlled.
		 */
		*reventsp |= POLLWRBAND & events;
		if (!SO_SND_FLOWCTRLD(so)) {
			/*
			 * As long as there is buffer space to send data,
			 * turn on POLLOUT events.
			 */
			*reventsp |= POLLOUT & events;
		}
	}

	/*
	 * Turn on POLLIN whenever there is data on the receive queue,
	 * or the socket is in a state where no more data will be received.
	 * Also, if the socket is accepting connections, flip the bit if
	 * there is something on the queue.
	 *
	 * We do an initial check for events without holding locks. However,
	 * if no events are available, then we redo the check for POLLIN
	 * events under the lock.
	 */

	/* Pending connections */
	if (!list_is_empty(&so->so_acceptq_list))
		*reventsp |= (POLLIN|POLLRDNORM) & events;

	/*
	 * If we're looking for POLLRDHUP, indicate it if we have sent the
	 * last rx signal for the socket.
	 */
	if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG))
		*reventsp |= POLLRDHUP;

	/* Data */
	/* so_downcalls is null for sctp */
	if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
		*reventsp |= (*so->so_downcalls->sd_poll)
		    (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
		    CRED()) & events;
		ASSERT((*reventsp & ~events) == 0);
		/* do not recheck events */
		events &= ~SO_PROTO_POLLEV;
	} else {
		if (SO_HAVE_DATA(so))
			*reventsp |= (POLLIN|POLLRDNORM) & events;

		/* Urgent data */
		if ((state & SS_OOBPEND) != 0) {
			*reventsp |= (POLLRDBAND | POLLPRI) & events;
		}

		/*
		 * If the socket has become disconnected, we set POLLHUP.
		 * Note that if we are in this state, we will have set POLLIN
		 * (SO_HAVE_DATA() is true on a disconnected socket), but not
		 * POLLOUT (SS_ISCONNECTED is false). This is in keeping with
		 * the semantics of POLLHUP, which is defined to be mutually
		 * exclusive with respect to POLLOUT but not POLLIN. We are
		 * therefore setting POLLHUP primarily for the benefit of
		 * those not polling on POLLIN, as they have no other way of
		 * knowing that the socket has been disconnected.
		 */
		mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;

		if ((state & (mask | SS_ISCONNECTED)) == mask)
			*reventsp |= POLLHUP;
	}

	if ((!*reventsp && !anyyet) || (events & POLLET)) {
		/* Check for read events again, but this time under lock */
		if (events & (POLLIN|POLLRDNORM)) {
			mutex_enter(&so->so_lock);
			if (SO_HAVE_DATA(so) ||
			    !list_is_empty(&so->so_acceptq_list)) {
				if (events & POLLET) {
					so->so_pollev |= SO_POLLEV_IN;
					*phpp = &so->so_poll_list;
				}

				mutex_exit(&so->so_lock);
				*reventsp |= (POLLIN|POLLRDNORM) & events;

				return (0);
			} else {
				so->so_pollev |= SO_POLLEV_IN;
				mutex_exit(&so->so_lock);
			}
		}
		*phpp = &so->so_poll_list;
	}
	return (0);
}

/*
 * Generic Upcalls
 */
void
so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
    cred_t *peer_cred, pid_t peer_cpid)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	ASSERT(so->so_proto_handle != NULL);

	if (peer_cred != NULL) {
		if (so->so_peercred != NULL)
			crfree(so->so_peercred);
		crhold(peer_cred);
		so->so_peercred = peer_cred;
		so->so_cpid = peer_cpid;
	}

	so->so_proto_connid = id;
	soisconnected(so);
	/*
	 * Wake those who are waiting for the connection to become
	 * established.
	 */
	so_notify_connected(so);
}

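/*
 * Upcall from the protocol when a connection is torn down, or when a
 * pending connect attempt fails; in the latter case the waiters are
 * notified of a failed connect rather than of a lost connection.
 */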
int
so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;
	boolean_t connect_failed;

	mutex_enter(&so->so_lock);

	/*
	 * If we aren't currently connected, then this isn't a disconnect but
	 * rather a failure to connect.
	 */
	connect_failed = !(so->so_state & SS_ISCONNECTED);

	so->so_proto_connid = id;
	soisdisconnected(so, error);
	so_notify_disconnected(so, connect_failed, error);

	return (0);
}

void
so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
    uintptr_t arg)
{
	struct sonode *so = (struct sonode *)sock_handle;

	switch (action) {
	case SOCK_OPCTL_SHUT_SEND:
		mutex_enter(&so->so_lock);
		socantsendmore(so);
		so_notify_disconnecting(so);
		break;
	case SOCK_OPCTL_SHUT_RECV: {
		mutex_enter(&so->so_lock);
		socantrcvmore(so);
		so_notify_eof(so);
		break;
	}
	case SOCK_OPCTL_ENAB_ACCEPT:
		mutex_enter(&so->so_lock);
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = (unsigned int)arg;
		/*
		 * The protocol can stop generating newconn upcalls when
		 * the backlog is full, so to make sure the listener does
		 * not end up with a queue full of deferred connections
		 * we reduce the backlog by one. Thus the listener will
		 * start closing deferred connections before the backlog
		 * is full.
		 */
		if (so->so_filter_active > 0)
			so->so_backlog = MAX(1, so->so_backlog - 1);
		mutex_exit(&so->so_lock);
		break;
	default:
		ASSERT(0);
		break;
	}
}

void
so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
{
	struct sonode *so = (struct sonode *)sock_handle;

	if (qfull) {
		so_snd_qfull(so);
	} else {
		so_snd_qnotfull(so);
		mutex_enter(&so->so_lock);
		/* so_notify_writable drops so_lock */
		so_notify_writable(so);
	}
}

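/*
 * Upcall from the protocol when a new connection arrives on a
 * listening socket. A NULL return tells the protocol to drop the
 * connection: the listener may not be accepting, the backlog may be
 * full, or the listener may be closing or falling back to TPI.
 */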
sock_upper_handle_t
so_newconn(sock_upper_handle_t parenthandle,
    sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
    struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
{
	struct sonode *so = (struct sonode *)parenthandle;
	struct sonode *nso;
	int error;

	ASSERT(proto_handle != NULL);

	if ((so->so_state & SS_ACCEPTCONN) == 0 ||
	    (so->so_acceptq_len >= so->so_backlog &&
	    (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
		return (NULL);
	}

	nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
	    &error);
	if (nso == NULL)
		return (NULL);

	if (peer_cred != NULL) {
		crhold(peer_cred);
		nso->so_peercred = peer_cred;
		nso->so_cpid = peer_cpid;
	}
	nso->so_listener = so;

	/*
	 * The new socket (nso), proto_handle and sock_upcallsp are all
	 * valid at this point. But as soon as nso is placed in the accept
	 * queue that can no longer be assumed (since an accept() thread may
	 * pull it off the queue and close the socket).
	 */
	*sock_upcallsp = &so_upcalls;

	mutex_enter(&so->so_acceptq_lock);
	if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
		mutex_exit(&so->so_acceptq_lock);
		ASSERT(nso->so_count == 1);
		nso->so_count--;
		nso->so_listener = NULL;
		/* drop proto ref */
		VN_RELE(SOTOV(nso));
		socket_destroy(nso);
		return (NULL);
	} else {
		so->so_acceptq_len++;
		if (nso->so_state & SS_FIL_DEFER) {
			list_insert_tail(&so->so_acceptq_defer, nso);
			mutex_exit(&so->so_acceptq_lock);
		} else {
			list_insert_tail(&so->so_acceptq_list, nso);
			cv_signal(&so->so_acceptq_cv);
			mutex_exit(&so->so_acceptq_lock);
			mutex_enter(&so->so_lock);
			so_notify_newconn(so);
		}

		return ((sock_upper_handle_t)nso);
	}
}

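/*
 * Upcall used by the protocol to publish its transport properties
 * (write offset, maximum packet size, watermarks, etc.). Active
 * filters then get a chance to adjust maxblk, wroff and tail, for
 * instance to reserve room for data they may prepend.
 */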
void
so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
{
	struct sonode *so;

	so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	if (soppp->sopp_flags & SOCKOPT_MAXBLK)
		so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
	if (soppp->sopp_flags & SOCKOPT_WROFF)
		so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
	if (soppp->sopp_flags & SOCKOPT_TAIL)
		so->so_proto_props.sopp_tail = soppp->sopp_tail;
	if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
		so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
	if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
		so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
	if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
		so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
	if (soppp->sopp_flags & SOCKOPT_MINPSZ)
		so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
	if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
		if (soppp->sopp_zcopyflag & ZCVMSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
		} else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
		}

		if (soppp->sopp_zcopyflag & COPYCACHED) {
			so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
		}
	}
	if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
		so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
	if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
		so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
	if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
		so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
	if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
		so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
	if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
		so->so_proto_props.sopp_loopback = soppp->sopp_loopback;

	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0) {
		sof_instance_t *inst;
		ssize_t maxblk;
		ushort_t wroff, tail;

		maxblk = so->so_proto_props.sopp_maxblk;
		wroff = so->so_proto_props.sopp_wroff;
		tail = so->so_proto_props.sopp_tail;
		for (inst = so->so_filter_bottom; inst != NULL;
		    inst = inst->sofi_prev) {
			if (SOF_INTERESTED(inst, mblk_prop)) {
				(*inst->sofi_ops->sofop_mblk_prop)(
				    (sof_handle_t)inst, inst->sofi_cookie,
				    &maxblk, &wroff, &tail);
			}
		}
		mutex_enter(&so->so_lock);
		so->so_proto_props.sopp_maxblk = maxblk;
		so->so_proto_props.sopp_wroff = wroff;
		so->so_proto_props.sopp_tail = tail;
		mutex_exit(&so->so_lock);
	}
#ifdef DEBUG
	soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
	    SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
	    SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
	    SOCKOPT_LOOPBACK);
	ASSERT(soppp->sopp_flags == 0);
#endif
}

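/*
 * Deliver inbound data from the protocol to the socket's receive
 * queue. The return value is the space remaining in the receive
 * buffer; a return of -1 with *errorp set to ENOSPC indicates that
 * the socket is now flow controlled (so_flowctrld), and the protocol
 * is expected to hold off further deliveries until flow control is
 * relieved.
 */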
/* ARGSUSED */
ssize_t
so_queue_msg_impl(struct sonode *so, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp,
    sof_instance_t *filter)
{
	boolean_t force_push = B_TRUE;
	int space_left;
	sodirect_t *sodp = so->so_direct;

	ASSERT(errorp != NULL);
	*errorp = 0;
	if (mp == NULL) {
		if (so->so_downcalls->sd_recv_uio != NULL) {
			mutex_enter(&so->so_lock);
			/* the notify functions will drop the lock */
			if (flags & MSG_OOB)
				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
			else
				so_notify_data(so, msg_size);
			return (0);
		}
		ASSERT(msg_size == 0);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	ASSERT(mp->b_next == NULL);
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
	ASSERT(msg_size == msgdsize(mp));

	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		mutex_enter(&so->so_lock);
		if (sodp != NULL)
			SOD_UIOAFINI(sodp);
		goto space_check;
	}

	if (so->so_filter_active > 0) {
		for (; filter != NULL; filter = filter->sofi_prev) {
			if (!SOF_INTERESTED(filter, data_in))
				continue;
			mp = (*filter->sofi_ops->sofop_data_in)(
			    (sof_handle_t)filter, filter->sofi_cookie, mp,
			    flags, &msg_size);
			ASSERT(msgdsize(mp) == msg_size);
			DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
			    (mblk_t *), mp);
			/* Data was consumed/dropped, just do space check */
			if (msg_size == 0) {
				mutex_enter(&so->so_lock);
				goto space_check;
			}
		}
	}

	mutex_enter(&so->so_lock);
	if (so->so_krecv_cb != NULL) {
		boolean_t cont;
		so_krecv_f func = so->so_krecv_cb;
		void *arg = so->so_krecv_arg;

		mutex_exit(&so->so_lock);
		cont = func(so, mp, msg_size, flags & MSG_OOB, arg);
		mutex_enter(&so->so_lock);
		if (cont == B_TRUE) {
			space_left = so->so_rcvbuf;
		} else {
			so->so_rcv_queued = so->so_rcvlowat;
			*errorp = ENOSPC;
			space_left = -1;
		}
		goto done_unlock;
	}
	mutex_exit(&so->so_lock);

	if (flags & MSG_OOB) {
		so_queue_oob(so, mp, msg_size);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	if (force_pushp != NULL)
		force_push = *force_pushp;

	mutex_enter(&so->so_lock);
	if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		*errorp = EOPNOTSUPP;
		return (-1);
	}
	if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
		freemsg(mp);
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* process the mblk via I/OAT if capable */
	if (sodp != NULL && sodp->sod_enabled) {
		if (DB_TYPE(mp) == M_DATA) {
			sod_uioa_mblk_init(sodp, mp, msg_size);
		} else {
			SOD_UIOAFINI(sodp);
		}
	}

	if (mp->b_next == NULL) {
		so_enqueue_msg(so, mp, msg_size);
	} else {
		do {
			mblk_t *nmp;

			if ((nmp = mp->b_next) != NULL) {
				mp->b_next = NULL;
			}
			so_enqueue_msg(so, mp, msgdsize(mp));
			mp = nmp;
		} while (mp != NULL);
	}

	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}

	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
	    so->so_rcv_queued >= so->so_rcv_wanted) {
		SOCKET_TIMER_CANCEL(so);
		/*
		 * so_notify_data will release the lock
		 */
		so_notify_data(so, so->so_rcv_queued);

		if (force_pushp != NULL)
			*force_pushp = B_TRUE;
		goto done;
	} else if (so->so_rcv_timer_tid == 0) {
		/* Make sure the recv push timer is running */
		SOCKET_TIMER_START(so);
	}

done_unlock:
	mutex_exit(&so->so_lock);
done:
	return (space_left);

space_check:
	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}
	goto done_unlock;
}

ssize_t
so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp)
{
	struct sonode *so = (struct sonode *)sock_handle;

	return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
	    so->so_filter_bottom));
}

/*
 * Set the offset of where the OOB data is relative to the bytes
 * queued. Also generate SIGURG.
 */
void
so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
{
	struct sonode *so;

	ASSERT(offset >= 0);
	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	/*
	 * New urgent data on the way, so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

	/*
	 * Record that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;

	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}

	/*
	 * Set the offset where the urgent byte is.
	 */
	so->so_oobmark = so->so_rcv_queued + offset;
	if (so->so_oobmark == 0)
		so->so_state |= SS_RCVATMARK;
	else
		so->so_state &= ~SS_RCVATMARK;

	so_notify_oobsig(so);
}

/*
 * Queue the OOB byte
 */
static void
so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
{
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	ASSERT(mp != NULL);
	if (!IS_SO_OOB_INLINE(so)) {
		so->so_oobmsg = mp;
		so->so_state |= SS_HAVEOOBDATA;
	} else {
		so_enqueue_msg(so, mp, len);
	}

	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
}

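/*
 * Close the socket. Once SS_CLOSING is set, no new data or connections
 * are enqueued. The protocol may close synchronously, in which case
 * its reference on the socket's vnode is released here, or return
 * EINPROGRESS and release the reference later through the so_closed()
 * upcall.
 */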
int
so_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	/*
	 * No new data will be enqueued once the CLOSING flag is set.
	 */
	mutex_enter(&so->so_lock);
	so->so_state |= SS_CLOSING;
	ASSERT(so_verify_oobstate(so));
	so_rcv_flush(so);
	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0)
		sof_sonode_closing(so);

	if (so->so_state & SS_ACCEPTCONN) {
		/*
		 * We grab and release the accept lock to ensure that any
		 * thread about to insert a socket in so_newconn completes
		 * before we flush the queue. Any thread calling so_newconn
		 * after we drop the lock will observe the SS_CLOSING flag,
		 * which will stop it from inserting the socket in the queue.
		 */
		mutex_enter(&so->so_acceptq_lock);
		mutex_exit(&so->so_acceptq_lock);

		so_acceptq_flush(so, B_TRUE);
	}

	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
	switch (error) {
	default:
		/* Protocol made a synchronous close; remove proto ref */
		VN_RELE(SOTOV(so));
		break;
	case EINPROGRESS:
		/*
		 * The protocol is in the process of closing; it will make
		 * a 'closed' upcall to remove the reference.
		 */
		error = 0;
		break;
	}

	return (error);
}

/*
 * Upcall made by the protocol when it's doing an asynchronous close. It
 * will drop the protocol's reference on the socket.
 */
void
so_closed(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	VN_RELE(SOTOV(so));
}

vnode_t *
so_get_vnode(sock_upper_handle_t sock_handle)
{
	sonode_t *so = (sonode_t *)sock_handle;
	vnode_t *vn;

	vn = SOTOV(so);
	VN_HOLD(vn);

	return (vn);
}

void
so_zcopy_notify(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	so->so_copyflag |= STZCNOTIFY;
	cv_broadcast(&so->so_copy_cv);
	mutex_exit(&so->so_lock);
}

void
so_set_error(sock_upper_handle_t sock_handle, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	soseterror(so, error);

	so_notify_error(so);
}

/*
 * so_recvmsg - read data from the socket
 *
 * There are two ways of obtaining data: either we ask the protocol to
 * copy directly into the supplied buffer, or we copy data from the
 * sonode's receive queue. The decision of which one to use depends on
 * whether the protocol has a sd_recv_uio down call.
 */
int
so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	rval_t rval;
	int flags = 0;
	t_uscalar_t controllen, namelen;
	int error = 0;
	int ret;
	mblk_t *mctlp = NULL;
	union T_primitives *tpr;
	void *control;
	ssize_t saved_resid;
	struct uio *suiop;

	SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));

	if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		SO_UNBLOCK_FALLBACK(so);
		return (ENOTCONN);
	}

	mutex_enter(&so->so_lock);
	if (so->so_krecv_cb != NULL) {
		mutex_exit(&so->so_lock);
		return (EOPNOTSUPP);
	}
	mutex_exit(&so->so_lock);

	if (msg->msg_flags & MSG_PEEK)
		msg->msg_flags &= ~MSG_WAITALL;

	if (so->so_mode & SM_ATOMIC)
		msg->msg_flags |= MSG_TRUNC;

	if (msg->msg_flags & MSG_OOB) {
		if ((so->so_mode & SM_EXDATA) == 0) {
			error = EOPNOTSUPP;
		} else if (so->so_downcalls->sd_recv_uio != NULL) {
			error = (*so->so_downcalls->sd_recv_uio)
			    (so->so_proto_handle, uiop, msg, cr);
		} else {
			error = sorecvoob(so, msg, uiop, msg->msg_flags,
			    IS_SO_OOB_INLINE(so));
		}
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * If the protocol has the recv down call, then pass the request
	 * down.
	 */
	if (so->so_downcalls->sd_recv_uio != NULL) {
		error = (*so->so_downcalls->sd_recv_uio)
		    (so->so_proto_handle, uiop, msg, cr);
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * Reading data from the socket buffer.
	 */
	flags = msg->msg_flags;
	msg->msg_flags = 0;

	/*
	 * Set msg_controllen and msg_namelen to zero here to make it
	 * simpler in the cases that no control or name is returned.
	 */
	controllen = msg->msg_controllen;
	namelen = msg->msg_namelen;
	msg->msg_controllen = 0;
	msg->msg_namelen = 0;

	mutex_enter(&so->so_lock);
	/* Set SOREADLOCKED */
	error = so_lock_read_intr(so,
	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
	mutex_exit(&so->so_lock);
	if (error) {
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	suiop = sod_rcv_init(so, flags, &uiop);
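	/*
	 * Dequeue loop. For MSG_WAITALL we keep coming back to the retry
	 * label until the request is satisfied, an error or EOF is seen,
	 * or a message boundary (MSG_EOR) is reached.
	 */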
retry:
	saved_resid = uiop->uio_resid;
	error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
	if (error != 0) {
		goto out;
	}
	/*
	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
	 * For non-datagrams MOREDATA is used to set MSG_EOR.
	 */
	ASSERT(!(rval.r_val1 & MORECTL));
	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
		msg->msg_flags |= MSG_TRUNC;
	if (mctlp == NULL) {
		dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));

		mutex_enter(&so->so_lock);
		/* Set MSG_EOR based on MOREDATA */
		if (!(rval.r_val1 & MOREDATA)) {
			if (so->so_state & SS_SAVEDEOR) {
				msg->msg_flags |= MSG_EOR;
				so->so_state &= ~SS_SAVEDEOR;
			}
		}
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}

		goto out_locked;
	}
	/* so_queue_msg has already verified length and alignment */
	tpr = (union T_primitives *)mctlp->b_rptr;
	dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
	switch (tpr->type) {
	case T_DATA_IND: {
		/*
		 * Set msg_flags to MSG_EOR based on
		 * MORE_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	case T_UNITDATA_IND: {
		void *addr;
		t_uscalar_t addrlen;
		void *abuf;
		t_uscalar_t optlen;
		void *opt;

		if (namelen != 0) {
			/* Caller wants source address */
			addrlen = tpr->unitdata_ind.SRC_length;
			addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
			    addrlen, 1);
			if (addr == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			ASSERT(so->so_family != AF_UNIX);
		}
		optlen = tpr->unitdata_ind.OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Extract any source address option.
			 * Determine how large a cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			if (so->so_family == AF_UNIX)
				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (namelen != 0) {
			/*
			 * Return address to caller.
			 * Caller handles truncation if length
			 * exceeds msg_namelen.
			 * NOTE: AF_UNIX NUL termination is ensured by
			 * the sender's copyin_name().
			 */
			abuf = kmem_alloc(addrlen, KM_SLEEP);

			bcopy(addr, abuf, addrlen);
			msg->msg_name = abuf;
			msg->msg_namelen = addrlen;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen, flags, control,
			    controllen);
			if (error) {
				freemsg(mctlp);
				if (msg->msg_namelen != 0)
					kmem_free(msg->msg_name,
					    msg->msg_namelen);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		freemsg(mctlp);
		goto out;
	}
	case T_OPTDATA_IND: {
		struct T_optdata_req *tdr;
		void *opt;
		t_uscalar_t optlen;

		tdr = (struct T_optdata_req *)mctlp->b_rptr;
		optlen = tdr->OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Determine how large a cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp,
			    tpr->optdata_ind.OPT_offset, optlen,
			    __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}

			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen, flags, control,
			    controllen);
			if (error) {
				freemsg(mctlp);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * DATA_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 * Not possible to wait if control info was received.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    controllen == 0 &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	default:
		cmn_err(CE_CONT, "so_recvmsg bad type %x\n",
		    tpr->type);
		freemsg(mctlp);
		error = EPROTO;
		ASSERT(0);
	}
out:
	mutex_enter(&so->so_lock);
out_locked:
	ret = sod_rcv_done(so, suiop, uiop);
	if (ret != 0 && error == 0)
		error = ret;

	so_unlock_read(so);	/* Clear SOREADLOCKED */
	mutex_exit(&so->so_lock);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

sonodeops_t so_sonodeops = {
	so_init,		/* sop_init */
	so_accept,		/* sop_accept */
	so_bind,		/* sop_bind */
	so_listen,		/* sop_listen */
	so_connect,		/* sop_connect */
	so_recvmsg,		/* sop_recvmsg */
	so_sendmsg,		/* sop_sendmsg */
	so_sendmblk,		/* sop_sendmblk */
	so_getpeername,		/* sop_getpeername */
	so_getsockname,		/* sop_getsockname */
	so_shutdown,		/* sop_shutdown */
	so_getsockopt,		/* sop_getsockopt */
	so_setsockopt,		/* sop_setsockopt */
	so_ioctl,		/* sop_ioctl */
	so_poll,		/* sop_poll */
	so_close,		/* sop_close */
};

sock_upcalls_t so_upcalls = {
	so_newconn,
	so_connected,
	so_disconnected,
	so_opctl,
	so_queue_msg,
	so_set_prop,
	so_txq_full,
	so_signal_oob,
	so_zcopy_notify,
	so_set_error,
	so_closed,
	so_get_vnode
};