1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 2014, Joyent, Inc. All rights reserved. 28 * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. 
29 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/debug.h> 36 #include <sys/cmn_err.h> 37 38 #include <sys/stropts.h> 39 #include <sys/socket.h> 40 #include <sys/socketvar.h> 41 42 #define _SUN_TPI_VERSION 2 43 #include <sys/tihdr.h> 44 #include <sys/sockio.h> 45 #include <sys/kmem_impl.h> 46 47 #include <sys/strsubr.h> 48 #include <sys/strsun.h> 49 #include <sys/ddi.h> 50 #include <netinet/in.h> 51 #include <inet/ip.h> 52 53 #include <fs/sockfs/sockcommon.h> 54 #include <fs/sockfs/sockfilter_impl.h> 55 56 #include <sys/socket_proto.h> 57 58 #include <fs/sockfs/socktpi_impl.h> 59 #include <fs/sockfs/sodirect.h> 60 #include <sys/tihdr.h> 61 #include <fs/sockfs/nl7c.h> 62 63 extern int xnet_skip_checks; 64 extern int xnet_check_print; 65 66 static void so_queue_oob(struct sonode *, mblk_t *, size_t); 67 68 69 /*ARGSUSED*/ 70 int 71 so_accept_notsupp(struct sonode *lso, int fflag, 72 struct cred *cr, struct sonode **nsop) 73 { 74 return (EOPNOTSUPP); 75 } 76 77 /*ARGSUSED*/ 78 int 79 so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr) 80 { 81 return (EOPNOTSUPP); 82 } 83 84 /*ARGSUSED*/ 85 int 86 so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa, 87 socklen_t *len, struct cred *cr) 88 { 89 return (EOPNOTSUPP); 90 } 91 92 /*ARGSUSED*/ 93 int 94 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr, 95 socklen_t *addrlen, boolean_t accept, struct cred *cr) 96 { 97 return (EOPNOTSUPP); 98 } 99 100 /*ARGSUSED*/ 101 int 102 so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr) 103 { 104 return (EOPNOTSUPP); 105 } 106 107 /*ARGSUSED*/ 108 int 109 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag, 110 struct cred *cr, mblk_t **mpp) 111 { 112 return (EOPNOTSUPP); 113 } 114 115 /* 116 * Generic Socket Ops 117 */ 118 119 /* ARGSUSED */ 120 int 121 so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags) 122 { 
123 return (socket_init_common(so, pso, flags, cr)); 124 } 125 126 int 127 so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 128 int flags, struct cred *cr) 129 { 130 int error; 131 132 SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr)); 133 134 ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD); 135 136 /* X/Open requires this check */ 137 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 138 if (xnet_check_print) { 139 printf("sockfs: X/Open bind state check " 140 "caused EINVAL\n"); 141 } 142 error = EINVAL; 143 goto done; 144 } 145 146 /* 147 * a bind to a NULL address is interpreted as unbind. So just 148 * do the downcall. 149 */ 150 if (name == NULL) 151 goto dobind; 152 153 switch (so->so_family) { 154 case AF_INET: 155 if ((size_t)namelen != sizeof (sin_t)) { 156 error = name->sa_family != so->so_family ? 157 EAFNOSUPPORT : EINVAL; 158 eprintsoline(so, error); 159 goto done; 160 } 161 162 if ((flags & _SOBIND_XPG4_2) && 163 (name->sa_family != so->so_family)) { 164 /* 165 * This check has to be made for X/Open 166 * sockets however application failures have 167 * been observed when it is applied to 168 * all sockets. 169 */ 170 error = EAFNOSUPPORT; 171 eprintsoline(so, error); 172 goto done; 173 } 174 /* 175 * Force a zero sa_family to match so_family. 176 * 177 * Some programs like inetd(1M) don't set the 178 * family field. Other programs leave 179 * sin_family set to garbage - SunOS 4.X does 180 * not check the family field on a bind. 181 * We use the family field that 182 * was passed in to the socket() call. 183 */ 184 name->sa_family = so->so_family; 185 break; 186 187 case AF_INET6: { 188 #ifdef DEBUG 189 sin6_t *sin6 = (sin6_t *)name; 190 #endif 191 if ((size_t)namelen != sizeof (sin6_t)) { 192 error = name->sa_family != so->so_family ? 
193 EAFNOSUPPORT : EINVAL; 194 eprintsoline(so, error); 195 goto done; 196 } 197 198 if (name->sa_family != so->so_family) { 199 /* 200 * With IPv6 we require the family to match 201 * unlike in IPv4. 202 */ 203 error = EAFNOSUPPORT; 204 eprintsoline(so, error); 205 goto done; 206 } 207 #ifdef DEBUG 208 /* 209 * Verify that apps don't forget to clear 210 * sin6_scope_id etc 211 */ 212 if (sin6->sin6_scope_id != 0 && 213 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 214 zcmn_err(getzoneid(), CE_WARN, 215 "bind with uninitialized sin6_scope_id " 216 "(%d) on socket. Pid = %d\n", 217 (int)sin6->sin6_scope_id, 218 (int)curproc->p_pid); 219 } 220 if (sin6->__sin6_src_id != 0) { 221 zcmn_err(getzoneid(), CE_WARN, 222 "bind with uninitialized __sin6_src_id " 223 "(%d) on socket. Pid = %d\n", 224 (int)sin6->__sin6_src_id, 225 (int)curproc->p_pid); 226 } 227 #endif /* DEBUG */ 228 229 break; 230 } 231 default: 232 /* Just pass the request to the protocol */ 233 goto dobind; 234 } 235 236 /* 237 * First we check if either NCA or KSSL has been enabled for 238 * the requested address, and if so, we fall back to TPI. 239 * If neither of those two services are enabled, then we just 240 * pass the request to the protocol. 241 * 242 * Note that KSSL can only be enabled on a socket if NCA is NOT 243 * enabled for that socket, hence the else-statement below. 244 */ 245 if (nl7c_enabled && ((so->so_family == AF_INET || 246 so->so_family == AF_INET6) && 247 nl7c_lookup_addr(name, namelen) != NULL)) { 248 /* 249 * NL7C is not supported in non-global zones, 250 * we enforce this restriction here. 
		 */
		if (so->so_zoneid == GLOBAL_ZONEID) {
			/* NCA should be used, so fall back to TPI */
			error = so_tpi_fallback(so, cr);
			SO_UNBLOCK_FALLBACK(so);
			if (error)
				return (error);
			else
				return (SOP_BIND(so, name, namelen, flags, cr));
		}
	}

dobind:
	/*
	 * Hand the bind request to the protocol.  Active socket filters
	 * get a chance to intercept it first; a negative filter result
	 * means "not handled, continue on to the sd_bind downcall".
	 */
	if (so->so_filter_active == 0 ||
	    (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_bind)
		    (so->so_proto_handle, name, namelen, cr);
	}
done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

/*
 * Start listening for incoming connections.  The request (including the
 * backlog) is offered to any active socket filters first; a negative
 * filter result passes it on to the protocol's sd_listen downcall.
 */
int
so_listen(struct sonode *so, int backlog, struct cred *cr)
{
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));

	if ((so)->so_filter_active == 0 ||
	    (error = sof_filter_listen(so, &backlog, cr)) < 0)
		error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
		    backlog, cr);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}


/*
 * Initiate a connection to `name'.  Any error left behind by an earlier
 * non-blocking operation is returned first.  If the protocol reports
 * EINPROGRESS, wait (unless FNONBLOCK/FNDELAY is set) until connection
 * id `id' is established.
 */
int
so_connect(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int fflag, int flags, struct cred *cr)
{
	int error = 0;
	sock_connid_t id;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));

	/*
	 * If there is a pending error, return error
	 * This can happen if a non blocking operation caused an error.
	 */

	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	/* A non-negative filter result short-circuits the sd_connect call */
	if (so->so_filter_active == 0 ||
	    (error = sof_filter_connect(so, (struct sockaddr *)name,
	    &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
		    name, namelen, &id, cr);

		if (error == EINPROGRESS)
			error = so_wait_connected(so,
			    fflag & (FNONBLOCK|FNDELAY), id);
	}
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

/*
 * Accept an incoming connection.  A fully established connection is
 * pulled off the accept queue (blocking unless FNONBLOCK/FNDELAY), then
 * the filter framework and the protocol are given a chance to finish
 * (or veto) the accept.  On failure the new sonode is closed and
 * destroyed; on success it is returned through *nsop.
 */
/*ARGSUSED*/
int
so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
{
	int error = 0;
	struct sonode *nso;

	*nsop = NULL;

	SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
	if ((so->so_state & SS_ACCEPTCONN) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
		    EOPNOTSUPP : EINVAL);
	}

	if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
	    &nso)) == 0) {
		ASSERT(nso != NULL);

		/* finish the accept */
		if ((so->so_filter_active > 0 &&
		    (error = sof_filter_accept(nso, cr)) > 0) ||
		    (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
		    nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
			(void) socket_close(nso, 0, cr);
			socket_destroy(nso);
		} else {
			*nsop = nso;
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

/*
 * Send the data described by `msg'/`uiop' down to the protocol.  For
 * atomic (message-oriented) protocols the whole request must fit in
 * sopp_maxpsz; the send loop below also re-checks shutdown state,
 * pending errors and flow control on every iteration.
 */
int
so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	int error, flags;
	boolean_t dontblock;
	ssize_t orig_resid;
	mblk_t *mp;

	SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));

	flags = msg->msg_flags;
	error = 0;
	dontblock = (flags & MSG_DONTWAIT) ||
	    (uiop->uio_fmode & (FNONBLOCK|FNDELAY));

	if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
		/*
386 * Old way of passing fd's is not supported 387 */ 388 SO_UNBLOCK_FALLBACK(so); 389 return (EOPNOTSUPP); 390 } 391 392 if ((so->so_mode & SM_ATOMIC) && 393 uiop->uio_resid > so->so_proto_props.sopp_maxpsz && 394 so->so_proto_props.sopp_maxpsz != -1) { 395 SO_UNBLOCK_FALLBACK(so); 396 return (EMSGSIZE); 397 } 398 399 /* 400 * For atomic sends we will only do one iteration. 401 */ 402 do { 403 if (so->so_state & SS_CANTSENDMORE) { 404 error = EPIPE; 405 break; 406 } 407 408 if (so->so_error != 0) { 409 mutex_enter(&so->so_lock); 410 error = sogeterr(so, B_TRUE); 411 mutex_exit(&so->so_lock); 412 if (error != 0) 413 break; 414 } 415 416 /* 417 * Send down OOB messages even if the send path is being 418 * flow controlled (assuming the protocol supports OOB data). 419 */ 420 if (flags & MSG_OOB) { 421 if ((so->so_mode & SM_EXDATA) == 0) { 422 error = EOPNOTSUPP; 423 break; 424 } 425 } else if (SO_SND_FLOWCTRLD(so)) { 426 /* 427 * Need to wait until the protocol is ready to receive 428 * more data for transmission. 429 */ 430 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 431 break; 432 } 433 434 /* 435 * Time to send data to the protocol. We either copy the 436 * data into mblks or pass the uio directly to the protocol. 437 * We decide what to do based on the available down calls. 
438 */ 439 if (so->so_downcalls->sd_send_uio != NULL) { 440 error = (*so->so_downcalls->sd_send_uio) 441 (so->so_proto_handle, uiop, msg, cr); 442 if (error != 0) 443 break; 444 } else { 445 /* save the resid in case of failure */ 446 orig_resid = uiop->uio_resid; 447 448 if ((mp = socopyinuio(uiop, 449 so->so_proto_props.sopp_maxpsz, 450 so->so_proto_props.sopp_wroff, 451 so->so_proto_props.sopp_maxblk, 452 so->so_proto_props.sopp_tail, &error)) == NULL) { 453 break; 454 } 455 ASSERT(uiop->uio_resid >= 0); 456 457 if (so->so_filter_active > 0 && 458 ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr, 459 &error)) == NULL)) { 460 if (error != 0) 461 break; 462 continue; 463 } 464 error = (*so->so_downcalls->sd_send) 465 (so->so_proto_handle, mp, msg, cr); 466 if (error != 0) { 467 /* 468 * The send failed. We do not have to free the 469 * mblks, because that is the protocol's 470 * responsibility. However, uio_resid must 471 * remain accurate, so adjust that here. 472 */ 473 uiop->uio_resid = orig_resid; 474 break; 475 } 476 } 477 } while (uiop->uio_resid > 0); 478 479 SO_UNBLOCK_FALLBACK(so); 480 481 return (error); 482 } 483 484 int 485 so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag, 486 struct cred *cr, mblk_t **mpp, sof_instance_t *fil, 487 boolean_t fil_inject) 488 { 489 int error; 490 boolean_t dontblock; 491 size_t size; 492 mblk_t *mp = *mpp; 493 494 if (so->so_downcalls->sd_send == NULL) 495 return (EOPNOTSUPP); 496 497 error = 0; 498 dontblock = (msg->msg_flags & MSG_DONTWAIT) || 499 (fflag & (FNONBLOCK|FNDELAY)); 500 size = msgdsize(mp); 501 502 if ((so->so_mode & SM_ATOMIC) && 503 size > so->so_proto_props.sopp_maxpsz && 504 so->so_proto_props.sopp_maxpsz != -1) { 505 SO_UNBLOCK_FALLBACK(so); 506 return (EMSGSIZE); 507 } 508 509 while (mp != NULL) { 510 mblk_t *nmp, *last_mblk; 511 size_t mlen; 512 513 if (so->so_state & SS_CANTSENDMORE) { 514 error = EPIPE; 515 break; 516 } 517 if (so->so_error != 0) { 518 mutex_enter(&so->so_lock); 519 
error = sogeterr(so, B_TRUE); 520 mutex_exit(&so->so_lock); 521 if (error != 0) 522 break; 523 } 524 /* Socket filters are not flow controlled */ 525 if (SO_SND_FLOWCTRLD(so) && !fil_inject) { 526 /* 527 * Need to wait until the protocol is ready to receive 528 * more data for transmission. 529 */ 530 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 531 break; 532 } 533 534 /* 535 * We only allow so_maxpsz of data to be sent down to 536 * the protocol at time. 537 */ 538 mlen = MBLKL(mp); 539 nmp = mp->b_cont; 540 last_mblk = mp; 541 while (nmp != NULL) { 542 mlen += MBLKL(nmp); 543 if (mlen > so->so_proto_props.sopp_maxpsz) { 544 last_mblk->b_cont = NULL; 545 break; 546 } 547 last_mblk = nmp; 548 nmp = nmp->b_cont; 549 } 550 551 if (so->so_filter_active > 0 && 552 (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg, 553 cr, &error)) == NULL) { 554 *mpp = mp = nmp; 555 if (error != 0) 556 break; 557 continue; 558 } 559 error = (*so->so_downcalls->sd_send) 560 (so->so_proto_handle, mp, msg, cr); 561 if (error != 0) { 562 /* 563 * The send failed. The protocol will free the mblks 564 * that were sent down. Let the caller deal with the 565 * rest. 
566 */ 567 *mpp = nmp; 568 break; 569 } 570 571 *mpp = mp = nmp; 572 } 573 /* Let the filter know whether the protocol is flow controlled */ 574 if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so)) 575 error = ENOSPC; 576 577 return (error); 578 } 579 580 #pragma inline(so_sendmblk_impl) 581 582 int 583 so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 584 struct cred *cr, mblk_t **mpp) 585 { 586 int error; 587 588 SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp)); 589 590 if ((so->so_mode & SM_SENDFILESUPP) == 0) { 591 SO_UNBLOCK_FALLBACK(so); 592 return (EOPNOTSUPP); 593 } 594 595 error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top, 596 B_FALSE); 597 598 SO_UNBLOCK_FALLBACK(so); 599 600 return (error); 601 } 602 603 int 604 so_shutdown(struct sonode *so, int how, struct cred *cr) 605 { 606 int error; 607 608 SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr)); 609 610 /* 611 * SunOS 4.X has no check for datagram sockets. 612 * 5.X checks that it is connected (ENOTCONN) 613 * X/Open requires that we check the connected state. 614 */ 615 if (!(so->so_state & SS_ISCONNECTED)) { 616 if (!xnet_skip_checks) { 617 error = ENOTCONN; 618 if (xnet_check_print) { 619 printf("sockfs: X/Open shutdown check " 620 "caused ENOTCONN\n"); 621 } 622 } 623 goto done; 624 } 625 626 if (so->so_filter_active == 0 || 627 (error = sof_filter_shutdown(so, &how, cr)) < 0) 628 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle, 629 how, cr)); 630 631 /* 632 * Protocol agreed to shutdown. We need to flush the 633 * receive buffer if the receive side is being shutdown. 
634 */ 635 if (error == 0 && how != SHUT_WR) { 636 mutex_enter(&so->so_lock); 637 /* wait for active reader to finish */ 638 (void) so_lock_read(so, 0); 639 640 so_rcv_flush(so); 641 642 so_unlock_read(so); 643 mutex_exit(&so->so_lock); 644 } 645 646 done: 647 SO_UNBLOCK_FALLBACK(so); 648 return (error); 649 } 650 651 int 652 so_getsockname(struct sonode *so, struct sockaddr *addr, 653 socklen_t *addrlen, struct cred *cr) 654 { 655 int error; 656 657 SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr)); 658 659 if (so->so_filter_active == 0 || 660 (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0) 661 error = (*so->so_downcalls->sd_getsockname) 662 (so->so_proto_handle, addr, addrlen, cr); 663 664 SO_UNBLOCK_FALLBACK(so); 665 return (error); 666 } 667 668 int 669 so_getpeername(struct sonode *so, struct sockaddr *addr, 670 socklen_t *addrlen, boolean_t accept, struct cred *cr) 671 { 672 int error; 673 674 SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr)); 675 676 if (accept) { 677 error = (*so->so_downcalls->sd_getpeername) 678 (so->so_proto_handle, addr, addrlen, cr); 679 } else if (!(so->so_state & SS_ISCONNECTED)) { 680 error = ENOTCONN; 681 } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 682 /* Added this check for X/Open */ 683 error = EINVAL; 684 if (xnet_check_print) { 685 printf("sockfs: X/Open getpeername check => EINVAL\n"); 686 } 687 } else if (so->so_filter_active == 0 || 688 (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) { 689 error = (*so->so_downcalls->sd_getpeername) 690 (so->so_proto_handle, addr, addrlen, cr); 691 } 692 693 SO_UNBLOCK_FALLBACK(so); 694 return (error); 695 } 696 697 int 698 so_getsockopt(struct sonode *so, int level, int option_name, 699 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 700 { 701 int error = 0; 702 703 if (level == SOL_FILTER) 704 return (sof_getsockopt(so, option_name, optval, optlenp, cr)); 705 706 SO_BLOCK_FALLBACK(so, 707 
SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr)); 708 709 if ((so->so_filter_active == 0 || 710 (error = sof_filter_getsockopt(so, level, option_name, optval, 711 optlenp, cr)) < 0) && 712 (error = socket_getopt_common(so, level, option_name, optval, 713 optlenp, flags)) < 0) { 714 error = (*so->so_downcalls->sd_getsockopt) 715 (so->so_proto_handle, level, option_name, optval, optlenp, 716 cr); 717 if (error == ENOPROTOOPT) { 718 if (level == SOL_SOCKET) { 719 /* 720 * If a protocol does not support a particular 721 * socket option, set can fail (not allowed) 722 * but get can not fail. This is the previous 723 * sockfs bahvior. 724 */ 725 switch (option_name) { 726 case SO_LINGER: 727 if (*optlenp < (t_uscalar_t) 728 sizeof (struct linger)) { 729 error = EINVAL; 730 break; 731 } 732 error = 0; 733 bzero(optval, sizeof (struct linger)); 734 *optlenp = sizeof (struct linger); 735 break; 736 case SO_RCVTIMEO: 737 case SO_SNDTIMEO: 738 if (*optlenp < (t_uscalar_t) 739 sizeof (struct timeval)) { 740 error = EINVAL; 741 break; 742 } 743 error = 0; 744 bzero(optval, sizeof (struct timeval)); 745 *optlenp = sizeof (struct timeval); 746 break; 747 case SO_SND_BUFINFO: 748 if (*optlenp < (t_uscalar_t) 749 sizeof (struct so_snd_bufinfo)) { 750 error = EINVAL; 751 break; 752 } 753 error = 0; 754 bzero(optval, 755 sizeof (struct so_snd_bufinfo)); 756 *optlenp = 757 sizeof (struct so_snd_bufinfo); 758 break; 759 case SO_DEBUG: 760 case SO_REUSEADDR: 761 case SO_KEEPALIVE: 762 case SO_DONTROUTE: 763 case SO_BROADCAST: 764 case SO_USELOOPBACK: 765 case SO_OOBINLINE: 766 case SO_DGRAM_ERRIND: 767 case SO_SNDBUF: 768 case SO_RCVBUF: 769 error = 0; 770 *((int32_t *)optval) = 0; 771 *optlenp = sizeof (int32_t); 772 break; 773 default: 774 break; 775 } 776 } 777 } 778 } 779 780 SO_UNBLOCK_FALLBACK(so); 781 return (error); 782 } 783 784 int 785 so_setsockopt(struct sonode *so, int level, int option_name, 786 const void *optval, socklen_t optlen, struct cred *cr) 787 
{ 788 int error = 0; 789 struct timeval tl; 790 const void *opt = optval; 791 792 if (level == SOL_FILTER) 793 return (sof_setsockopt(so, option_name, optval, optlen, cr)); 794 795 SO_BLOCK_FALLBACK(so, 796 SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr)); 797 798 /* X/Open requires this check */ 799 if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) { 800 SO_UNBLOCK_FALLBACK(so); 801 if (xnet_check_print) 802 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 803 return (EINVAL); 804 } 805 806 if (so->so_filter_active > 0 && 807 (error = sof_filter_setsockopt(so, level, option_name, 808 (void *)optval, &optlen, cr)) >= 0) 809 goto done; 810 811 if (level == SOL_SOCKET) { 812 switch (option_name) { 813 case SO_RCVTIMEO: 814 case SO_SNDTIMEO: { 815 /* 816 * We pass down these two options to protocol in order 817 * to support some third part protocols which need to 818 * know them. For those protocols which don't care 819 * these two options, simply return 0. 820 */ 821 clock_t t_usec; 822 823 if (get_udatamodel() == DATAMODEL_NONE || 824 get_udatamodel() == DATAMODEL_NATIVE) { 825 if (optlen != sizeof (struct timeval)) { 826 error = EINVAL; 827 goto done; 828 } 829 bcopy((struct timeval *)optval, &tl, 830 sizeof (struct timeval)); 831 } else { 832 if (optlen != sizeof (struct timeval32)) { 833 error = EINVAL; 834 goto done; 835 } 836 TIMEVAL32_TO_TIMEVAL(&tl, 837 (struct timeval32 *)optval); 838 } 839 opt = &tl; 840 optlen = sizeof (tl); 841 t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 842 mutex_enter(&so->so_lock); 843 if (option_name == SO_RCVTIMEO) 844 so->so_rcvtimeo = drv_usectohz(t_usec); 845 else 846 so->so_sndtimeo = drv_usectohz(t_usec); 847 mutex_exit(&so->so_lock); 848 break; 849 } 850 case SO_RCVBUF: 851 /* 852 * XXX XPG 4.2 applications retrieve SO_RCVBUF from 853 * sockfs since the transport might adjust the value 854 * and not return exactly what was set by the 855 * application. 
856 */ 857 so->so_xpg_rcvbuf = *(int32_t *)optval; 858 break; 859 } 860 } 861 error = (*so->so_downcalls->sd_setsockopt) 862 (so->so_proto_handle, level, option_name, opt, optlen, cr); 863 done: 864 SO_UNBLOCK_FALLBACK(so); 865 return (error); 866 } 867 868 int 869 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 870 struct cred *cr, int32_t *rvalp) 871 { 872 int error = 0; 873 874 SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 875 876 /* 877 * If there is a pending error, return error 878 * This can happen if a non blocking operation caused an error. 879 */ 880 if (so->so_error != 0) { 881 mutex_enter(&so->so_lock); 882 error = sogeterr(so, B_TRUE); 883 mutex_exit(&so->so_lock); 884 if (error != 0) 885 goto done; 886 } 887 888 /* 889 * calling strioc can result in the socket falling back to TPI, 890 * if that is supported. 891 */ 892 if ((so->so_filter_active == 0 || 893 (error = sof_filter_ioctl(so, cmd, arg, mode, 894 rvalp, cr)) < 0) && 895 (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 && 896 (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) { 897 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle, 898 cmd, arg, mode, rvalp, cr); 899 } 900 901 done: 902 SO_UNBLOCK_FALLBACK(so); 903 904 return (error); 905 } 906 907 int 908 so_poll(struct sonode *so, short events, int anyyet, short *reventsp, 909 struct pollhead **phpp) 910 { 911 int state = so->so_state, mask; 912 *reventsp = 0; 913 914 /* 915 * In sockets the errors are represented as input/output events 916 */ 917 if (so->so_error != 0 && 918 ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) { 919 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events; 920 return (0); 921 } 922 923 /* 924 * If the socket is in a state where it can send data 925 * turn on POLLWRBAND and POLLOUT events. 
926 */ 927 if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) { 928 /* 929 * out of band data is allowed even if the connection 930 * is flow controlled 931 */ 932 *reventsp |= POLLWRBAND & events; 933 if (!SO_SND_FLOWCTRLD(so)) { 934 /* 935 * As long as there is buffer to send data 936 * turn on POLLOUT events 937 */ 938 *reventsp |= POLLOUT & events; 939 } 940 } 941 942 /* 943 * Turn on POLLIN whenever there is data on the receive queue, 944 * or the socket is in a state where no more data will be received. 945 * Also, if the socket is accepting connections, flip the bit if 946 * there is something on the queue. 947 * 948 * We do an initial check for events without holding locks. However, 949 * if there are no event available, then we redo the check for POLLIN 950 * events under the lock. 951 */ 952 953 /* Pending connections */ 954 if (!list_is_empty(&so->so_acceptq_list)) 955 *reventsp |= (POLLIN|POLLRDNORM) & events; 956 957 /* 958 * If we're looking for POLLRDHUP, indicate it if we have sent the 959 * last rx signal for the socket. 960 */ 961 if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG)) 962 *reventsp |= POLLRDHUP; 963 964 /* Data */ 965 /* so_downcalls is null for sctp */ 966 if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) { 967 *reventsp |= (*so->so_downcalls->sd_poll) 968 (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet, 969 CRED()) & events; 970 ASSERT((*reventsp & ~events) == 0); 971 /* do not recheck events */ 972 events &= ~SO_PROTO_POLLEV; 973 } else { 974 if (SO_HAVE_DATA(so)) 975 *reventsp |= (POLLIN|POLLRDNORM) & events; 976 977 /* Urgent data */ 978 if ((state & SS_OOBPEND) != 0) { 979 *reventsp |= (POLLRDBAND | POLLPRI) & events; 980 } 981 982 /* 983 * If the socket has become disconnected, we set POLLHUP. 984 * Note that if we are in this state, we will have set POLLIN 985 * (SO_HAVE_DATA() is true on a disconnected socket), but not 986 * POLLOUT (SS_ISCONNECTED is false). 
This is in keeping with
		 * the semantics of POLLHUP, which is defined to be mutually
		 * exclusive with respect to POLLOUT but not POLLIN. We are
		 * therefore setting POLLHUP primarily for the benefit of
		 * those not polling on POLLIN, as they have no other way of
		 * knowing that the socket has been disconnected.
		 */
		mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;

		if ((state & (mask | SS_ISCONNECTED)) == mask)
			*reventsp |= POLLHUP;
	}

	if ((!*reventsp && !anyyet) || (events & POLLET)) {
		/* Check for read events again, but this time under lock */
		if (events & (POLLIN|POLLRDNORM)) {
			mutex_enter(&so->so_lock);
			if (SO_HAVE_DATA(so) ||
			    !list_is_empty(&so->so_acceptq_list)) {
				if (events & POLLET) {
					so->so_pollev |= SO_POLLEV_IN;
					*phpp = &so->so_poll_list;
				}

				mutex_exit(&so->so_lock);
				*reventsp |= (POLLIN|POLLRDNORM) & events;

				return (0);
			} else {
				so->so_pollev |= SO_POLLEV_IN;
				mutex_exit(&so->so_lock);
			}
		}
		*phpp = &so->so_poll_list;
	}
	return (0);
}

/*
 * Generic Upcalls
 */

/*
 * Protocol upcall: connection `id' is now established.  Records the
 * peer credentials (if supplied) and wakes threads waiting for the
 * connect to complete.  Note there is no mutex_exit() here:
 * so_notify_connected() drops the so_lock taken at the top (cf. the
 * so_notify_writable comment in so_txq_full below).
 */
void
so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
    cred_t *peer_cred, pid_t peer_cpid)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	ASSERT(so->so_proto_handle != NULL);

	if (peer_cred != NULL) {
		if (so->so_peercred != NULL)
			crfree(so->so_peercred);
		crhold(peer_cred);
		so->so_peercred = peer_cred;
		so->so_cpid = peer_cpid;
	}

	so->so_proto_connid = id;
	soisconnected(so);
	/*
	 * Wake ones who're waiting for conn to become established.
	 */
	so_notify_connected(so);
}

/*
 * Protocol upcall: the connection was torn down (or the connect attempt
 * failed, when the socket never reached SS_ISCONNECTED).  As with
 * so_connected(), the notify routine drops the so_lock taken here.
 */
int
so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;
	boolean_t connect_failed;

	mutex_enter(&so->so_lock);

	/*
	 * If we aren't currently connected, then this isn't a disconnect but
	 * rather a failure to connect.
	 */
	connect_failed = !(so->so_state & SS_ISCONNECTED);

	so->so_proto_connid = id;
	soisdisconnected(so, error);
	so_notify_disconnected(so, connect_failed, error);

	return (0);
}

/*
 * Protocol upcall for miscellaneous operational state changes: the peer
 * shut down a direction of the connection, or accept processing is
 * being enabled.  In the SHUT_* cases the so_notify_* routine drops the
 * so_lock taken here; the ENAB_ACCEPT case releases it explicitly.
 */
void
so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
    uintptr_t arg)
{
	struct sonode *so = (struct sonode *)sock_handle;

	switch (action) {
	case SOCK_OPCTL_SHUT_SEND:
		mutex_enter(&so->so_lock);
		socantsendmore(so);
		so_notify_disconnecting(so);
		break;
	case SOCK_OPCTL_SHUT_RECV: {
		mutex_enter(&so->so_lock);
		socantrcvmore(so);
		so_notify_eof(so);
		break;
	}
	case SOCK_OPCTL_ENAB_ACCEPT:
		mutex_enter(&so->so_lock);
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = (unsigned int)arg;
		/*
		 * The protocol can stop generating newconn upcalls when
		 * the backlog is full, so to make sure the listener does
		 * not end up with a queue full of deferred connections
		 * we reduce the backlog by one. Thus the listener will
		 * start closing deferred connections before the backlog
		 * is full.
		 */
		if (so->so_filter_active > 0)
			so->so_backlog = MAX(1, so->so_backlog - 1);
		mutex_exit(&so->so_lock);
		break;
	default:
		ASSERT(0);
		break;
	}
}

/*
 * Protocol upcall signalling a change in transmit-queue state:
 * qfull == B_TRUE imposes flow control, B_FALSE lifts it and wakes any
 * blocked writers.
 */
void
so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
{
	struct sonode *so = (struct sonode *)sock_handle;

	if (qfull) {
		so_snd_qfull(so);
	} else {
		so_snd_qnotfull(so);
		mutex_enter(&so->so_lock);
		/* so_notify_writable drops so_lock */
		so_notify_writable(so);
	}
}

/*
 * Protocol upcall announcing a newly created incoming connection.
 * Returning NULL tells the protocol to drop the connection: when the
 * socket is not accepting, when the accept queue is full and no
 * deferred connection can be dropped, or when no sonode can be
 * allocated.
 */
sock_upper_handle_t
so_newconn(sock_upper_handle_t parenthandle,
    sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
    struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
{
	struct sonode *so = (struct sonode *)parenthandle;
	struct sonode *nso;
	int error;

	ASSERT(proto_handle != NULL);

	if ((so->so_state & SS_ACCEPTCONN) == 0 ||
	    (so->so_acceptq_len >= so->so_backlog &&
	    (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
		return (NULL);
	}

	nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
	    &error);
	if (nso == NULL)
		return (NULL);

	if (peer_cred != NULL) {
		crhold(peer_cred);
		nso->so_peercred = peer_cred;
		nso->so_cpid = peer_cpid;
	}
	nso->so_listener = so;

	/*
	 * The new socket (nso), proto_handle and sock_upcallsp are all
	 * valid at this point. But as soon as nso is placed in the accept
	 * queue that can no longer be assumed (since an accept() thread may
	 * pull it off the queue and close the socket.
 */
	*sock_upcallsp = &so_upcalls;

	mutex_enter(&so->so_acceptq_lock);
	if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
		mutex_exit(&so->so_acceptq_lock);
		ASSERT(nso->so_count == 1);
		nso->so_count--;
		nso->so_listener = NULL;
		/* drop proto ref */
		VN_RELE(SOTOV(nso));
		socket_destroy(nso);
		return (NULL);
	} else {
		so->so_acceptq_len++;
		if (nso->so_state & SS_FIL_DEFER) {
			list_insert_tail(&so->so_acceptq_defer, nso);
			mutex_exit(&so->so_acceptq_lock);
		} else {
			list_insert_tail(&so->so_acceptq_list, nso);
			cv_signal(&so->so_acceptq_cv);
			mutex_exit(&so->so_acceptq_lock);
			mutex_enter(&so->so_lock);
			/* so_notify_newconn() drops so_lock */
			so_notify_newconn(so);
		}

		return ((sock_upper_handle_t)nso);
	}
}

/*
 * so_set_prop() - upcall used by the protocol to update the socket's
 * protocol properties.  Only the properties selected by soppp->sopp_flags
 * are copied into so->so_proto_props; all other fields are left untouched.
 * If socket filters are active, each interested filter is given a chance
 * to adjust maxblk/wroff/tail before the final values are stored.
 */
void
so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
{
	struct sonode *so;

	so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	if (soppp->sopp_flags & SOCKOPT_MAXBLK)
		so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
	if (soppp->sopp_flags & SOCKOPT_WROFF)
		so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
	if (soppp->sopp_flags & SOCKOPT_TAIL)
		so->so_proto_props.sopp_tail = soppp->sopp_tail;
	if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
		so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
	if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
		so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
	if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
		so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
	if (soppp->sopp_flags & SOCKOPT_MINPSZ)
		so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
	if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
		/* VMSAFE and VMUNSAFE are mutually exclusive */
		if (soppp->sopp_zcopyflag & ZCVMSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
		} else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
		}

		if (soppp->sopp_zcopyflag & COPYCACHED) {
			so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
		}
	}
	if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
		so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
	if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
		so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
	if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
		so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
	if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
		so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
	if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
		so->so_proto_props.sopp_loopback = soppp->sopp_loopback;

	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0) {
		sof_instance_t *inst;
		ssize_t maxblk;
		ushort_t wroff, tail;
		maxblk = so->so_proto_props.sopp_maxblk;
		wroff = so->so_proto_props.sopp_wroff;
		tail = so->so_proto_props.sopp_tail;
		/*
		 * Let each interested filter, walking from bottom to top,
		 * adjust the mblk properties before they are committed.
		 * NOTE(review): so_lock is intentionally dropped here; the
		 * callbacks run unlocked and the results are stored under
		 * the lock below.
		 */
		for (inst = so->so_filter_bottom; inst != NULL;
		    inst = inst->sofi_prev) {
			if (SOF_INTERESTED(inst, mblk_prop)) {
				(*inst->sofi_ops->sofop_mblk_prop)(
				    (sof_handle_t)inst, inst->sofi_cookie,
				    &maxblk, &wroff, &tail);
			}
		}
		mutex_enter(&so->so_lock);
		so->so_proto_props.sopp_maxblk = maxblk;
		so->so_proto_props.sopp_wroff = wroff;
		so->so_proto_props.sopp_tail = tail;
		mutex_exit(&so->so_lock);
	}
#ifdef DEBUG
	/* Verify that every flag the caller set was handled above */
	soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
	    SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
	    SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
	    SOCKOPT_LOOPBACK);
	ASSERT(soppp->sopp_flags == 0);
#endif
}

/* ARGSUSED */
ssize_t
so_queue_msg_impl(struct sonode *so,
    mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp,
    sof_instance_t *filter)
{
	boolean_t force_push = B_TRUE;
	int space_left;
	sodirect_t *sodp = so->so_direct;

	ASSERT(errorp != NULL);
	*errorp = 0;
	if (mp == NULL) {
		/*
		 * A NULL mblk with a recv_uio downcall is purely a
		 * notification: data is held by the protocol and msg_size
		 * only reports how much is available.
		 */
		if (so->so_downcalls->sd_recv_uio != NULL) {
			mutex_enter(&so->so_lock);
			/* the notify functions will drop the lock */
			if (flags & MSG_OOB)
				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
			else
				so_notify_data(so, msg_size);
			return (0);
		}
		ASSERT(msg_size == 0);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	ASSERT(mp->b_next == NULL);
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
	ASSERT(msg_size == msgdsize(mp));

	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		mutex_enter(&so->so_lock);
		if (sodp != NULL)
			SOD_UIOAFINI(sodp);
		goto space_check;
	}

	if (so->so_filter_active > 0) {
		/*
		 * Pass the message through each interested filter, from
		 * the given starting point up the stack.  A filter may
		 * consume the data entirely (msg_size == 0) or replace mp.
		 */
		for (; filter != NULL; filter = filter->sofi_prev) {
			if (!SOF_INTERESTED(filter, data_in))
				continue;
			mp = (*filter->sofi_ops->sofop_data_in)(
			    (sof_handle_t)filter, filter->sofi_cookie, mp,
			    flags, &msg_size);
			ASSERT(msgdsize(mp) == msg_size);
			DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
			    (mblk_t *), mp);
			/* Data was consumed/dropped, just do space check */
			if (msg_size == 0) {
				mutex_enter(&so->so_lock);
				goto space_check;
			}
		}
	}

	if (flags & MSG_OOB) {
		so_queue_oob(so, mp, msg_size);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	if (force_pushp != NULL)
		force_push = *force_pushp;

	mutex_enter(&so->so_lock);
	if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		*errorp = EOPNOTSUPP;
		return (-1);
	}
	if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
		/* Socket is shutting down; silently drop the data */
		freemsg(mp);
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* process the mblk via I/OAT if capable */
	if (sodp != NULL && sodp->sod_enabled) {
		if (DB_TYPE(mp) == M_DATA) {
			sod_uioa_mblk_init(sodp, mp, msg_size);
		} else {
			SOD_UIOAFINI(sodp);
		}
	}

	/*
	 * Although a single mblk was asserted on entry, a filter's
	 * data_in callback may have returned a b_next chain; enqueue
	 * each message separately in that case.
	 */
	if (mp->b_next == NULL) {
		so_enqueue_msg(so, mp, msg_size);
	} else {
		do {
			mblk_t *nmp;

			if ((nmp = mp->b_next) != NULL) {
				mp->b_next = NULL;
			}
			so_enqueue_msg(so, mp, msgdsize(mp));
			mp = nmp;
		} while (mp != NULL);
	}

	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}

	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
	    so->so_rcv_queued >= so->so_rcv_wanted) {
		SOCKET_TIMER_CANCEL(so);
		/*
		 * so_notify_data will release the lock
		 */
		so_notify_data(so, so->so_rcv_queued);

		if (force_pushp != NULL)
			*force_pushp = B_TRUE;
		goto done;
	} else if (so->so_rcv_timer_tid == 0) {
		/* Make sure the recv push timer is running */
		SOCKET_TIMER_START(so);
	}

done_unlock:
	mutex_exit(&so->so_lock);
done:
	return (space_left);

space_check:
	/* Entered with so_lock held; report remaining receive space */
	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}
	goto done_unlock;
}

#pragma inline(so_queue_msg_impl)

/*
 * so_queue_msg() - protocol upcall to deliver incoming data.  Thin
 * wrapper that runs so_queue_msg_impl() starting with the bottom-most
 * socket filter.
 */
ssize_t
so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp)
{
	struct sonode *so = (struct sonode *)sock_handle;

	return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
	    so->so_filter_bottom));
}

/*
 * Set the offset of where the oob data is relative to the bytes in
 * queued. Also generate SIGURG
 */
void
so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
{
	struct sonode *so;

	ASSERT(offset >= 0);
	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	/*
	 * New urgent data on the way so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

	/*
	 * Record that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;

	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}

	/*
	 * set the offset where the urgent byte is
	 */
	so->so_oobmark = so->so_rcv_queued + offset;
	if (so->so_oobmark == 0)
		so->so_state |= SS_RCVATMARK;
	else
		so->so_state &= ~SS_RCVATMARK;

	/*
	 * NOTE(review): so_lock is not explicitly released here;
	 * presumably so_notify_oobsig() drops it, matching the
	 * "notify functions will drop the lock" convention used
	 * elsewhere in this file -- confirm.
	 */
	so_notify_oobsig(so);
}

/*
 * Queue the OOB byte
 */
static void
so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
{
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	ASSERT(mp != NULL);
	/*
	 * Out-of-line OOB data is parked in so_oobmsg; inline OOB data
	 * goes onto the normal receive queue.
	 */
	if (!IS_SO_OOB_INLINE(so)) {
		so->so_oobmsg = mp;
		so->so_state |= SS_HAVEOOBDATA;
	} else {
		so_enqueue_msg(so, mp, len);
	}

	/* so_notify_oobdata() is expected to drop so_lock */
	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
}

/*
 * so_close() - close down the socket.  Flushes pending receive data and
 * any queued-but-unaccepted connections, then hands the close to the
 * protocol, which may complete it synchronously or asynchronously
 * (EINPROGRESS, finished later via the so_closed() upcall).
 */
int
so_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	/*
	 * No new data will be enqueued once the CLOSING flag is set.
	 */
	mutex_enter(&so->so_lock);
	so->so_state |= SS_CLOSING;
	ASSERT(so_verify_oobstate(so));
	so_rcv_flush(so);
	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0)
		sof_sonode_closing(so);

	if (so->so_state & SS_ACCEPTCONN) {
		/*
		 * We grab and release the accept lock to ensure that any
		 * thread about to insert a socket in so_newconn completes
		 * before we flush the queue. Any thread calling so_newconn
		 * after we drop the lock will observe the SS_CLOSING flag,
		 * which will stop it from inserting the socket in the queue.
		 */
		mutex_enter(&so->so_acceptq_lock);
		mutex_exit(&so->so_acceptq_lock);

		so_acceptq_flush(so, B_TRUE);
	}

	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
	switch (error) {
	default:
		/* Protocol made a synchronous close; remove proto ref */
		VN_RELE(SOTOV(so));
		break;
	case EINPROGRESS:
		/*
		 * Protocol is in the process of closing, it will make a
		 * 'closed' upcall to remove the reference.
		 */
		error = 0;
		break;
	}

	return (error);
}

/*
 * Upcall made by the protocol when it's doing an asynchronous close. It
 * will drop the protocol's reference on the socket.
 */
void
so_closed(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	VN_RELE(SOTOV(so));
}

/*
 * so_get_vnode() - upcall returning the socket's vnode with a new hold;
 * the caller is responsible for the matching VN_RELE().
 */
vnode_t *
so_get_vnode(sock_upper_handle_t sock_handle)
{
	sonode_t *so = (sonode_t *)sock_handle;
	vnode_t *vn;

	vn = SOTOV(so);
	VN_HOLD(vn);

	return (vn);
}

/*
 * so_zcopy_notify() - upcall signalling zero-copy completion; wakes any
 * thread blocked on so_copy_cv.
 */
void
so_zcopy_notify(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	so->so_copyflag |= STZCNOTIFY;
	cv_broadcast(&so->so_copy_cv);
	mutex_exit(&so->so_lock);
}

/*
 * so_set_error() - upcall used by the protocol to report an async error.
 */
void
so_set_error(sock_upper_handle_t sock_handle, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	soseterror(so, error);

	/*
	 * NOTE(review): so_lock is not released here; presumably
	 * so_notify_error() drops it, as so_notify_data() does -- confirm.
	 */
	so_notify_error(so);
}

/*
 * so_recvmsg - read data from the socket
 *
 * There are two ways of obtaining data; either we ask the protocol to
 * copy directly into the supplied buffer, or we copy data from the
 * sonode's receive queue. The decision which one to use depends on
 * whether the protocol has a sd_recv_uio down call.
 */
int
so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	rval_t rval;
	int flags = 0;
	t_uscalar_t controllen, namelen;
	int error = 0;
	int ret;
	mblk_t *mctlp = NULL;
	union T_primitives *tpr;
	void *control;
	ssize_t saved_resid;
	struct uio *suiop;

	SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));

	/* Connection-oriented sockets must be connected (or shut down) */
	if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		SO_UNBLOCK_FALLBACK(so);
		return (ENOTCONN);
	}

	if (msg->msg_flags & MSG_PEEK)
		msg->msg_flags &= ~MSG_WAITALL;

	if (so->so_mode & SM_ATOMIC)
		msg->msg_flags |= MSG_TRUNC;

	if (msg->msg_flags & MSG_OOB) {
		if ((so->so_mode & SM_EXDATA) == 0) {
			error = EOPNOTSUPP;
		} else if (so->so_downcalls->sd_recv_uio != NULL) {
			error = (*so->so_downcalls->sd_recv_uio)
			    (so->so_proto_handle, uiop, msg, cr);
		} else {
			error = sorecvoob(so, msg, uiop, msg->msg_flags,
			    IS_SO_OOB_INLINE(so));
		}
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * If the protocol has the recv down call, then pass the request
	 * down.
	 */
	if (so->so_downcalls->sd_recv_uio != NULL) {
		error = (*so->so_downcalls->sd_recv_uio)
		    (so->so_proto_handle, uiop, msg, cr);
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * Reading data from the socket buffer
	 */
	flags = msg->msg_flags;
	msg->msg_flags = 0;

	/*
	 * Set msg_controllen and msg_namelen to zero here to make it
	 * simpler in the cases that no control or name is returned.
	 */
	controllen = msg->msg_controllen;
	namelen = msg->msg_namelen;
	msg->msg_controllen = 0;
	msg->msg_namelen = 0;

	mutex_enter(&so->so_lock);
	/* Set SOREADLOCKED */
	error = so_lock_read_intr(so,
	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
	mutex_exit(&so->so_lock);
	if (error) {
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	suiop = sod_rcv_init(so, flags, &uiop);
retry:
	saved_resid = uiop->uio_resid;
	error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
	if (error != 0) {
		goto out;
	}
	/*
	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
	 * For non-datagrams MOREDATA is used to set MSG_EOR.
	 */
	ASSERT(!(rval.r_val1 & MORECTL));
	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
		msg->msg_flags |= MSG_TRUNC;
	if (mctlp == NULL) {
		/* No control part: plain M_DATA was dequeued */
		dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));

		mutex_enter(&so->so_lock);
		/* Set MSG_EOR based on MOREDATA */
		if (!(rval.r_val1 & MOREDATA)) {
			if (so->so_state & SS_SAVEDEOR) {
				msg->msg_flags |= MSG_EOR;
				so->so_state &= ~SS_SAVEDEOR;
			}
		}
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}

		goto out_locked;
	}
	/* so_queue_msg has already verified length and alignment */
	tpr = (union T_primitives *)mctlp->b_rptr;
	dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
	switch (tpr->type) {
	case T_DATA_IND: {
		/*
		 * Set msg_flags to MSG_EOR based on
		 * MORE_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	case T_UNITDATA_IND: {
		void *addr;
		t_uscalar_t addrlen;
		void *abuf;
		t_uscalar_t optlen;
		void *opt;

		if (namelen != 0) {
			/* Caller wants source address */
			addrlen = tpr->unitdata_ind.SRC_length;
			addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
			    addrlen, 1);
			if (addr == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			ASSERT(so->so_family != AF_UNIX);
		}
		optlen = tpr->unitdata_ind.OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Extract any source address option.
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			if (so->so_family == AF_UNIX)
				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (namelen != 0) {
			/*
			 * Return address to caller.
			 * Caller handles truncation if length
			 * exceeds msg_namelen.
			 * NOTE: AF_UNIX NUL termination is ensured by
			 * the sender's copyin_name().
			 */
			abuf = kmem_alloc(addrlen, KM_SLEEP);

			bcopy(addr, abuf, addrlen);
			msg->msg_name = abuf;
			msg->msg_namelen = addrlen;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				if (msg->msg_namelen != 0)
					kmem_free(msg->msg_name,
					    msg->msg_namelen);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		freemsg(mctlp);
		goto out;
	}
	case T_OPTDATA_IND: {
		struct T_optdata_req *tdr;
		void *opt;
		t_uscalar_t optlen;

		tdr = (struct T_optdata_req *)mctlp->b_rptr;
		optlen = tdr->OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;
			/*
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp,
			    tpr->optdata_ind.OPT_offset, optlen,
			    __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}

			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * DATA_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 * Not possible to wait if control info was received.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    controllen == 0 &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	default:
		cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
		    tpr->type);
		freemsg(mctlp);
		error = EPROTO;
		ASSERT(0);
	}
out:
	mutex_enter(&so->so_lock);
out_locked:
	ret = sod_rcv_done(so, suiop, uiop);
	if (ret != 0 && error == 0)
		error = ret;

	so_unlock_read(so);	/* Clear SOREADLOCKED */
	mutex_exit(&so->so_lock);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

/*
 * Default socket operations vector; entry order must match sonodeops_t.
 */
sonodeops_t so_sonodeops = {
	so_init,		/* sop_init */
	so_accept,		/* sop_accept */
	so_bind,		/* sop_bind */
	so_listen,		/* sop_listen */
	so_connect,		/* sop_connect */
	so_recvmsg,		/* sop_recvmsg */
	so_sendmsg,		/* sop_sendmsg */
	so_sendmblk,		/* sop_sendmblk */
	so_getpeername,		/* sop_getpeername */
	so_getsockname,		/*
sop_getsockname */
	so_shutdown,		/* sop_shutdown */
	so_getsockopt,		/* sop_getsockopt */
	so_setsockopt,		/* sop_setsockopt */
	so_ioctl,		/* sop_ioctl */
	so_poll,		/* sop_poll */
	so_close,		/* sop_close */
};

/*
 * Upcalls handed to the protocol (see *sock_upcallsp assignment in
 * so_newconn above).  Entry order must match sock_upcalls_t in
 * <sys/socket_proto.h>.
 */
sock_upcalls_t so_upcalls = {
	so_newconn,
	so_connected,
	so_disconnected,
	so_opctl,
	so_queue_msg,
	so_set_prop,
	so_txq_full,
	so_signal_oob,
	so_zcopy_notify,
	so_set_error,
	so_closed,
	so_get_vnode
};