1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 2015, Joyent, Inc. All rights reserved. 28 * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/debug.h> 36 #include <sys/cmn_err.h> 37 38 #include <sys/stropts.h> 39 #include <sys/socket.h> 40 #include <sys/socketvar.h> 41 42 #define _SUN_TPI_VERSION 2 43 #include <sys/tihdr.h> 44 #include <sys/sockio.h> 45 #include <sys/kmem_impl.h> 46 47 #include <sys/strsubr.h> 48 #include <sys/strsun.h> 49 #include <sys/ddi.h> 50 #include <netinet/in.h> 51 #include <inet/ip.h> 52 53 #include <fs/sockfs/sockcommon.h> 54 #include <fs/sockfs/sockfilter_impl.h> 55 56 #include <sys/socket_proto.h> 57 58 #include <fs/sockfs/socktpi_impl.h> 59 #include <fs/sockfs/sodirect.h> 60 #include <sys/tihdr.h> 61 #include <fs/sockfs/nl7c.h> 62 63 extern int xnet_skip_checks; 64 extern int xnet_check_print; 65 66 static void so_queue_oob(struct sonode *, mblk_t *, size_t); 67 68 69 /*ARGSUSED*/ 70 int 71 so_accept_notsupp(struct sonode *lso, int fflag, 72 struct cred *cr, struct sonode **nsop) 73 { 74 return (EOPNOTSUPP); 75 } 76 77 /*ARGSUSED*/ 78 int 79 so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr) 80 { 81 return (EOPNOTSUPP); 82 } 83 84 /*ARGSUSED*/ 85 int 86 so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa, 87 socklen_t *len, struct cred *cr) 88 { 89 return (EOPNOTSUPP); 90 } 91 92 /*ARGSUSED*/ 93 int 94 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr, 95 socklen_t *addrlen, boolean_t accept, struct cred *cr) 96 { 97 return (EOPNOTSUPP); 98 } 99 100 /*ARGSUSED*/ 101 int 102 so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr) 103 { 104 return (EOPNOTSUPP); 105 } 106 107 /*ARGSUSED*/ 108 int 109 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag, 110 struct cred *cr, mblk_t **mpp) 111 { 112 return (EOPNOTSUPP); 113 } 114 115 /* 116 * Generic Socket Ops 117 */ 118 119 /* ARGSUSED */ 120 int 121 so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags) 122 { 123 return (socket_init_common(so, pso, flags, cr)); 124 } 125 126 int 127 so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 128 int flags, struct cred *cr) 129 { 130 int error; 131 132 SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr)); 133 134 ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD); 135 136 /* X/Open requires this check */ 137 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 138 if (xnet_check_print) { 139 printf("sockfs: X/Open bind state check " 140 "caused EINVAL\n"); 141 } 142 error = EINVAL; 143 goto done; 144 } 145 146 /* 147 * a bind to a NULL address is interpreted as unbind. So just 148 * do the downcall. 149 */ 150 if (name == NULL) 151 goto dobind; 152 153 switch (so->so_family) { 154 case AF_INET: 155 if ((size_t)namelen != sizeof (sin_t)) { 156 error = name->sa_family != so->so_family ? 157 EAFNOSUPPORT : EINVAL; 158 eprintsoline(so, error); 159 goto done; 160 } 161 162 if ((flags & _SOBIND_XPG4_2) && 163 (name->sa_family != so->so_family)) { 164 /* 165 * This check has to be made for X/Open 166 * sockets however application failures have 167 * been observed when it is applied to 168 * all sockets. 169 */ 170 error = EAFNOSUPPORT; 171 eprintsoline(so, error); 172 goto done; 173 } 174 /* 175 * Force a zero sa_family to match so_family. 176 * 177 * Some programs like inetd(1M) don't set the 178 * family field. Other programs leave 179 * sin_family set to garbage - SunOS 4.X does 180 * not check the family field on a bind. 181 * We use the family field that 182 * was passed in to the socket() call. 183 */ 184 name->sa_family = so->so_family; 185 break; 186 187 case AF_INET6: { 188 #ifdef DEBUG 189 sin6_t *sin6 = (sin6_t *)name; 190 #endif 191 if ((size_t)namelen != sizeof (sin6_t)) { 192 error = name->sa_family != so->so_family ? 193 EAFNOSUPPORT : EINVAL; 194 eprintsoline(so, error); 195 goto done; 196 } 197 198 if (name->sa_family != so->so_family) { 199 /* 200 * With IPv6 we require the family to match 201 * unlike in IPv4. 202 */ 203 error = EAFNOSUPPORT; 204 eprintsoline(so, error); 205 goto done; 206 } 207 #ifdef DEBUG 208 /* 209 * Verify that apps don't forget to clear 210 * sin6_scope_id etc 211 */ 212 if (sin6->sin6_scope_id != 0 && 213 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 214 zcmn_err(getzoneid(), CE_WARN, 215 "bind with uninitialized sin6_scope_id " 216 "(%d) on socket. Pid = %d\n", 217 (int)sin6->sin6_scope_id, 218 (int)curproc->p_pid); 219 } 220 if (sin6->__sin6_src_id != 0) { 221 zcmn_err(getzoneid(), CE_WARN, 222 "bind with uninitialized __sin6_src_id " 223 "(%d) on socket. Pid = %d\n", 224 (int)sin6->__sin6_src_id, 225 (int)curproc->p_pid); 226 } 227 #endif /* DEBUG */ 228 229 break; 230 } 231 default: 232 /* Just pass the request to the protocol */ 233 goto dobind; 234 } 235 236 /* 237 * First we check if either NCA or KSSL has been enabled for 238 * the requested address, and if so, we fall back to TPI. 239 * If neither of those two services are enabled, then we just 240 * pass the request to the protocol. 241 * 242 * Note that KSSL can only be enabled on a socket if NCA is NOT 243 * enabled for that socket, hence the else-statement below. 244 */ 245 if (nl7c_enabled && ((so->so_family == AF_INET || 246 so->so_family == AF_INET6) && 247 nl7c_lookup_addr(name, namelen) != NULL)) { 248 /* 249 * NL7C is not supported in non-global zones, 250 * we enforce this restriction here. 251 */ 252 if (so->so_zoneid == GLOBAL_ZONEID) { 253 /* NCA should be used, so fall back to TPI */ 254 error = so_tpi_fallback(so, cr); 255 SO_UNBLOCK_FALLBACK(so); 256 if (error) 257 return (error); 258 else 259 return (SOP_BIND(so, name, namelen, flags, cr)); 260 } 261 } 262 263 dobind: 264 if (so->so_filter_active == 0 || 265 (error = sof_filter_bind(so, name, &namelen, cr)) < 0) { 266 error = (*so->so_downcalls->sd_bind) 267 (so->so_proto_handle, name, namelen, cr); 268 } 269 done: 270 SO_UNBLOCK_FALLBACK(so); 271 272 return (error); 273 } 274 275 int 276 so_listen(struct sonode *so, int backlog, struct cred *cr) 277 { 278 int error = 0; 279 280 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 281 SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr)); 282 283 if ((so)->so_filter_active == 0 || 284 (error = sof_filter_listen(so, &backlog, cr)) < 0) 285 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle, 286 backlog, cr); 287 288 SO_UNBLOCK_FALLBACK(so); 289 290 return (error); 291 } 292 293 294 int 295 so_connect(struct sonode *so, struct sockaddr *name, 296 socklen_t namelen, int fflag, int flags, struct cred *cr) 297 { 298 int error = 0; 299 sock_connid_t id; 300 301 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 302 SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr)); 303 304 /* 305 * If there is a pending error, return error 306 * This can happen if a non blocking operation caused an error. 307 */ 308 309 if (so->so_error != 0) { 310 mutex_enter(&so->so_lock); 311 error = sogeterr(so, B_TRUE); 312 mutex_exit(&so->so_lock); 313 if (error != 0) 314 goto done; 315 } 316 317 if (so->so_filter_active == 0 || 318 (error = sof_filter_connect(so, (struct sockaddr *)name, 319 &namelen, cr)) < 0) { 320 error = (*so->so_downcalls->sd_connect)(so->so_proto_handle, 321 name, namelen, &id, cr); 322 323 if (error == EINPROGRESS) 324 error = so_wait_connected(so, 325 fflag & (FNONBLOCK|FNDELAY), id); 326 } 327 done: 328 SO_UNBLOCK_FALLBACK(so); 329 return (error); 330 } 331 332 /*ARGSUSED*/ 333 int 334 so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop) 335 { 336 int error = 0; 337 struct sonode *nso; 338 339 *nsop = NULL; 340 341 SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop)); 342 if ((so->so_state & SS_ACCEPTCONN) == 0) { 343 SO_UNBLOCK_FALLBACK(so); 344 return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ? 345 EOPNOTSUPP : EINVAL); 346 } 347 348 if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)), 349 &nso)) == 0) { 350 ASSERT(nso != NULL); 351 352 /* finish the accept */ 353 if ((so->so_filter_active > 0 && 354 (error = sof_filter_accept(nso, cr)) > 0) || 355 (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle, 356 nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) { 357 (void) socket_close(nso, 0, cr); 358 socket_destroy(nso); 359 } else { 360 *nsop = nso; 361 } 362 } 363 364 SO_UNBLOCK_FALLBACK(so); 365 return (error); 366 } 367 368 int 369 so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 370 struct cred *cr) 371 { 372 int error, flags; 373 boolean_t dontblock; 374 ssize_t orig_resid; 375 mblk_t *mp; 376 377 SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr)); 378 379 flags = msg->msg_flags; 380 error = 0; 381 dontblock = (flags & MSG_DONTWAIT) || 382 (uiop->uio_fmode & (FNONBLOCK|FNDELAY)); 383 384 if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) { 385 /* 386 * Old way of passing fd's is not supported 387 */ 388 SO_UNBLOCK_FALLBACK(so); 389 return (EOPNOTSUPP); 390 } 391 392 if ((so->so_mode & SM_ATOMIC) && 393 uiop->uio_resid > so->so_proto_props.sopp_maxpsz && 394 so->so_proto_props.sopp_maxpsz != -1) { 395 SO_UNBLOCK_FALLBACK(so); 396 return (EMSGSIZE); 397 } 398 399 /* 400 * For atomic sends we will only do one iteration. 401 */ 402 do { 403 if (so->so_state & SS_CANTSENDMORE) { 404 error = EPIPE; 405 break; 406 } 407 408 if (so->so_error != 0) { 409 mutex_enter(&so->so_lock); 410 error = sogeterr(so, B_TRUE); 411 mutex_exit(&so->so_lock); 412 if (error != 0) 413 break; 414 } 415 416 /* 417 * Send down OOB messages even if the send path is being 418 * flow controlled (assuming the protocol supports OOB data). 419 */ 420 if (flags & MSG_OOB) { 421 if ((so->so_mode & SM_EXDATA) == 0) { 422 error = EOPNOTSUPP; 423 break; 424 } 425 } else if (SO_SND_FLOWCTRLD(so)) { 426 /* 427 * Need to wait until the protocol is ready to receive 428 * more data for transmission. 429 */ 430 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 431 break; 432 } 433 434 /* 435 * Time to send data to the protocol. We either copy the 436 * data into mblks or pass the uio directly to the protocol. 437 * We decide what to do based on the available down calls. 438 */ 439 if (so->so_downcalls->sd_send_uio != NULL) { 440 error = (*so->so_downcalls->sd_send_uio) 441 (so->so_proto_handle, uiop, msg, cr); 442 if (error != 0) 443 break; 444 } else { 445 /* save the resid in case of failure */ 446 orig_resid = uiop->uio_resid; 447 448 if ((mp = socopyinuio(uiop, 449 so->so_proto_props.sopp_maxpsz, 450 so->so_proto_props.sopp_wroff, 451 so->so_proto_props.sopp_maxblk, 452 so->so_proto_props.sopp_tail, &error)) == NULL) { 453 break; 454 } 455 ASSERT(uiop->uio_resid >= 0); 456 457 if (so->so_filter_active > 0 && 458 ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr, 459 &error)) == NULL)) { 460 if (error != 0) 461 break; 462 continue; 463 } 464 error = (*so->so_downcalls->sd_send) 465 (so->so_proto_handle, mp, msg, cr); 466 if (error != 0) { 467 /* 468 * The send failed. We do not have to free the 469 * mblks, because that is the protocol's 470 * responsibility. However, uio_resid must 471 * remain accurate, so adjust that here. 472 */ 473 uiop->uio_resid = orig_resid; 474 break; 475 } 476 } 477 } while (uiop->uio_resid > 0); 478 479 SO_UNBLOCK_FALLBACK(so); 480 481 return (error); 482 } 483 484 int 485 so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag, 486 struct cred *cr, mblk_t **mpp, sof_instance_t *fil, 487 boolean_t fil_inject) 488 { 489 int error; 490 boolean_t dontblock; 491 size_t size; 492 mblk_t *mp = *mpp; 493 494 if (so->so_downcalls->sd_send == NULL) 495 return (EOPNOTSUPP); 496 497 error = 0; 498 dontblock = (msg->msg_flags & MSG_DONTWAIT) || 499 (fflag & (FNONBLOCK|FNDELAY)); 500 size = msgdsize(mp); 501 502 if ((so->so_mode & SM_ATOMIC) && 503 size > so->so_proto_props.sopp_maxpsz && 504 so->so_proto_props.sopp_maxpsz != -1) { 505 SO_UNBLOCK_FALLBACK(so); 506 return (EMSGSIZE); 507 } 508 509 while (mp != NULL) { 510 mblk_t *nmp, *last_mblk; 511 size_t mlen; 512 513 if (so->so_state & SS_CANTSENDMORE) { 514 error = EPIPE; 515 break; 516 } 517 if (so->so_error != 0) { 518 mutex_enter(&so->so_lock); 519 error = sogeterr(so, B_TRUE); 520 mutex_exit(&so->so_lock); 521 if (error != 0) 522 break; 523 } 524 /* Socket filters are not flow controlled */ 525 if (SO_SND_FLOWCTRLD(so) && !fil_inject) { 526 /* 527 * Need to wait until the protocol is ready to receive 528 * more data for transmission. 529 */ 530 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 531 break; 532 } 533 534 /* 535 * We only allow so_maxpsz of data to be sent down to 536 * the protocol at time. 537 */ 538 mlen = MBLKL(mp); 539 nmp = mp->b_cont; 540 last_mblk = mp; 541 while (nmp != NULL) { 542 mlen += MBLKL(nmp); 543 if (mlen > so->so_proto_props.sopp_maxpsz) { 544 last_mblk->b_cont = NULL; 545 break; 546 } 547 last_mblk = nmp; 548 nmp = nmp->b_cont; 549 } 550 551 if (so->so_filter_active > 0 && 552 (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg, 553 cr, &error)) == NULL) { 554 *mpp = mp = nmp; 555 if (error != 0) 556 break; 557 continue; 558 } 559 error = (*so->so_downcalls->sd_send) 560 (so->so_proto_handle, mp, msg, cr); 561 if (error != 0) { 562 /* 563 * The send failed. The protocol will free the mblks 564 * that were sent down. Let the caller deal with the 565 * rest. 566 */ 567 *mpp = nmp; 568 break; 569 } 570 571 *mpp = mp = nmp; 572 } 573 /* Let the filter know whether the protocol is flow controlled */ 574 if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so)) 575 error = ENOSPC; 576 577 return (error); 578 } 579 580 #pragma inline(so_sendmblk_impl) 581 582 int 583 so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 584 struct cred *cr, mblk_t **mpp) 585 { 586 int error; 587 588 SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp)); 589 590 error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top, 591 B_FALSE); 592 593 SO_UNBLOCK_FALLBACK(so); 594 595 return (error); 596 } 597 598 int 599 so_shutdown(struct sonode *so, int how, struct cred *cr) 600 { 601 int error; 602 603 SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr)); 604 605 /* 606 * SunOS 4.X has no check for datagram sockets. 607 * 5.X checks that it is connected (ENOTCONN) 608 * X/Open requires that we check the connected state. 609 */ 610 if (!(so->so_state & SS_ISCONNECTED)) { 611 if (!xnet_skip_checks) { 612 error = ENOTCONN; 613 if (xnet_check_print) { 614 printf("sockfs: X/Open shutdown check " 615 "caused ENOTCONN\n"); 616 } 617 } 618 goto done; 619 } 620 621 if (so->so_filter_active == 0 || 622 (error = sof_filter_shutdown(so, &how, cr)) < 0) 623 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle, 624 how, cr)); 625 626 /* 627 * Protocol agreed to shutdown. We need to flush the 628 * receive buffer if the receive side is being shutdown. 629 */ 630 if (error == 0 && how != SHUT_WR) { 631 mutex_enter(&so->so_lock); 632 /* wait for active reader to finish */ 633 (void) so_lock_read(so, 0); 634 635 so_rcv_flush(so); 636 637 so_unlock_read(so); 638 mutex_exit(&so->so_lock); 639 } 640 641 done: 642 SO_UNBLOCK_FALLBACK(so); 643 return (error); 644 } 645 646 int 647 so_getsockname(struct sonode *so, struct sockaddr *addr, 648 socklen_t *addrlen, struct cred *cr) 649 { 650 int error; 651 652 SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr)); 653 654 if (so->so_filter_active == 0 || 655 (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0) 656 error = (*so->so_downcalls->sd_getsockname) 657 (so->so_proto_handle, addr, addrlen, cr); 658 659 SO_UNBLOCK_FALLBACK(so); 660 return (error); 661 } 662 663 int 664 so_getpeername(struct sonode *so, struct sockaddr *addr, 665 socklen_t *addrlen, boolean_t accept, struct cred *cr) 666 { 667 int error; 668 669 SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr)); 670 671 if (accept) { 672 error = (*so->so_downcalls->sd_getpeername) 673 (so->so_proto_handle, addr, addrlen, cr); 674 } else if (!(so->so_state & SS_ISCONNECTED)) { 675 error = ENOTCONN; 676 } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 677 /* Added this check for X/Open */ 678 error = EINVAL; 679 if (xnet_check_print) { 680 printf("sockfs: X/Open getpeername check => EINVAL\n"); 681 } 682 } else if (so->so_filter_active == 0 || 683 (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) { 684 error = (*so->so_downcalls->sd_getpeername) 685 (so->so_proto_handle, addr, addrlen, cr); 686 } 687 688 SO_UNBLOCK_FALLBACK(so); 689 return (error); 690 } 691 692 int 693 so_getsockopt(struct sonode *so, int level, int option_name, 694 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 695 { 696 int error = 0; 697 698 if (level == SOL_FILTER) 699 return (sof_getsockopt(so, option_name, optval, optlenp, cr)); 700 701 SO_BLOCK_FALLBACK(so, 702 SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr)); 703 704 if ((so->so_filter_active == 0 || 705 (error = sof_filter_getsockopt(so, level, option_name, optval, 706 optlenp, cr)) < 0) && 707 (error = socket_getopt_common(so, level, option_name, optval, 708 optlenp, flags)) < 0) { 709 error = (*so->so_downcalls->sd_getsockopt) 710 (so->so_proto_handle, level, option_name, optval, optlenp, 711 cr); 712 if (error == ENOPROTOOPT) { 713 if (level == SOL_SOCKET) { 714 /* 715 * If a protocol does not support a particular 716 * socket option, set can fail (not allowed) 717 * but get can not fail. This is the previous 718 * sockfs bahvior. 719 */ 720 switch (option_name) { 721 case SO_LINGER: 722 if (*optlenp < (t_uscalar_t) 723 sizeof (struct linger)) { 724 error = EINVAL; 725 break; 726 } 727 error = 0; 728 bzero(optval, sizeof (struct linger)); 729 *optlenp = sizeof (struct linger); 730 break; 731 case SO_RCVTIMEO: 732 case SO_SNDTIMEO: 733 if (*optlenp < (t_uscalar_t) 734 sizeof (struct timeval)) { 735 error = EINVAL; 736 break; 737 } 738 error = 0; 739 bzero(optval, sizeof (struct timeval)); 740 *optlenp = sizeof (struct timeval); 741 break; 742 case SO_SND_BUFINFO: 743 if (*optlenp < (t_uscalar_t) 744 sizeof (struct so_snd_bufinfo)) { 745 error = EINVAL; 746 break; 747 } 748 error = 0; 749 bzero(optval, 750 sizeof (struct so_snd_bufinfo)); 751 *optlenp = 752 sizeof (struct so_snd_bufinfo); 753 break; 754 case SO_DEBUG: 755 case SO_REUSEADDR: 756 case SO_KEEPALIVE: 757 case SO_DONTROUTE: 758 case SO_BROADCAST: 759 case SO_USELOOPBACK: 760 case SO_OOBINLINE: 761 case SO_DGRAM_ERRIND: 762 case SO_SNDBUF: 763 case SO_RCVBUF: 764 error = 0; 765 *((int32_t *)optval) = 0; 766 *optlenp = sizeof (int32_t); 767 break; 768 default: 769 break; 770 } 771 } 772 } 773 } 774 775 SO_UNBLOCK_FALLBACK(so); 776 return (error); 777 } 778 779 int 780 so_setsockopt(struct sonode *so, int level, int option_name, 781 const void *optval, socklen_t optlen, struct cred *cr) 782 { 783 int error = 0; 784 struct timeval tl; 785 const void *opt = optval; 786 787 if (level == SOL_FILTER) 788 return (sof_setsockopt(so, option_name, optval, optlen, cr)); 789 790 SO_BLOCK_FALLBACK(so, 791 SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr)); 792 793 /* X/Open requires this check */ 794 if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) { 795 SO_UNBLOCK_FALLBACK(so); 796 if (xnet_check_print) 797 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 798 return (EINVAL); 799 } 800 801 if (so->so_filter_active > 0 && 802 (error = sof_filter_setsockopt(so, level, option_name, 803 (void *)optval, &optlen, cr)) >= 0) 804 goto done; 805 806 if (level == SOL_SOCKET) { 807 switch (option_name) { 808 case SO_RCVTIMEO: 809 case SO_SNDTIMEO: { 810 /* 811 * We pass down these two options to protocol in order 812 * to support some third part protocols which need to 813 * know them. For those protocols which don't care 814 * these two options, simply return 0. 815 */ 816 clock_t t_usec; 817 818 if (get_udatamodel() == DATAMODEL_NONE || 819 get_udatamodel() == DATAMODEL_NATIVE) { 820 if (optlen != sizeof (struct timeval)) { 821 error = EINVAL; 822 goto done; 823 } 824 bcopy((struct timeval *)optval, &tl, 825 sizeof (struct timeval)); 826 } else { 827 if (optlen != sizeof (struct timeval32)) { 828 error = EINVAL; 829 goto done; 830 } 831 TIMEVAL32_TO_TIMEVAL(&tl, 832 (struct timeval32 *)optval); 833 } 834 opt = &tl; 835 optlen = sizeof (tl); 836 t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 837 mutex_enter(&so->so_lock); 838 if (option_name == SO_RCVTIMEO) 839 so->so_rcvtimeo = drv_usectohz(t_usec); 840 else 841 so->so_sndtimeo = drv_usectohz(t_usec); 842 mutex_exit(&so->so_lock); 843 break; 844 } 845 case SO_RCVBUF: 846 /* 847 * XXX XPG 4.2 applications retrieve SO_RCVBUF from 848 * sockfs since the transport might adjust the value 849 * and not return exactly what was set by the 850 * application. 851 */ 852 so->so_xpg_rcvbuf = *(int32_t *)optval; 853 break; 854 } 855 } 856 error = (*so->so_downcalls->sd_setsockopt) 857 (so->so_proto_handle, level, option_name, opt, optlen, cr); 858 done: 859 SO_UNBLOCK_FALLBACK(so); 860 return (error); 861 } 862 863 int 864 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 865 struct cred *cr, int32_t *rvalp) 866 { 867 int error = 0; 868 869 SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 870 871 /* 872 * If there is a pending error, return error 873 * This can happen if a non blocking operation caused an error. 874 */ 875 if (so->so_error != 0) { 876 mutex_enter(&so->so_lock); 877 error = sogeterr(so, B_TRUE); 878 mutex_exit(&so->so_lock); 879 if (error != 0) 880 goto done; 881 } 882 883 /* 884 * calling strioc can result in the socket falling back to TPI, 885 * if that is supported. 886 */ 887 if ((so->so_filter_active == 0 || 888 (error = sof_filter_ioctl(so, cmd, arg, mode, 889 rvalp, cr)) < 0) && 890 (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 && 891 (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) { 892 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle, 893 cmd, arg, mode, rvalp, cr); 894 } 895 896 done: 897 SO_UNBLOCK_FALLBACK(so); 898 899 return (error); 900 } 901 902 int 903 so_poll(struct sonode *so, short events, int anyyet, short *reventsp, 904 struct pollhead **phpp) 905 { 906 int state = so->so_state, mask; 907 *reventsp = 0; 908 909 /* 910 * In sockets the errors are represented as input/output events 911 */ 912 if (so->so_error != 0 && 913 ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) { 914 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events; 915 return (0); 916 } 917 918 /* 919 * If the socket is in a state where it can send data 920 * turn on POLLWRBAND and POLLOUT events. 921 */ 922 if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) { 923 /* 924 * out of band data is allowed even if the connection 925 * is flow controlled 926 */ 927 *reventsp |= POLLWRBAND & events; 928 if (!SO_SND_FLOWCTRLD(so)) { 929 /* 930 * As long as there is buffer to send data 931 * turn on POLLOUT events 932 */ 933 *reventsp |= POLLOUT & events; 934 } 935 } 936 937 /* 938 * Turn on POLLIN whenever there is data on the receive queue, 939 * or the socket is in a state where no more data will be received. 940 * Also, if the socket is accepting connections, flip the bit if 941 * there is something on the queue. 942 * 943 * We do an initial check for events without holding locks. However, 944 * if there are no event available, then we redo the check for POLLIN 945 * events under the lock. 946 */ 947 948 /* Pending connections */ 949 if (!list_is_empty(&so->so_acceptq_list)) 950 *reventsp |= (POLLIN|POLLRDNORM) & events; 951 952 /* 953 * If we're looking for POLLRDHUP, indicate it if we have sent the 954 * last rx signal for the socket. 955 */ 956 if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG)) 957 *reventsp |= POLLRDHUP; 958 959 /* Data */ 960 /* so_downcalls is null for sctp */ 961 if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) { 962 *reventsp |= (*so->so_downcalls->sd_poll) 963 (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet, 964 CRED()) & events; 965 ASSERT((*reventsp & ~events) == 0); 966 /* do not recheck events */ 967 events &= ~SO_PROTO_POLLEV; 968 } else { 969 if (SO_HAVE_DATA(so)) 970 *reventsp |= (POLLIN|POLLRDNORM) & events; 971 972 /* Urgent data */ 973 if ((state & SS_OOBPEND) != 0) { 974 *reventsp |= (POLLRDBAND | POLLPRI) & events; 975 } 976 977 /* 978 * If the socket has become disconnected, we set POLLHUP. 979 * Note that if we are in this state, we will have set POLLIN 980 * (SO_HAVE_DATA() is true on a disconnected socket), but not 981 * POLLOUT (SS_ISCONNECTED is false). This is in keeping with 982 * the semantics of POLLHUP, which is defined to be mutually 983 * exclusive with respect to POLLOUT but not POLLIN. We are 984 * therefore setting POLLHUP primarily for the benefit of 985 * those not polling on POLLIN, as they have no other way of 986 * knowing that the socket has been disconnected. 987 */ 988 mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG; 989 990 if ((state & (mask | SS_ISCONNECTED)) == mask) 991 *reventsp |= POLLHUP; 992 } 993 994 if ((!*reventsp && !anyyet) || (events & POLLET)) { 995 /* Check for read events again, but this time under lock */ 996 if (events & (POLLIN|POLLRDNORM)) { 997 mutex_enter(&so->so_lock); 998 if (SO_HAVE_DATA(so) || 999 !list_is_empty(&so->so_acceptq_list)) { 1000 if (events & POLLET) { 1001 so->so_pollev |= SO_POLLEV_IN; 1002 *phpp = &so->so_poll_list; 1003 } 1004 1005 mutex_exit(&so->so_lock); 1006 *reventsp |= (POLLIN|POLLRDNORM) & events; 1007 1008 return (0); 1009 } else { 1010 so->so_pollev |= SO_POLLEV_IN; 1011 mutex_exit(&so->so_lock); 1012 } 1013 } 1014 *phpp = &so->so_poll_list; 1015 } 1016 return (0); 1017 } 1018 1019 /* 1020 * Generic Upcalls 1021 */ 1022 void 1023 so_connected(sock_upper_handle_t sock_handle, sock_connid_t id, 1024 cred_t *peer_cred, pid_t peer_cpid) 1025 { 1026 struct sonode *so = (struct sonode *)sock_handle; 1027 1028 mutex_enter(&so->so_lock); 1029 ASSERT(so->so_proto_handle != NULL); 1030 1031 if (peer_cred != NULL) { 1032 if (so->so_peercred != NULL) 1033 crfree(so->so_peercred); 1034 crhold(peer_cred); 1035 so->so_peercred = peer_cred; 1036 so->so_cpid = peer_cpid; 1037 } 1038 1039 so->so_proto_connid = id; 1040 soisconnected(so); 1041 /* 1042 * Wake ones who're waiting for conn to become established. 1043 */ 1044 so_notify_connected(so); 1045 } 1046 1047 int 1048 so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error) 1049 { 1050 struct sonode *so = (struct sonode *)sock_handle; 1051 boolean_t connect_failed; 1052 1053 mutex_enter(&so->so_lock); 1054 1055 /* 1056 * If we aren't currently connected, then this isn't a disconnect but 1057 * rather a failure to connect. 1058 */ 1059 connect_failed = !(so->so_state & SS_ISCONNECTED); 1060 1061 so->so_proto_connid = id; 1062 soisdisconnected(so, error); 1063 so_notify_disconnected(so, connect_failed, error); 1064 1065 return (0); 1066 } 1067 1068 void 1069 so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action, 1070 uintptr_t arg) 1071 { 1072 struct sonode *so = (struct sonode *)sock_handle; 1073 1074 switch (action) { 1075 case SOCK_OPCTL_SHUT_SEND: 1076 mutex_enter(&so->so_lock); 1077 socantsendmore(so); 1078 so_notify_disconnecting(so); 1079 break; 1080 case SOCK_OPCTL_SHUT_RECV: { 1081 mutex_enter(&so->so_lock); 1082 socantrcvmore(so); 1083 so_notify_eof(so); 1084 break; 1085 } 1086 case SOCK_OPCTL_ENAB_ACCEPT: 1087 mutex_enter(&so->so_lock); 1088 so->so_state |= SS_ACCEPTCONN; 1089 so->so_backlog = (unsigned int)arg; 1090 /* 1091 * The protocol can stop generating newconn upcalls when 1092 * the backlog is full, so to make sure the listener does 1093 * not end up with a queue full of deferred connections 1094 * we reduce the backlog by one. Thus the listener will 1095 * start closing deferred connections before the backlog 1096 * is full. 1097 */ 1098 if (so->so_filter_active > 0) 1099 so->so_backlog = MAX(1, so->so_backlog - 1); 1100 mutex_exit(&so->so_lock); 1101 break; 1102 default: 1103 ASSERT(0); 1104 break; 1105 } 1106 } 1107 1108 void 1109 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull) 1110 { 1111 struct sonode *so = (struct sonode *)sock_handle; 1112 1113 if (qfull) { 1114 so_snd_qfull(so); 1115 } else { 1116 so_snd_qnotfull(so); 1117 mutex_enter(&so->so_lock); 1118 /* so_notify_writable drops so_lock */ 1119 so_notify_writable(so); 1120 } 1121 } 1122 1123 sock_upper_handle_t 1124 so_newconn(sock_upper_handle_t parenthandle, 1125 sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls, 1126 struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp) 1127 { 1128 struct sonode *so = (struct sonode *)parenthandle; 1129 struct sonode *nso; 1130 int error; 1131 1132 ASSERT(proto_handle != NULL); 1133 1134 if ((so->so_state & SS_ACCEPTCONN) == 0 || 1135 (so->so_acceptq_len >= so->so_backlog && 1136 (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) { 1137 return (NULL); 1138 } 1139 1140 nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP, 1141 &error); 1142 if (nso == NULL) 1143 return (NULL); 1144 1145 if (peer_cred != NULL) { 1146 crhold(peer_cred); 1147 nso->so_peercred = peer_cred; 1148 nso->so_cpid = peer_cpid; 1149 } 1150 nso->so_listener = so; 1151 1152 /* 1153 * The new socket (nso), proto_handle and sock_upcallsp are all 1154 * valid at this point. But as soon as nso is placed in the accept 1155 * queue that can no longer be assumed (since an accept() thread may 1156 * pull it off the queue and close the socket). 1157 */ 1158 *sock_upcallsp = &so_upcalls; 1159 1160 mutex_enter(&so->so_acceptq_lock); 1161 if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) { 1162 mutex_exit(&so->so_acceptq_lock); 1163 ASSERT(nso->so_count == 1); 1164 nso->so_count--; 1165 nso->so_listener = NULL; 1166 /* drop proto ref */ 1167 VN_RELE(SOTOV(nso)); 1168 socket_destroy(nso); 1169 return (NULL); 1170 } else { 1171 so->so_acceptq_len++; 1172 if (nso->so_state & SS_FIL_DEFER) { 1173 list_insert_tail(&so->so_acceptq_defer, nso); 1174 mutex_exit(&so->so_acceptq_lock); 1175 } else { 1176 list_insert_tail(&so->so_acceptq_list, nso); 1177 cv_signal(&so->so_acceptq_cv); 1178 mutex_exit(&so->so_acceptq_lock); 1179 mutex_enter(&so->so_lock); 1180 so_notify_newconn(so); 1181 } 1182 1183 return ((sock_upper_handle_t)nso); 1184 } 1185 } 1186 1187 void 1188 so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp) 1189 { 1190 struct sonode *so; 1191 1192 so = (struct sonode *)sock_handle; 1193 1194 mutex_enter(&so->so_lock); 1195 1196 if (soppp->sopp_flags & SOCKOPT_MAXBLK) 1197 so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk; 1198 if (soppp->sopp_flags & SOCKOPT_WROFF) 1199 so->so_proto_props.sopp_wroff = soppp->sopp_wroff; 1200 if (soppp->sopp_flags & SOCKOPT_TAIL) 1201 so->so_proto_props.sopp_tail = soppp->sopp_tail; 1202 if (soppp->sopp_flags & SOCKOPT_RCVHIWAT) 1203 so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat; 1204 if (soppp->sopp_flags & SOCKOPT_RCVLOWAT) 1205 so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat; 1206 if (soppp->sopp_flags & SOCKOPT_MAXPSZ) 1207 so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz; 1208 if (soppp->sopp_flags & SOCKOPT_MINPSZ) 1209 so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz; 1210 if (soppp->sopp_flags & SOCKOPT_ZCOPY) { 1211 if (soppp->sopp_zcopyflag & ZCVMSAFE) { 1212 so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE; 1213 so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE; 1214 } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) { 1215 so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE; 1216 so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE; 1217 } 1218 1219 if (soppp->sopp_zcopyflag & COPYCACHED) { 1220 so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED; 1221 } 1222 } 1223 if (soppp->sopp_flags & SOCKOPT_OOBINLINE) 1224 so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline; 1225 if (soppp->sopp_flags & SOCKOPT_RCVTIMER) 1226 so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer; 1227 if (soppp->sopp_flags & SOCKOPT_RCVTHRESH) 1228 so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh; 1229 if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN) 1230 so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen; 1231 if (soppp->sopp_flags & SOCKOPT_LOOPBACK) 1232 so->so_proto_props.sopp_loopback = soppp->sopp_loopback; 1233 1234 mutex_exit(&so->so_lock); 1235 1236 if (so->so_filter_active > 0) { 1237 sof_instance_t *inst; 1238 ssize_t maxblk; 1239 ushort_t wroff, tail; 1240 maxblk = so->so_proto_props.sopp_maxblk; 1241 wroff = so->so_proto_props.sopp_wroff; 1242 tail = so->so_proto_props.sopp_tail; 1243 for (inst = so->so_filter_bottom; inst != NULL; 1244 inst = inst->sofi_prev) { 1245 if (SOF_INTERESTED(inst, mblk_prop)) { 1246 (*inst->sofi_ops->sofop_mblk_prop)( 1247 (sof_handle_t)inst, inst->sofi_cookie, 1248 &maxblk, &wroff, &tail); 1249 } 1250 } 1251 mutex_enter(&so->so_lock); 1252 so->so_proto_props.sopp_maxblk = maxblk; 1253 so->so_proto_props.sopp_wroff = wroff; 1254 so->so_proto_props.sopp_tail = tail; 1255 mutex_exit(&so->so_lock); 1256 } 1257 #ifdef DEBUG 1258 soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL | 1259 SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ | 1260 SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER | 1261 SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ | 1262 SOCKOPT_LOOPBACK); 1263 ASSERT(soppp->sopp_flags == 0); 1264 #endif 1265 } 1266 1267 /* ARGSUSED */ 1268 ssize_t 1269 so_queue_msg_impl(struct sonode *so, mblk_t *mp, 1270 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp, 1271 sof_instance_t *filter) 1272 { 1273 boolean_t force_push = B_TRUE; 1274 int space_left; 1275 sodirect_t *sodp = so->so_direct; 1276 1277 ASSERT(errorp != NULL); 1278 *errorp = 0; 1279 if (mp == NULL) { 1280 if (so->so_downcalls->sd_recv_uio != NULL) { 1281 mutex_enter(&so->so_lock); 1282 /* the notify functions will drop the lock */ 1283 if (flags & MSG_OOB) 1284 so_notify_oobdata(so, IS_SO_OOB_INLINE(so)); 1285 else 1286 so_notify_data(so, msg_size); 1287 return (0); 1288 } 1289 ASSERT(msg_size == 0); 1290 mutex_enter(&so->so_lock); 1291 goto space_check; 1292 } 1293 1294 ASSERT(mp->b_next == NULL); 1295 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO); 1296 ASSERT(msg_size == msgdsize(mp)); 1297 1298 if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1299 /* The read pointer is not aligned correctly for TPI */ 1300 zcmn_err(getzoneid(), CE_WARN, 1301 "sockfs: Unaligned TPI message received. rptr = %p\n", 1302 (void *)mp->b_rptr); 1303 freemsg(mp); 1304 mutex_enter(&so->so_lock); 1305 if (sodp != NULL) 1306 SOD_UIOAFINI(sodp); 1307 goto space_check; 1308 } 1309 1310 if (so->so_filter_active > 0) { 1311 for (; filter != NULL; filter = filter->sofi_prev) { 1312 if (!SOF_INTERESTED(filter, data_in)) 1313 continue; 1314 mp = (*filter->sofi_ops->sofop_data_in)( 1315 (sof_handle_t)filter, filter->sofi_cookie, mp, 1316 flags, &msg_size); 1317 ASSERT(msgdsize(mp) == msg_size); 1318 DTRACE_PROBE2(filter__data, (sof_instance_t), filter, 1319 (mblk_t *), mp); 1320 /* Data was consumed/dropped, just do space check */ 1321 if (msg_size == 0) { 1322 mutex_enter(&so->so_lock); 1323 goto space_check; 1324 } 1325 } 1326 } 1327 1328 mutex_enter(&so->so_lock); 1329 if (so->so_krecv_cb != NULL) { 1330 boolean_t cont; 1331 so_krecv_f func = so->so_krecv_cb; 1332 void *arg = so->so_krecv_arg; 1333 1334 mutex_exit(&so->so_lock); 1335 cont = func(so, mp, msg_size, flags & MSG_OOB, arg); 1336 mutex_enter(&so->so_lock); 1337 if (cont == B_TRUE) { 1338 space_left = so->so_rcvbuf; 1339 } else { 1340 so->so_rcv_queued = so->so_rcvlowat; 1341 *errorp = ENOSPC; 1342 space_left = -1; 1343 } 1344 goto done_unlock; 1345 } 1346 mutex_exit(&so->so_lock); 1347 1348 if (flags & MSG_OOB) { 1349 so_queue_oob(so, mp, msg_size); 1350 mutex_enter(&so->so_lock); 1351 goto space_check; 1352 } 1353 1354 if (force_pushp != NULL) 1355 force_push = *force_pushp; 1356 1357 mutex_enter(&so->so_lock); 1358 if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) { 1359 if (sodp != NULL) 1360 SOD_DISABLE(sodp); 1361 mutex_exit(&so->so_lock); 1362 *errorp = EOPNOTSUPP; 1363 return (-1); 1364 } 1365 if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) { 1366 freemsg(mp); 1367 if (sodp != NULL) 1368 SOD_DISABLE(sodp); 1369 mutex_exit(&so->so_lock); 1370 return (0); 1371 } 1372 1373 /* process the mblk via I/OAT if capable */ 1374 if (sodp != NULL && sodp->sod_enabled) { 1375 if (DB_TYPE(mp) == M_DATA) { 1376 sod_uioa_mblk_init(sodp, mp, msg_size); 1377 } else { 1378 SOD_UIOAFINI(sodp); 1379 } 1380 } 1381 1382 if (mp->b_next == NULL) { 1383 so_enqueue_msg(so, mp, msg_size); 1384 } else { 1385 do { 1386 mblk_t *nmp; 1387 1388 if ((nmp = mp->b_next) != NULL) { 1389 mp->b_next = NULL; 1390 } 1391 so_enqueue_msg(so, mp, msgdsize(mp)); 1392 mp = nmp; 1393 } while (mp != NULL); 1394 } 1395 1396 space_left = so->so_rcvbuf - so->so_rcv_queued; 1397 if (space_left <= 0) { 1398 so->so_flowctrld = B_TRUE; 1399 *errorp = ENOSPC; 1400 space_left = -1; 1401 } 1402 1403 if (force_push || so->so_rcv_queued >= so->so_rcv_thresh || 1404 so->so_rcv_queued >= so->so_rcv_wanted) { 1405 SOCKET_TIMER_CANCEL(so); 1406 /* 1407 * so_notify_data will release the lock 1408 */ 1409 so_notify_data(so, so->so_rcv_queued); 1410 1411 if (force_pushp != NULL) 1412 *force_pushp = B_TRUE; 1413 goto done; 1414 } else if (so->so_rcv_timer_tid == 0) { 1415 /* Make sure the recv push timer is running */ 1416 SOCKET_TIMER_START(so); 1417 } 1418 1419 done_unlock: 1420 mutex_exit(&so->so_lock); 1421 done: 1422 return (space_left); 1423 1424 space_check: 1425 space_left = so->so_rcvbuf - so->so_rcv_queued; 1426 if (space_left <= 0) { 1427 so->so_flowctrld = B_TRUE; 1428 *errorp = ENOSPC; 1429 space_left = -1; 1430 } 1431 goto done_unlock; 1432 } 1433 1434 #pragma inline(so_queue_msg_impl) 1435 1436 ssize_t 1437 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp, 1438 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp) 1439 { 1440 struct sonode *so = (struct sonode *)sock_handle; 1441 1442 return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp, 1443 so->so_filter_bottom)); 1444 } 1445 1446 /* 1447 * Set the offset of where the oob data is relative to the bytes in 1448 * queued. Also generate SIGURG 1449 */ 1450 void 1451 so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset) 1452 { 1453 struct sonode *so; 1454 1455 ASSERT(offset >= 0); 1456 so = (struct sonode *)sock_handle; 1457 mutex_enter(&so->so_lock); 1458 if (so->so_direct != NULL) 1459 SOD_UIOAFINI(so->so_direct); 1460 1461 /* 1462 * New urgent data on the way so forget about any old 1463 * urgent data. 1464 */ 1465 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA); 1466 1467 /* 1468 * Record that urgent data is pending. 1469 */ 1470 so->so_state |= SS_OOBPEND; 1471 1472 if (so->so_oobmsg != NULL) { 1473 dprintso(so, 1, ("sock: discarding old oob\n")); 1474 freemsg(so->so_oobmsg); 1475 so->so_oobmsg = NULL; 1476 } 1477 1478 /* 1479 * set the offset where the urgent byte is 1480 */ 1481 so->so_oobmark = so->so_rcv_queued + offset; 1482 if (so->so_oobmark == 0) 1483 so->so_state |= SS_RCVATMARK; 1484 else 1485 so->so_state &= ~SS_RCVATMARK; 1486 1487 so_notify_oobsig(so); 1488 } 1489 1490 /* 1491 * Queue the OOB byte 1492 */ 1493 static void 1494 so_queue_oob(struct sonode *so, mblk_t *mp, size_t len) 1495 { 1496 mutex_enter(&so->so_lock); 1497 if (so->so_direct != NULL) 1498 SOD_UIOAFINI(so->so_direct); 1499 1500 ASSERT(mp != NULL); 1501 if (!IS_SO_OOB_INLINE(so)) { 1502 so->so_oobmsg = mp; 1503 so->so_state |= SS_HAVEOOBDATA; 1504 } else { 1505 so_enqueue_msg(so, mp, len); 1506 } 1507 1508 so_notify_oobdata(so, IS_SO_OOB_INLINE(so)); 1509 } 1510 1511 int 1512 so_close(struct sonode *so, int flag, struct cred *cr) 1513 { 1514 int error; 1515 1516 /* 1517 * No new data will be enqueued once the CLOSING flag is set. 1518 */ 1519 mutex_enter(&so->so_lock); 1520 so->so_state |= SS_CLOSING; 1521 ASSERT(so_verify_oobstate(so)); 1522 so_rcv_flush(so); 1523 mutex_exit(&so->so_lock); 1524 1525 if (so->so_filter_active > 0) 1526 sof_sonode_closing(so); 1527 1528 if (so->so_state & SS_ACCEPTCONN) { 1529 /* 1530 * We grab and release the accept lock to ensure that any 1531 * thread about to insert a socket in so_newconn completes 1532 * before we flush the queue. Any thread calling so_newconn 1533 * after we drop the lock will observe the SS_CLOSING flag, 1534 * which will stop it from inserting the socket in the queue. 1535 */ 1536 mutex_enter(&so->so_acceptq_lock); 1537 mutex_exit(&so->so_acceptq_lock); 1538 1539 so_acceptq_flush(so, B_TRUE); 1540 } 1541 1542 error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr); 1543 switch (error) { 1544 default: 1545 /* Protocol made a synchronous close; remove proto ref */ 1546 VN_RELE(SOTOV(so)); 1547 break; 1548 case EINPROGRESS: 1549 /* 1550 * Protocol is in the process of closing, it will make a 1551 * 'closed' upcall to remove the reference. 1552 */ 1553 error = 0; 1554 break; 1555 } 1556 1557 return (error); 1558 } 1559 1560 /* 1561 * Upcall made by the protocol when it's doing an asynchronous close. It 1562 * will drop the protocol's reference on the socket. 1563 */ 1564 void 1565 so_closed(sock_upper_handle_t sock_handle) 1566 { 1567 struct sonode *so = (struct sonode *)sock_handle; 1568 1569 VN_RELE(SOTOV(so)); 1570 } 1571 1572 vnode_t * 1573 so_get_vnode(sock_upper_handle_t sock_handle) 1574 { 1575 sonode_t *so = (sonode_t *)sock_handle; 1576 vnode_t *vn; 1577 1578 vn = SOTOV(so); 1579 VN_HOLD(vn); 1580 1581 return (vn); 1582 } 1583 1584 void 1585 so_zcopy_notify(sock_upper_handle_t sock_handle) 1586 { 1587 struct sonode *so = (struct sonode *)sock_handle; 1588 1589 mutex_enter(&so->so_lock); 1590 so->so_copyflag |= STZCNOTIFY; 1591 cv_broadcast(&so->so_copy_cv); 1592 mutex_exit(&so->so_lock); 1593 } 1594 1595 void 1596 so_set_error(sock_upper_handle_t sock_handle, int error) 1597 { 1598 struct sonode *so = (struct sonode *)sock_handle; 1599 1600 mutex_enter(&so->so_lock); 1601 1602 soseterror(so, error); 1603 1604 so_notify_error(so); 1605 } 1606 1607 /* 1608 * so_recvmsg - read data from the socket 1609 * 1610 * There are two ways of obtaining data; either we ask the protocol to 1611 * copy directly into the supplied buffer, or we copy data from the 1612 * sonode's receive queue. The decision which one to use depends on 1613 * whether the protocol has a sd_recv_uio down call. 1614 */ 1615 int 1616 so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 1617 struct cred *cr) 1618 { 1619 rval_t rval; 1620 int flags = 0; 1621 t_uscalar_t controllen, namelen; 1622 int error = 0; 1623 int ret; 1624 mblk_t *mctlp = NULL; 1625 union T_primitives *tpr; 1626 void *control; 1627 ssize_t saved_resid; 1628 struct uio *suiop; 1629 1630 SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr)); 1631 1632 if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 1633 (so->so_mode & SM_CONNREQUIRED)) { 1634 SO_UNBLOCK_FALLBACK(so); 1635 return (ENOTCONN); 1636 } 1637 1638 mutex_enter(&so->so_lock); 1639 if (so->so_krecv_cb != NULL) { 1640 mutex_exit(&so->so_lock); 1641 return (EOPNOTSUPP); 1642 } 1643 mutex_exit(&so->so_lock); 1644 1645 if (msg->msg_flags & MSG_PEEK) 1646 msg->msg_flags &= ~MSG_WAITALL; 1647 1648 if (so->so_mode & SM_ATOMIC) 1649 msg->msg_flags |= MSG_TRUNC; 1650 1651 if (msg->msg_flags & MSG_OOB) { 1652 if ((so->so_mode & SM_EXDATA) == 0) { 1653 error = EOPNOTSUPP; 1654 } else if (so->so_downcalls->sd_recv_uio != NULL) { 1655 error = (*so->so_downcalls->sd_recv_uio) 1656 (so->so_proto_handle, uiop, msg, cr); 1657 } else { 1658 error = sorecvoob(so, msg, uiop, msg->msg_flags, 1659 IS_SO_OOB_INLINE(so)); 1660 } 1661 SO_UNBLOCK_FALLBACK(so); 1662 return (error); 1663 } 1664 1665 /* 1666 * If the protocol has the recv down call, then pass the request 1667 * down. 1668 */ 1669 if (so->so_downcalls->sd_recv_uio != NULL) { 1670 error = (*so->so_downcalls->sd_recv_uio) 1671 (so->so_proto_handle, uiop, msg, cr); 1672 SO_UNBLOCK_FALLBACK(so); 1673 return (error); 1674 } 1675 1676 /* 1677 * Reading data from the socket buffer 1678 */ 1679 flags = msg->msg_flags; 1680 msg->msg_flags = 0; 1681 1682 /* 1683 * Set msg_controllen and msg_namelen to zero here to make it 1684 * simpler in the cases that no control or name is returned. 1685 */ 1686 controllen = msg->msg_controllen; 1687 namelen = msg->msg_namelen; 1688 msg->msg_controllen = 0; 1689 msg->msg_namelen = 0; 1690 1691 mutex_enter(&so->so_lock); 1692 /* Set SOREADLOCKED */ 1693 error = so_lock_read_intr(so, 1694 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 1695 mutex_exit(&so->so_lock); 1696 if (error) { 1697 SO_UNBLOCK_FALLBACK(so); 1698 return (error); 1699 } 1700 1701 suiop = sod_rcv_init(so, flags, &uiop); 1702 retry: 1703 saved_resid = uiop->uio_resid; 1704 error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags); 1705 if (error != 0) { 1706 goto out; 1707 } 1708 /* 1709 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 1710 * For non-datagrams MOREDATA is used to set MSG_EOR. 1711 */ 1712 ASSERT(!(rval.r_val1 & MORECTL)); 1713 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 1714 msg->msg_flags |= MSG_TRUNC; 1715 if (mctlp == NULL) { 1716 dprintso(so, 1, ("so_recvmsg: got M_DATA\n")); 1717 1718 mutex_enter(&so->so_lock); 1719 /* Set MSG_EOR based on MOREDATA */ 1720 if (!(rval.r_val1 & MOREDATA)) { 1721 if (so->so_state & SS_SAVEDEOR) { 1722 msg->msg_flags |= MSG_EOR; 1723 so->so_state &= ~SS_SAVEDEOR; 1724 } 1725 } 1726 /* 1727 * If some data was received (i.e. not EOF) and the 1728 * read/recv* has not been satisfied wait for some more. 1729 */ 1730 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1731 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1732 mutex_exit(&so->so_lock); 1733 flags |= MSG_NOMARK; 1734 goto retry; 1735 } 1736 1737 goto out_locked; 1738 } 1739 /* so_queue_msg has already verified length and alignment */ 1740 tpr = (union T_primitives *)mctlp->b_rptr; 1741 dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type)); 1742 switch (tpr->type) { 1743 case T_DATA_IND: { 1744 /* 1745 * Set msg_flags to MSG_EOR based on 1746 * MORE_flag and MOREDATA. 1747 */ 1748 mutex_enter(&so->so_lock); 1749 so->so_state &= ~SS_SAVEDEOR; 1750 if (!(tpr->data_ind.MORE_flag & 1)) { 1751 if (!(rval.r_val1 & MOREDATA)) 1752 msg->msg_flags |= MSG_EOR; 1753 else 1754 so->so_state |= SS_SAVEDEOR; 1755 } 1756 freemsg(mctlp); 1757 /* 1758 * If some data was received (i.e. not EOF) and the 1759 * read/recv* has not been satisfied wait for some more. 1760 */ 1761 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1762 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1763 mutex_exit(&so->so_lock); 1764 flags |= MSG_NOMARK; 1765 goto retry; 1766 } 1767 goto out_locked; 1768 } 1769 case T_UNITDATA_IND: { 1770 void *addr; 1771 t_uscalar_t addrlen; 1772 void *abuf; 1773 t_uscalar_t optlen; 1774 void *opt; 1775 1776 if (namelen != 0) { 1777 /* Caller wants source address */ 1778 addrlen = tpr->unitdata_ind.SRC_length; 1779 addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset, 1780 addrlen, 1); 1781 if (addr == NULL) { 1782 freemsg(mctlp); 1783 error = EPROTO; 1784 eprintsoline(so, error); 1785 goto out; 1786 } 1787 ASSERT(so->so_family != AF_UNIX); 1788 } 1789 optlen = tpr->unitdata_ind.OPT_length; 1790 if (optlen != 0) { 1791 t_uscalar_t ncontrollen; 1792 1793 /* 1794 * Extract any source address option. 1795 * Determine how large cmsg buffer is needed. 1796 */ 1797 opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset, 1798 optlen, __TPI_ALIGN_SIZE); 1799 1800 if (opt == NULL) { 1801 freemsg(mctlp); 1802 error = EPROTO; 1803 eprintsoline(so, error); 1804 goto out; 1805 } 1806 if (so->so_family == AF_UNIX) 1807 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 1808 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1809 !(flags & MSG_XPG4_2)); 1810 if (controllen != 0) 1811 controllen = ncontrollen; 1812 else if (ncontrollen != 0) 1813 msg->msg_flags |= MSG_CTRUNC; 1814 } else { 1815 controllen = 0; 1816 } 1817 1818 if (namelen != 0) { 1819 /* 1820 * Return address to caller. 1821 * Caller handles truncation if length 1822 * exceeds msg_namelen. 1823 * NOTE: AF_UNIX NUL termination is ensured by 1824 * the sender's copyin_name(). 1825 */ 1826 abuf = kmem_alloc(addrlen, KM_SLEEP); 1827 1828 bcopy(addr, abuf, addrlen); 1829 msg->msg_name = abuf; 1830 msg->msg_namelen = addrlen; 1831 } 1832 1833 if (controllen != 0) { 1834 /* 1835 * Return control msg to caller. 1836 * Caller handles truncation if length 1837 * exceeds msg_controllen. 1838 */ 1839 control = kmem_zalloc(controllen, KM_SLEEP); 1840 1841 error = so_opt2cmsg(mctlp, opt, optlen, 1842 !(flags & MSG_XPG4_2), control, controllen); 1843 if (error) { 1844 freemsg(mctlp); 1845 if (msg->msg_namelen != 0) 1846 kmem_free(msg->msg_name, 1847 msg->msg_namelen); 1848 kmem_free(control, controllen); 1849 eprintsoline(so, error); 1850 goto out; 1851 } 1852 msg->msg_control = control; 1853 msg->msg_controllen = controllen; 1854 } 1855 1856 freemsg(mctlp); 1857 goto out; 1858 } 1859 case T_OPTDATA_IND: { 1860 struct T_optdata_req *tdr; 1861 void *opt; 1862 t_uscalar_t optlen; 1863 1864 tdr = (struct T_optdata_req *)mctlp->b_rptr; 1865 optlen = tdr->OPT_length; 1866 if (optlen != 0) { 1867 t_uscalar_t ncontrollen; 1868 /* 1869 * Determine how large cmsg buffer is needed. 1870 */ 1871 opt = sogetoff(mctlp, 1872 tpr->optdata_ind.OPT_offset, optlen, 1873 __TPI_ALIGN_SIZE); 1874 1875 if (opt == NULL) { 1876 freemsg(mctlp); 1877 error = EPROTO; 1878 eprintsoline(so, error); 1879 goto out; 1880 } 1881 1882 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1883 !(flags & MSG_XPG4_2)); 1884 if (controllen != 0) 1885 controllen = ncontrollen; 1886 else if (ncontrollen != 0) 1887 msg->msg_flags |= MSG_CTRUNC; 1888 } else { 1889 controllen = 0; 1890 } 1891 1892 if (controllen != 0) { 1893 /* 1894 * Return control msg to caller. 1895 * Caller handles truncation if length 1896 * exceeds msg_controllen. 1897 */ 1898 control = kmem_zalloc(controllen, KM_SLEEP); 1899 1900 error = so_opt2cmsg(mctlp, opt, optlen, 1901 !(flags & MSG_XPG4_2), control, controllen); 1902 if (error) { 1903 freemsg(mctlp); 1904 kmem_free(control, controllen); 1905 eprintsoline(so, error); 1906 goto out; 1907 } 1908 msg->msg_control = control; 1909 msg->msg_controllen = controllen; 1910 } 1911 1912 /* 1913 * Set msg_flags to MSG_EOR based on 1914 * DATA_flag and MOREDATA. 1915 */ 1916 mutex_enter(&so->so_lock); 1917 so->so_state &= ~SS_SAVEDEOR; 1918 if (!(tpr->data_ind.MORE_flag & 1)) { 1919 if (!(rval.r_val1 & MOREDATA)) 1920 msg->msg_flags |= MSG_EOR; 1921 else 1922 so->so_state |= SS_SAVEDEOR; 1923 } 1924 freemsg(mctlp); 1925 /* 1926 * If some data was received (i.e. not EOF) and the 1927 * read/recv* has not been satisfied wait for some more. 1928 * Not possible to wait if control info was received. 1929 */ 1930 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1931 controllen == 0 && 1932 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1933 mutex_exit(&so->so_lock); 1934 flags |= MSG_NOMARK; 1935 goto retry; 1936 } 1937 goto out_locked; 1938 } 1939 default: 1940 cmn_err(CE_CONT, "so_recvmsg bad type %x \n", 1941 tpr->type); 1942 freemsg(mctlp); 1943 error = EPROTO; 1944 ASSERT(0); 1945 } 1946 out: 1947 mutex_enter(&so->so_lock); 1948 out_locked: 1949 ret = sod_rcv_done(so, suiop, uiop); 1950 if (ret != 0 && error == 0) 1951 error = ret; 1952 1953 so_unlock_read(so); /* Clear SOREADLOCKED */ 1954 mutex_exit(&so->so_lock); 1955 1956 SO_UNBLOCK_FALLBACK(so); 1957 1958 return (error); 1959 } 1960 1961 sonodeops_t so_sonodeops = { 1962 so_init, /* sop_init */ 1963 so_accept, /* sop_accept */ 1964 so_bind, /* sop_bind */ 1965 so_listen, /* sop_listen */ 1966 so_connect, /* sop_connect */ 1967 so_recvmsg, /* sop_recvmsg */ 1968 so_sendmsg, /* sop_sendmsg */ 1969 so_sendmblk, /* sop_sendmblk */ 1970 so_getpeername, /* sop_getpeername */ 1971 so_getsockname, /* sop_getsockname */ 1972 so_shutdown, /* sop_shutdown */ 1973 so_getsockopt, /* sop_getsockopt */ 1974 so_setsockopt, /* sop_setsockopt */ 1975 so_ioctl, /* sop_ioctl */ 1976 so_poll, /* sop_poll */ 1977 so_close, /* sop_close */ 1978 }; 1979 1980 sock_upcalls_t so_upcalls = { 1981 so_newconn, 1982 so_connected, 1983 so_disconnected, 1984 so_opctl, 1985 so_queue_msg, 1986 so_set_prop, 1987 so_txq_full, 1988 so_signal_oob, 1989 so_zcopy_notify, 1990 so_set_error, 1991 so_closed, 1992 so_get_vnode 1993 }; 1994