/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>

#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/sockio.h>
#include <sys/kmem_impl.h>

#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/ddi.h>
#include <netinet/in.h>
#include <inet/ip.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/sockfilter_impl.h>

#include <sys/socket_proto.h>

#include <fs/sockfs/socktpi_impl.h>
#include <fs/sockfs/sodirect.h>
#include <fs/sockfs/nl7c.h>

extern int xnet_skip_checks;
extern int xnet_check_print;

static void so_queue_oob(struct sonode *, mblk_t *, size_t);
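
/*
 * Stub entry points used by socket types that do not support a
 * particular operation; each simply fails with EOPNOTSUPP.
 */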

/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag,
    struct cred *cr, struct sonode **nsop)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
    socklen_t *len, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	return (EOPNOTSUPP);
}

/*
 * Generic Socket Ops
 */

/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
	return (socket_init_common(so, pso, flags, cr));
}

int
so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));

	ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);

	/* X/Open requires this check */
	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		if (xnet_check_print) {
			printf("sockfs: X/Open bind state check "
			    "caused EINVAL\n");
		}
		error = EINVAL;
		goto done;
	}

	/*
	 * A bind to a NULL address is interpreted as an unbind, so just
	 * do the downcall.
	 */
	if (name == NULL)
		goto dobind;

	switch (so->so_family) {
	case AF_INET:
		if ((size_t)namelen != sizeof (sin_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if ((flags & _SOBIND_XPG4_2) &&
		    (name->sa_family != so->so_family)) {
			/*
			 * This check has to be made for X/Open sockets;
			 * however, application failures have been observed
			 * when it is applied to all sockets.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
		/*
		 * Force a zero sa_family to match so_family.
		 *
		 * Some programs like inetd(1M) don't set the
		 * family field. Other programs leave
		 * sin_family set to garbage - SunOS 4.X does
		 * not check the family field on a bind.
		 * We use the family field that
		 * was passed in to the socket() call.
		 */
		name->sa_family = so->so_family;
		break;

	case AF_INET6: {
#ifdef DEBUG
		sin6_t *sin6 = (sin6_t *)name;
#endif
		if ((size_t)namelen != sizeof (sin6_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if (name->sa_family != so->so_family) {
			/*
			 * With IPv6 we require the family to match
			 * unlike in IPv4.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
#ifdef DEBUG
		/*
		 * Verify that apps don't forget to clear
		 * sin6_scope_id etc.
		 */
		if (sin6->sin6_scope_id != 0 &&
		    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized sin6_scope_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->sin6_scope_id,
			    (int)curproc->p_pid);
		}
		if (sin6->__sin6_src_id != 0) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized __sin6_src_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->__sin6_src_id,
			    (int)curproc->p_pid);
		}
#endif /* DEBUG */

		break;
	}
	default:
		/* Just pass the request to the protocol */
		goto dobind;
	}

	/*
	 * First we check if either NCA or KSSL has been enabled for
	 * the requested address, and if so, we fall back to TPI.
	 * If neither of those two services is enabled, then we just
	 * pass the request to the protocol.
	 *
	 * Note that KSSL can only be enabled on a socket if NCA is NOT
	 * enabled for that socket, hence the single check below.
	 */
	if (nl7c_enabled && ((so->so_family == AF_INET ||
	    so->so_family == AF_INET6) &&
	    nl7c_lookup_addr(name, namelen) != NULL)) {
		/*
		 * NL7C is not supported in non-global zones,
		 * we enforce this restriction here.
		 */
		if (so->so_zoneid == GLOBAL_ZONEID) {
			/* NCA should be used, so fall back to TPI */
			error = so_tpi_fallback(so, cr);
			SO_UNBLOCK_FALLBACK(so);
			if (error)
				return (error);
			else
				return (SOP_BIND(so, name, namelen, flags, cr));
		}
	}

dobind:
	if (so->so_filter_active == 0 ||
	    (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_bind)
		    (so->so_proto_handle, name, namelen, cr);
	}
done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
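
/*
 * Socket filter interception convention, as used throughout this file:
 * when filters are attached (so_filter_active > 0) the sof_filter_*
 * hook is called first. A negative return value means no filter
 * intercepted the call and the request is passed down to the protocol;
 * a return of zero or a positive errno means a filter handled (or
 * failed) the call and the downcall is skipped.
 */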

int
so_listen(struct sonode *so, int backlog, struct cred *cr)
{
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_listen(so, &backlog, cr)) < 0)
		error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
		    backlog, cr);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_connect(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int fflag, int flags, struct cred *cr)
{
	int error = 0;
	sock_connid_t id;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));

	/*
	 * If there is a pending error, return it. A pending error can be
	 * the result of a previous non-blocking operation.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_connect(so, (struct sockaddr *)name,
	    &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
		    name, namelen, &id, cr);

		if (error == EINPROGRESS)
			error = so_wait_connected(so,
			    fflag & (FNONBLOCK|FNDELAY), id);
	}
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
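
/*
 * Accept a connection: pull a fully connected socket off the listener's
 * accept queue (blocking unless FNONBLOCK/FNDELAY is set), give any
 * filters a chance to intercept, and let the protocol finish the accept
 * via the sd_accept downcall. On failure the new sonode is closed and
 * destroyed here.
 */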

/*ARGSUSED*/
int
so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
{
	int error = 0;
	struct sonode *nso;

	*nsop = NULL;

	SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
	if ((so->so_state & SS_ACCEPTCONN) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
		    EOPNOTSUPP : EINVAL);
	}

	if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
	    &nso)) == 0) {
		ASSERT(nso != NULL);

		/* finish the accept */
		if ((so->so_filter_active > 0 &&
		    (error = sof_filter_accept(nso, cr)) > 0) ||
		    (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
		    nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
			(void) socket_close(nso, 0, cr);
			socket_destroy(nso);
		} else {
			*nsop = nso;
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	int error, flags;
	boolean_t dontblock;
	ssize_t orig_resid;
	mblk_t *mp;

	SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));

	flags = msg->msg_flags;
	error = 0;
	dontblock = (flags & MSG_DONTWAIT) ||
	    (uiop->uio_fmode & (FNONBLOCK|FNDELAY));

	if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
		/*
		 * The old way of passing file descriptors is not supported.
		 */
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	if ((so->so_mode & SM_ATOMIC) &&
	    uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

	/*
	 * For atomic sends we will only do one iteration.
	 */
	do {
		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}

		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}

		/*
		 * Send down OOB messages even if the send path is being
		 * flow controlled (assuming the protocol supports OOB data).
		 */
		if (flags & MSG_OOB) {
			if ((so->so_mode & SM_EXDATA) == 0) {
				error = EOPNOTSUPP;
				break;
			}
		} else if (SO_SND_FLOWCTRLD(so)) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * Time to send data to the protocol. We either copy the
		 * data into mblks or pass the uio directly to the protocol.
		 * We decide what to do based on the available down calls.
		 */
		if (so->so_downcalls->sd_send_uio != NULL) {
			error = (*so->so_downcalls->sd_send_uio)
			    (so->so_proto_handle, uiop, msg, cr);
			if (error != 0)
				break;
		} else {
			/* save the resid in case of failure */
			orig_resid = uiop->uio_resid;

			if ((mp = socopyinuio(uiop,
			    so->so_proto_props.sopp_maxpsz,
			    so->so_proto_props.sopp_wroff,
			    so->so_proto_props.sopp_maxblk,
			    so->so_proto_props.sopp_tail, &error)) == NULL) {
				break;
			}
			ASSERT(uiop->uio_resid >= 0);

			if (so->so_filter_active > 0 &&
			    ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
			    &error)) == NULL)) {
				if (error != 0)
					break;
				continue;
			}
			error = (*so->so_downcalls->sd_send)
			    (so->so_proto_handle, mp, msg, cr);
			if (error != 0) {
				/*
				 * The send failed. We do not have to free the
				 * mblks, because that is the protocol's
				 * responsibility. However, uio_resid must
				 * remain accurate, so adjust that here.
				 */
				uiop->uio_resid = orig_resid;
				break;
			}
		}
	} while (uiop->uio_resid > 0);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
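
/*
 * mblk-based send path, used when the protocol supports sendfile-style
 * transmits (SM_SENDFILESUPP): the caller hands us a chain of mblks
 * rather than a uio. The chain is carved into pieces of at most
 * sopp_maxpsz bytes before each sd_send downcall, and *mpp is advanced
 * past whatever the protocol has accepted so the caller can resume or
 * clean up after a partial send.
 */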

int
so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
    boolean_t fil_inject)
{
	int error;
	boolean_t dontblock;
	size_t size;
	mblk_t *mp = *mpp;

	if (so->so_downcalls->sd_send == NULL)
		return (EOPNOTSUPP);

	error = 0;
	dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
	    (fflag & (FNONBLOCK|FNDELAY));
	size = msgdsize(mp);

	if ((so->so_mode & SM_ATOMIC) &&
	    size > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1)
		return (EMSGSIZE);

	while (mp != NULL) {
		mblk_t *nmp, *last_mblk;
		size_t mlen;

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}
		/* Socket filters are not flow controlled */
		if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * We only allow so_maxpsz of data to be sent down to
		 * the protocol at a time.
		 */
		mlen = MBLKL(mp);
		nmp = mp->b_cont;
		last_mblk = mp;
		while (nmp != NULL) {
			mlen += MBLKL(nmp);
			if (mlen > so->so_proto_props.sopp_maxpsz) {
				last_mblk->b_cont = NULL;
				break;
			}
			last_mblk = nmp;
			nmp = nmp->b_cont;
		}

		if (so->so_filter_active > 0 &&
		    (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
		    cr, &error)) == NULL) {
			*mpp = mp = nmp;
			if (error != 0)
				break;
			continue;
		}
		error = (*so->so_downcalls->sd_send)
		    (so->so_proto_handle, mp, msg, cr);
		if (error != 0) {
			/*
			 * The send failed. The protocol will free the mblks
			 * that were sent down. Let the caller deal with the
			 * rest.
			 */
			*mpp = nmp;
			break;
		}

		*mpp = mp = nmp;
	}
	/* Let the filter know whether the protocol is flow controlled */
	if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
		error = ENOSPC;

	return (error);
}

#pragma inline(so_sendmblk_impl)

int
so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));

	if ((so->so_mode & SM_SENDFILESUPP) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
	    B_FALSE);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_shutdown(struct sonode *so, int how, struct cred *cr)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));

	/*
	 * SunOS 4.X has no check for datagram sockets.
	 * 5.X checks that it is connected (ENOTCONN).
	 * X/Open requires that we check the connected state.
	 */
	if (!(so->so_state & SS_ISCONNECTED)) {
		if (!xnet_skip_checks) {
			error = ENOTCONN;
			if (xnet_check_print) {
				printf("sockfs: X/Open shutdown check "
				    "caused ENOTCONN\n");
			}
		}
		goto done;
	}

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_shutdown(so, &how, cr)) < 0)
		error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
		    how, cr));

	/*
	 * Protocol agreed to shutdown. We need to flush the
	 * receive buffer if the receive side is being shutdown.
	 */
	if (error == 0 && how != SHUT_WR) {
		mutex_enter(&so->so_lock);
		/* wait for active reader to finish */
		(void) so_lock_read(so, 0);

		so_rcv_flush(so);

		so_unlock_read(so);
		mutex_exit(&so->so_lock);
	}

done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_getsockname(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
		error = (*so->so_downcalls->sd_getsockname)
		    (so->so_proto_handle, addr, addrlen, cr);

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_getpeername(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));

	if (accept) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	} else if (!(so->so_state & SS_ISCONNECTED)) {
		error = ENOTCONN;
	} else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		/* Added this check for X/Open */
		error = EINVAL;
		if (xnet_check_print) {
			printf("sockfs: X/Open getpeername check => EINVAL\n");
		}
	} else if (so->so_filter_active == 0 ||
	    (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
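
/*
 * getsockopt: filters get the first shot, then the generic socket-level
 * options handled by sockfs itself, and finally the protocol's
 * sd_getsockopt downcall. If the protocol returns ENOPROTOOPT for a
 * SOL_SOCKET option, a zeroed default is synthesized below so that get
 * never fails for those options.
 */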

int
so_getsockopt(struct sonode *so, int level, int option_name,
    void *optval, socklen_t *optlenp, int flags, struct cred *cr)
{
	int error = 0;

	if (level == SOL_FILTER)
		return (sof_getsockopt(so, option_name, optval, optlenp, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));

	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_getsockopt(so, level, option_name, optval,
	    optlenp, cr)) < 0) &&
	    (error = socket_getopt_common(so, level, option_name, optval,
	    optlenp, flags)) < 0) {
		error = (*so->so_downcalls->sd_getsockopt)
		    (so->so_proto_handle, level, option_name, optval, optlenp,
		    cr);
		if (error == ENOPROTOOPT) {
			if (level == SOL_SOCKET) {
				/*
				 * If a protocol does not support a particular
				 * socket option, set can fail (not allowed)
				 * but get cannot fail. This is the previous
				 * sockfs behavior.
				 */
				switch (option_name) {
				case SO_LINGER:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct linger)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct linger));
					*optlenp = sizeof (struct linger);
					break;
				case SO_RCVTIMEO:
				case SO_SNDTIMEO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct timeval)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct timeval));
					*optlenp = sizeof (struct timeval);
					break;
				case SO_SND_BUFINFO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct so_snd_bufinfo)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval,
					    sizeof (struct so_snd_bufinfo));
					*optlenp =
					    sizeof (struct so_snd_bufinfo);
					break;
				case SO_DEBUG:
				case SO_REUSEADDR:
				case SO_KEEPALIVE:
				case SO_DONTROUTE:
				case SO_BROADCAST:
				case SO_USELOOPBACK:
				case SO_OOBINLINE:
				case SO_DGRAM_ERRIND:
				case SO_SNDBUF:
				case SO_RCVBUF:
					error = 0;
					*((int32_t *)optval) = 0;
					*optlenp = sizeof (int32_t);
					break;
				default:
					break;
				}
			}
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_setsockopt(struct sonode *so, int level, int option_name,
    const void *optval, socklen_t optlen, struct cred *cr)
{
	int error = 0;
	struct timeval tl;
	const void *opt = optval;

	if (level == SOL_FILTER)
		return (sof_setsockopt(so, option_name, optval, optlen, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));

	/* X/Open requires this check */
	if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
		SO_UNBLOCK_FALLBACK(so);
		if (xnet_check_print)
			printf("sockfs: X/Open setsockopt check => EINVAL\n");
		return (EINVAL);
	}

	if (so->so_filter_active > 0 &&
	    (error = sof_filter_setsockopt(so, level, option_name,
	    (void *)optval, &optlen, cr)) >= 0)
		goto done;

	if (level == SOL_SOCKET) {
		switch (option_name) {
		case SO_RCVTIMEO:
		case SO_SNDTIMEO: {
			/*
			 * We pass these two options down to the protocol
			 * because some third-party protocols need to know
			 * them; protocols that don't care about them simply
			 * return 0.
			 */
			clock_t t_usec;

			if (get_udatamodel() == DATAMODEL_NONE ||
			    get_udatamodel() == DATAMODEL_NATIVE) {
				if (optlen != sizeof (struct timeval)) {
					error = EINVAL;
					goto done;
				}
				bcopy((struct timeval *)optval, &tl,
				    sizeof (struct timeval));
			} else {
				if (optlen != sizeof (struct timeval32)) {
					error = EINVAL;
					goto done;
				}
				TIMEVAL32_TO_TIMEVAL(&tl,
				    (struct timeval32 *)optval);
			}
			opt = &tl;
			optlen = sizeof (tl);
			t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
			mutex_enter(&so->so_lock);
			if (option_name == SO_RCVTIMEO)
				so->so_rcvtimeo = drv_usectohz(t_usec);
			else
				so->so_sndtimeo = drv_usectohz(t_usec);
			mutex_exit(&so->so_lock);
			break;
		}
		case SO_RCVBUF:
			/*
			 * XXX XPG 4.2 applications retrieve SO_RCVBUF from
			 * sockfs since the transport might adjust the value
			 * and not return exactly what was set by the
			 * application.
			 */
			so->so_xpg_rcvbuf = *(int32_t *)optval;
			break;
		}
	}
	error = (*so->so_downcalls->sd_setsockopt)
	    (so->so_proto_handle, level, option_name, opt, optlen, cr);
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));

	/*
	 * If there is a pending error, return it. A pending error can be
	 * the result of a previous non-blocking operation.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	/*
	 * Calling strioc can result in the socket falling back to TPI,
	 * if that is supported.
	 */
	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_ioctl(so, cmd, arg, mode,
	    rvalp, cr)) < 0) &&
	    (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
	    (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
		error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
		    cmd, arg, mode, rvalp, cr);
	}

done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
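
/*
 * Poll entry point. The protocol's sd_poll downcall, when present, is
 * consulted for the events in SO_PROTO_POLLEV; everything else is
 * derived from sonode state, including the locked re-check of read
 * events done before registering on the pollhead.
 */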

int
so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int state = so->so_state, mask;

	*reventsp = 0;

	/*
	 * In sockets the errors are represented as input/output events.
	 */
	if (so->so_error != 0 &&
	    ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
		return (0);
	}

	/*
	 * If the socket is in a state where it can send data,
	 * turn on POLLWRBAND and POLLOUT events.
	 */
	if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
		/*
		 * Out-of-band data is allowed even if the connection
		 * is flow controlled.
		 */
		*reventsp |= POLLWRBAND & events;
		if (!SO_SND_FLOWCTRLD(so)) {
			/*
			 * As long as there is buffer to send data,
			 * turn on POLLOUT events.
			 */
			*reventsp |= POLLOUT & events;
		}
	}

	/*
	 * Turn on POLLIN whenever there is data on the receive queue,
	 * or the socket is in a state where no more data will be received.
	 * Also, if the socket is accepting connections, flip the bit if
	 * there is something on the queue.
	 *
	 * We do an initial check for events without holding locks. However,
	 * if there are no events available, then we redo the check for POLLIN
	 * events under the lock.
	 */

	/* Pending connections */
	if (!list_is_empty(&so->so_acceptq_list))
		*reventsp |= (POLLIN|POLLRDNORM) & events;

	/*
	 * If we're looking for POLLRDHUP, indicate it if we have sent the
	 * last rx signal for the socket.
	 */
	if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG))
		*reventsp |= POLLRDHUP;

	/* Data */
	/* so_downcalls is null for sctp */
	if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
		*reventsp |= (*so->so_downcalls->sd_poll)
		    (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
		    CRED()) & events;
		ASSERT((*reventsp & ~events) == 0);
		/* do not recheck events */
		events &= ~SO_PROTO_POLLEV;
	} else {
		if (SO_HAVE_DATA(so))
			*reventsp |= (POLLIN|POLLRDNORM) & events;

		/* Urgent data */
		if ((state & SS_OOBPEND) != 0) {
			*reventsp |= (POLLRDBAND | POLLPRI) & events;
		}

		/*
		 * If the socket has become disconnected, we set POLLHUP.
		 * Note that if we are in this state, we will have set POLLIN
		 * (SO_HAVE_DATA() is true on a disconnected socket), but not
		 * POLLOUT (SS_ISCONNECTED is false). This is in keeping with
		 * the semantics of POLLHUP, which is defined to be mutually
		 * exclusive with respect to POLLOUT but not POLLIN. We are
		 * therefore setting POLLHUP primarily for the benefit of
		 * those not polling on POLLIN, as they have no other way of
		 * knowing that the socket has been disconnected.
		 */
		mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;

		if ((state & (mask | SS_ISCONNECTED)) == mask)
			*reventsp |= POLLHUP;
	}

	if ((!*reventsp && !anyyet) || (events & POLLET)) {
		/* Check for read events again, but this time under lock */
		if (events & (POLLIN|POLLRDNORM)) {
			mutex_enter(&so->so_lock);
			if (SO_HAVE_DATA(so) ||
			    !list_is_empty(&so->so_acceptq_list)) {
				if (events & POLLET) {
					so->so_pollev |= SO_POLLEV_IN;
					*phpp = &so->so_poll_list;
				}

				mutex_exit(&so->so_lock);
				*reventsp |= (POLLIN|POLLRDNORM) & events;

				return (0);
			} else {
				so->so_pollev |= SO_POLLEV_IN;
				mutex_exit(&so->so_lock);
			}
		}
		*phpp = &so->so_poll_list;
	}
	return (0);
}

/*
 * Generic Upcalls
 */
void
so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
    cred_t *peer_cred, pid_t peer_cpid)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	ASSERT(so->so_proto_handle != NULL);

	if (peer_cred != NULL) {
		if (so->so_peercred != NULL)
			crfree(so->so_peercred);
		crhold(peer_cred);
		so->so_peercred = peer_cred;
		so->so_cpid = peer_cpid;
	}

	so->so_proto_connid = id;
	soisconnected(so);
	/*
	 * Wake those waiting for the connection to become established.
	 */
	so_notify_connected(so);
}

int
so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;
	boolean_t connect_failed;

	mutex_enter(&so->so_lock);

	/*
	 * If we aren't currently connected, then this isn't a disconnect but
	 * rather a failure to connect.
	 */
	connect_failed = !(so->so_state & SS_ISCONNECTED);

	so->so_proto_connid = id;
	soisdisconnected(so, error);
	so_notify_disconnected(so, connect_failed, error);

	return (0);
}

void
so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
    uintptr_t arg)
{
	struct sonode *so = (struct sonode *)sock_handle;

	switch (action) {
	case SOCK_OPCTL_SHUT_SEND:
		mutex_enter(&so->so_lock);
		socantsendmore(so);
		so_notify_disconnecting(so);
		break;
	case SOCK_OPCTL_SHUT_RECV:
		mutex_enter(&so->so_lock);
		socantrcvmore(so);
		so_notify_eof(so);
		break;
	case SOCK_OPCTL_ENAB_ACCEPT:
		mutex_enter(&so->so_lock);
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = (unsigned int)arg;
		/*
		 * The protocol can stop generating newconn upcalls when
		 * the backlog is full, so to make sure the listener does
		 * not end up with a queue full of deferred connections
		 * we reduce the backlog by one. Thus the listener will
		 * start closing deferred connections before the backlog
		 * is full.
		 */
		if (so->so_filter_active > 0)
			so->so_backlog = MAX(1, so->so_backlog - 1);
		mutex_exit(&so->so_lock);
		break;
	default:
		ASSERT(0);
		break;
	}
}

void
so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
{
	struct sonode *so = (struct sonode *)sock_handle;

	if (qfull) {
		so_snd_qfull(so);
	} else {
		so_snd_qnotfull(so);
		mutex_enter(&so->so_lock);
		/* so_notify_writable drops so_lock */
		so_notify_writable(so);
	}
}
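
/*
 * Upcall from the protocol announcing a new connection. Returns the
 * upper handle of the freshly allocated sonode, or NULL when the
 * connection cannot be accepted (listener not accepting, backlog full,
 * socket closing or falling back, or allocation failure).
 */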

sock_upper_handle_t
so_newconn(sock_upper_handle_t parenthandle,
    sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
    struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
{
	struct sonode *so = (struct sonode *)parenthandle;
	struct sonode *nso;
	int error;

	ASSERT(proto_handle != NULL);

	if ((so->so_state & SS_ACCEPTCONN) == 0 ||
	    (so->so_acceptq_len >= so->so_backlog &&
	    (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
		return (NULL);
	}

	nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
	    &error);
	if (nso == NULL)
		return (NULL);

	if (peer_cred != NULL) {
		crhold(peer_cred);
		nso->so_peercred = peer_cred;
		nso->so_cpid = peer_cpid;
	}
	nso->so_listener = so;

	/*
	 * The new socket (nso), proto_handle and sock_upcallsp are all
	 * valid at this point. But as soon as nso is placed in the accept
	 * queue that can no longer be assumed (since an accept() thread may
	 * pull it off the queue and close the socket).
	 */
	*sock_upcallsp = &so_upcalls;

	mutex_enter(&so->so_acceptq_lock);
	if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
		mutex_exit(&so->so_acceptq_lock);
		ASSERT(nso->so_count == 1);
		nso->so_count--;
		nso->so_listener = NULL;
		/* drop proto ref */
		VN_RELE(SOTOV(nso));
		socket_destroy(nso);
		return (NULL);
	} else {
		so->so_acceptq_len++;
		if (nso->so_state & SS_FIL_DEFER) {
			list_insert_tail(&so->so_acceptq_defer, nso);
			mutex_exit(&so->so_acceptq_lock);
		} else {
			list_insert_tail(&so->so_acceptq_list, nso);
			cv_signal(&so->so_acceptq_cv);
			mutex_exit(&so->so_acceptq_lock);
			mutex_enter(&so->so_lock);
			so_notify_newconn(so);
		}

		return ((sock_upper_handle_t)nso);
	}
}

void
so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
{
	struct sonode *so;

	so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	if (soppp->sopp_flags & SOCKOPT_MAXBLK)
		so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
	if (soppp->sopp_flags & SOCKOPT_WROFF)
		so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
	if (soppp->sopp_flags & SOCKOPT_TAIL)
		so->so_proto_props.sopp_tail = soppp->sopp_tail;
	if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
		so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
	if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
		so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
	if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
		so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
	if (soppp->sopp_flags & SOCKOPT_MINPSZ)
		so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
	if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
		if (soppp->sopp_zcopyflag & ZCVMSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
		} else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
		}

		if (soppp->sopp_zcopyflag & COPYCACHED) {
			so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
		}
	}
	if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
		so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
	if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
		so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
	if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
		so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
	if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
		so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
	if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
		so->so_proto_props.sopp_loopback = soppp->sopp_loopback;

	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0) {
		sof_instance_t *inst;
		ssize_t maxblk;
		ushort_t wroff, tail;

		maxblk = so->so_proto_props.sopp_maxblk;
		wroff = so->so_proto_props.sopp_wroff;
		tail = so->so_proto_props.sopp_tail;
		for (inst = so->so_filter_bottom; inst != NULL;
		    inst = inst->sofi_prev) {
			if (SOF_INTERESTED(inst, mblk_prop)) {
				(*inst->sofi_ops->sofop_mblk_prop)(
				    (sof_handle_t)inst, inst->sofi_cookie,
				    &maxblk, &wroff, &tail);
			}
		}
		mutex_enter(&so->so_lock);
		so->so_proto_props.sopp_maxblk = maxblk;
		so->so_proto_props.sopp_wroff = wroff;
		so->so_proto_props.sopp_tail = tail;
		mutex_exit(&so->so_lock);
	}
#ifdef DEBUG
	soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
	    SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
	    SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
	    SOCKOPT_LOOPBACK);
	ASSERT(soppp->sopp_flags == 0);
#endif
}
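
/*
 * Receive-side upcall: enqueue an incoming mblk chain on the sonode's
 * receive queue (or hand it to I/OAT when direct receive is enabled).
 * The return value is the space remaining in the receive buffer; -1
 * with *errorp set to ENOSPC tells the protocol that the socket is now
 * flow controlled.
 */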

/* ARGSUSED */
ssize_t
so_queue_msg_impl(struct sonode *so, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp,
    sof_instance_t *filter)
{
	boolean_t force_push = B_TRUE;
	int space_left;
	sodirect_t *sodp = so->so_direct;

	ASSERT(errorp != NULL);
	*errorp = 0;
	if (mp == NULL) {
		if (so->so_downcalls->sd_recv_uio != NULL) {
			mutex_enter(&so->so_lock);
			/* the notify functions will drop the lock */
			if (flags & MSG_OOB)
				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
			else
				so_notify_data(so, msg_size);
			return (0);
		}
		ASSERT(msg_size == 0);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	ASSERT(mp->b_next == NULL);
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
	ASSERT(msg_size == msgdsize(mp));

	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		mutex_enter(&so->so_lock);
		if (sodp != NULL)
			SOD_UIOAFINI(sodp);
		goto space_check;
	}

	if (so->so_filter_active > 0) {
		for (; filter != NULL; filter = filter->sofi_prev) {
			if (!SOF_INTERESTED(filter, data_in))
				continue;
			mp = (*filter->sofi_ops->sofop_data_in)(
			    (sof_handle_t)filter, filter->sofi_cookie, mp,
			    flags, &msg_size);
			ASSERT(msgdsize(mp) == msg_size);
			DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
			    (mblk_t *), mp);
			/* Data was consumed/dropped, just do space check */
			if (msg_size == 0) {
				mutex_enter(&so->so_lock);
				goto space_check;
			}
		}
	}

	if (flags & MSG_OOB) {
		so_queue_oob(so, mp, msg_size);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	if (force_pushp != NULL)
		force_push = *force_pushp;

	mutex_enter(&so->so_lock);
	if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		*errorp = EOPNOTSUPP;
		return (-1);
	}
	if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
		freemsg(mp);
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* process the mblk via I/OAT if capable */
	if (sodp != NULL && sodp->sod_enabled) {
		if (DB_TYPE(mp) == M_DATA) {
			sod_uioa_mblk_init(sodp, mp, msg_size);
		} else {
			SOD_UIOAFINI(sodp);
		}
	}

	if (mp->b_next == NULL) {
		so_enqueue_msg(so, mp, msg_size);
	} else {
		do {
			mblk_t *nmp;

			if ((nmp = mp->b_next) != NULL) {
				mp->b_next = NULL;
			}
			so_enqueue_msg(so, mp, msgdsize(mp));
			mp = nmp;
		} while (mp != NULL);
	}

	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}

	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
	    so->so_rcv_queued >= so->so_rcv_wanted) {
		SOCKET_TIMER_CANCEL(so);
		/*
		 * so_notify_data will release the lock
		 */
		so_notify_data(so, so->so_rcv_queued);

		if (force_pushp != NULL)
			*force_pushp = B_TRUE;
		goto done;
	} else if (so->so_rcv_timer_tid == 0) {
		/* Make sure the recv push timer is running */
		SOCKET_TIMER_START(so);
	}

done_unlock:
	mutex_exit(&so->so_lock);
done:
	return (space_left);

space_check:
	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}
	goto done_unlock;
}

#pragma inline(so_queue_msg_impl)

ssize_t
so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp)
{
	struct sonode *so = (struct sonode *)sock_handle;

	return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
	    so->so_filter_bottom));
}
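
/*
 * Urgent (OOB) data handling: so_signal_oob() marks that urgent data is
 * pending (SS_OOBPEND) and records where the mark falls in the byte
 * stream; the urgent byte itself is delivered through so_queue_msg()
 * with MSG_OOB set and is stashed by so_queue_oob().
 */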

/*
 * Set the offset of where the oob data is relative to the bytes already
 * queued. Also generate SIGURG.
 */
void
so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
{
	struct sonode *so;

	ASSERT(offset >= 0);
	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	/*
	 * New urgent data on the way so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

	/*
	 * Record that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;

	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}

	/*
	 * Set the offset where the urgent byte is.
	 */
	so->so_oobmark = so->so_rcv_queued + offset;
	if (so->so_oobmark == 0)
		so->so_state |= SS_RCVATMARK;
	else
		so->so_state &= ~SS_RCVATMARK;

	so_notify_oobsig(so);
}

/*
 * Queue the OOB byte
 */
static void
so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
{
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	ASSERT(mp != NULL);
	if (!IS_SO_OOB_INLINE(so)) {
		so->so_oobmsg = mp;
		so->so_state |= SS_HAVEOOBDATA;
	} else {
		so_enqueue_msg(so, mp, len);
	}

	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
}

int
so_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	/*
	 * No new data will be enqueued once the CLOSING flag is set.
	 */
	mutex_enter(&so->so_lock);
	so->so_state |= SS_CLOSING;
	ASSERT(so_verify_oobstate(so));
	so_rcv_flush(so);
	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0)
		sof_sonode_closing(so);

	if (so->so_state & SS_ACCEPTCONN) {
		/*
		 * We grab and release the accept lock to ensure that any
		 * thread about to insert a socket in so_newconn completes
		 * before we flush the queue. Any thread calling so_newconn
		 * after we drop the lock will observe the SS_CLOSING flag,
		 * which will stop it from inserting the socket in the queue.
		 */
		mutex_enter(&so->so_acceptq_lock);
		mutex_exit(&so->so_acceptq_lock);

		so_acceptq_flush(so, B_TRUE);
	}

	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
	switch (error) {
	default:
		/* Protocol made a synchronous close; remove proto ref */
		VN_RELE(SOTOV(so));
		break;
	case EINPROGRESS:
		/*
		 * Protocol is in the process of closing, it will make a
		 * 'closed' upcall to remove the reference.
		 */
		error = 0;
		break;
	}

	return (error);
}

/*
 * Upcall made by the protocol when it's doing an asynchronous close. It
 * will drop the protocol's reference on the socket.
 */
void
so_closed(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	VN_RELE(SOTOV(so));
}

void
so_zcopy_notify(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	so->so_copyflag |= STZCNOTIFY;
	cv_broadcast(&so->so_copy_cv);
	mutex_exit(&so->so_lock);
}

void
so_set_error(sock_upper_handle_t sock_handle, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	soseterror(so, error);

	so_notify_error(so);
}

/*
 * so_recvmsg - read data from the socket
 *
 * There are two ways of obtaining data; either we ask the protocol to
 * copy directly into the supplied buffer, or we copy data from the
 * sonode's receive queue. The decision which one to use depends on
 * whether the protocol has a sd_recv_uio down call.
 */
int
so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	rval_t rval;
	int flags = 0;
	t_uscalar_t controllen, namelen;
	int error = 0;
	int ret;
	mblk_t *mctlp = NULL;
	union T_primitives *tpr;
	void *control;
	ssize_t saved_resid;
	struct uio *suiop;

	SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));

	if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		SO_UNBLOCK_FALLBACK(so);
		return (ENOTCONN);
	}

	if (msg->msg_flags & MSG_PEEK)
		msg->msg_flags &= ~MSG_WAITALL;

	if (so->so_mode & SM_ATOMIC)
		msg->msg_flags |= MSG_TRUNC;

	if (msg->msg_flags & MSG_OOB) {
		if ((so->so_mode & SM_EXDATA) == 0) {
			error = EOPNOTSUPP;
		} else if (so->so_downcalls->sd_recv_uio != NULL) {
			error = (*so->so_downcalls->sd_recv_uio)
			    (so->so_proto_handle, uiop, msg, cr);
		} else {
			error = sorecvoob(so, msg, uiop, msg->msg_flags,
			    IS_SO_OOB_INLINE(so));
		}
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * If the protocol has the recv down call, then pass the request
	 * down.
	 */
	if (so->so_downcalls->sd_recv_uio != NULL) {
		error = (*so->so_downcalls->sd_recv_uio)
		    (so->so_proto_handle, uiop, msg, cr);
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * Reading data from the socket buffer
	 */
	flags = msg->msg_flags;
	msg->msg_flags = 0;

	/*
	 * Set msg_controllen and msg_namelen to zero here to make it
	 * simpler in the cases that no control or name is returned.
	 */
	controllen = msg->msg_controllen;
	namelen = msg->msg_namelen;
	msg->msg_controllen = 0;
	msg->msg_namelen = 0;

	mutex_enter(&so->so_lock);
	/* Set SOREADLOCKED */
	error = so_lock_read_intr(so,
	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
	mutex_exit(&so->so_lock);
	if (error) {
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	suiop = sod_rcv_init(so, flags, &uiop);
retry:
	saved_resid = uiop->uio_resid;
	error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
	if (error != 0) {
		goto out;
	}
	/*
	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
	 * For non-datagrams MOREDATA is used to set MSG_EOR.
	 */
	ASSERT(!(rval.r_val1 & MORECTL));
	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
		msg->msg_flags |= MSG_TRUNC;
	if (mctlp == NULL) {
		dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));

		mutex_enter(&so->so_lock);
		/* Set MSG_EOR based on MOREDATA */
		if (!(rval.r_val1 & MOREDATA)) {
			if (so->so_state & SS_SAVEDEOR) {
				msg->msg_flags |= MSG_EOR;
				so->so_state &= ~SS_SAVEDEOR;
			}
		}
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}

		goto out_locked;
	}
	/* so_queue_msg has already verified length and alignment */
	tpr = (union T_primitives *)mctlp->b_rptr;
	dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
	switch (tpr->type) {
	case T_DATA_IND: {
		/*
		 * Set msg_flags to MSG_EOR based on
		 * MORE_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	case T_UNITDATA_IND: {
		void *addr;
		t_uscalar_t addrlen;
		void *abuf;
		t_uscalar_t optlen;
		void *opt;

		if (namelen != 0) {
			/* Caller wants source address */
			addrlen = tpr->unitdata_ind.SRC_length;
			addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
			    addrlen, 1);
			if (addr == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			ASSERT(so->so_family != AF_UNIX);
		}
		optlen = tpr->unitdata_ind.OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Extract any source address option.
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			if (so->so_family == AF_UNIX)
				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (namelen != 0) {
			/*
			 * Return address to caller.
			 * Caller handles truncation if length
			 * exceeds msg_namelen.
			 * NOTE: AF_UNIX NUL termination is ensured by
			 * the sender's copyin_name().
			 */
			abuf = kmem_alloc(addrlen, KM_SLEEP);

			bcopy(addr, abuf, addrlen);
			msg->msg_name = abuf;
			msg->msg_namelen = addrlen;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				if (msg->msg_namelen != 0)
					kmem_free(msg->msg_name,
					    msg->msg_namelen);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		freemsg(mctlp);
		goto out;
	}
	case T_OPTDATA_IND: {
		struct T_optdata_req *tdr;
		void *opt;
		t_uscalar_t optlen;

		tdr = (struct T_optdata_req *)mctlp->b_rptr;
		optlen = tdr->OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;
			/*
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp,
			    tpr->optdata_ind.OPT_offset, optlen,
			    __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}

			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * DATA_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 * Not possible to wait if control info was received.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    controllen == 0 &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	default:
		cmn_err(CE_CONT, "so_recvmsg bad type %x\n",
		    tpr->type);
		freemsg(mctlp);
		error = EPROTO;
		ASSERT(0);
	}
out:
	mutex_enter(&so->so_lock);
out_locked:
	ret = sod_rcv_done(so, suiop, uiop);
	if (ret != 0 && error == 0)
		error = ret;

	so_unlock_read(so);	/* Clear SOREADLOCKED */
	mutex_exit(&so->so_lock);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
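
/*
 * Dispatch tables: so_sonodeops wires the generic implementations above
 * into the sonode operations vector (the SOP_* entry points), and
 * so_upcalls is the upcall table handed to protocols in so_newconn.
 */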

sonodeops_t so_sonodeops = {
	so_init,		/* sop_init */
	so_accept,		/* sop_accept */
	so_bind,		/* sop_bind */
	so_listen,		/* sop_listen */
	so_connect,		/* sop_connect */
	so_recvmsg,		/* sop_recvmsg */
	so_sendmsg,		/* sop_sendmsg */
	so_sendmblk,		/* sop_sendmblk */
	so_getpeername,		/* sop_getpeername */
	so_getsockname,		/* sop_getsockname */
	so_shutdown,		/* sop_shutdown */
	so_getsockopt,		/* sop_getsockopt */
	so_setsockopt,		/* sop_setsockopt */
	so_ioctl,		/* sop_ioctl */
	so_poll,		/* sop_poll */
	so_close,		/* sop_close */
};

sock_upcalls_t so_upcalls = {
	so_newconn,
	so_connected,
	so_disconnected,
	so_opctl,
	so_queue_msg,
	so_set_prop,
	so_txq_full,
	so_signal_oob,
	so_zcopy_notify,
	so_set_error,
	so_closed
};