/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>

#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/sockio.h>
#include <sys/kmem_impl.h>

#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/ddi.h>
#include <netinet/in.h>
#include <inet/ip.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/sockfilter_impl.h>

#include <sys/socket_proto.h>

#include <fs/sockfs/socktpi_impl.h>
#include <fs/sockfs/sodirect.h>
#include <fs/sockfs/nl7c.h>

extern int xnet_skip_checks;
extern int xnet_check_print;

static void so_queue_oob(struct sonode *, mblk_t *, size_t);


/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag,
    struct cred *cr, struct sonode **nsop)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
    socklen_t *len, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	return (EOPNOTSUPP);
}

/*
 * Generic Socket Ops
 */

/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
	return (socket_init_common(so, pso, flags, cr));
}

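/*
 * Summary note (added for orientation, not from the original comments):
 * most entry points below bracket their work with SO_BLOCK_FALLBACK()
 * and SO_UNBLOCK_FALLBACK(). The macro either holds off a concurrent
 * fallback to TPI for the duration of the call or, if the socket has
 * already fallen back, evaluates the TPI operation passed as its second
 * argument (e.g. SOP_BIND(so, ...)) and returns its result, so the body
 * of each function only ever runs against the direct protocol interface.
 */
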
int
so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));

	ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);

	/* X/Open requires this check */
	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		if (xnet_check_print) {
			printf("sockfs: X/Open bind state check "
			    "caused EINVAL\n");
		}
		error = EINVAL;
		goto done;
	}

	/*
	 * A bind to a NULL address is interpreted as an unbind, so just
	 * do the downcall.
	 */
	if (name == NULL)
		goto dobind;

	switch (so->so_family) {
	case AF_INET:
		if ((size_t)namelen != sizeof (sin_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if ((flags & _SOBIND_XPG4_2) &&
		    (name->sa_family != so->so_family)) {
			/*
			 * This check has to be made for X/Open sockets;
			 * however, application failures have been observed
			 * when it is applied to all sockets.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
		/*
		 * Force a zero sa_family to match so_family.
		 *
		 * Some programs like inetd(1M) don't set the
		 * family field. Other programs leave
		 * sin_family set to garbage - SunOS 4.X does
		 * not check the family field on a bind.
		 * We use the family field that
		 * was passed in to the socket() call.
		 */
		name->sa_family = so->so_family;
		break;

	case AF_INET6: {
#ifdef DEBUG
		sin6_t *sin6 = (sin6_t *)name;
#endif
		if ((size_t)namelen != sizeof (sin6_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if (name->sa_family != so->so_family) {
			/*
			 * With IPv6 we require the family to match,
			 * unlike in IPv4.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
#ifdef DEBUG
		/*
		 * Verify that apps don't forget to clear
		 * sin6_scope_id etc
		 */
		if (sin6->sin6_scope_id != 0 &&
		    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized sin6_scope_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->sin6_scope_id,
			    (int)curproc->p_pid);
		}
		if (sin6->__sin6_src_id != 0) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized __sin6_src_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->__sin6_src_id,
			    (int)curproc->p_pid);
		}
#endif /* DEBUG */

		break;
	}
	default:
		/* Just pass the request to the protocol */
		goto dobind;
	}

	/*
	 * First we check if either NCA or KSSL has been enabled for
	 * the requested address, and if so, we fall back to TPI.
	 * If neither of those two services is enabled, then we just
	 * pass the request to the protocol.
	 *
	 * Note that KSSL can only be enabled on a socket if NCA is NOT
	 * enabled for that socket, hence the else-statement below.
	 */
	if (nl7c_enabled && ((so->so_family == AF_INET ||
	    so->so_family == AF_INET6) &&
	    nl7c_lookup_addr(name, namelen) != NULL)) {
		/*
		 * NL7C is not supported in non-global zones;
		 * we enforce this restriction here.
		 */
		if (so->so_zoneid == GLOBAL_ZONEID) {
			/* NCA should be used, so fall back to TPI */
			error = so_tpi_fallback(so, cr);
			SO_UNBLOCK_FALLBACK(so);
			if (error)
				return (error);
			else
				return (SOP_BIND(so, name, namelen, flags, cr));
		}
	}

dobind:
	if (so->so_filter_active == 0 ||
	    (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_bind)
		    (so->so_proto_handle, name, namelen, cr);
	}
done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_listen(struct sonode *so, int backlog, struct cred *cr)
{
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_listen(so, &backlog, cr)) < 0)
		error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
		    backlog, cr);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_connect(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int fflag, int flags, struct cred *cr)
{
	int error = 0;
	sock_connid_t id;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));

	/*
	 * If there is a pending error, return it.
	 * This can happen if a non-blocking operation caused an error.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_connect(so, (struct sockaddr *)name,
	    &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
		    name, namelen, &id, cr);

		if (error == EINPROGRESS)
			error = so_wait_connected(so,
			    fflag & (FNONBLOCK|FNDELAY), id);
	}
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

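/*
 * Illustrative note (assumption about typical usage, not from the
 * original source): for a non-blocking connect the sd_connect downcall
 * returns EINPROGRESS and so_wait_connected() returns immediately with
 * that error; the caller is then expected to poll for POLLOUT and pick
 * up the final status via getsockopt(SO_ERROR), e.g.:
 *
 *	int err;
 *	socklen_t len = sizeof (err);
 *	(void) getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 */
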
/*ARGSUSED*/
int
so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
{
	int error = 0;
	struct sonode *nso;

	*nsop = NULL;

	SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
	if ((so->so_state & SS_ACCEPTCONN) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
		    EOPNOTSUPP : EINVAL);
	}

	if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
	    &nso)) == 0) {
		ASSERT(nso != NULL);

		/* finish the accept */
		if ((so->so_filter_active > 0 &&
		    (error = sof_filter_accept(nso, cr)) > 0) ||
		    (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
		    nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
			(void) socket_close(nso, 0, cr);
			socket_destroy(nso);
		} else {
			*nsop = nso;
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	int error, flags;
	boolean_t dontblock;
	ssize_t orig_resid;
	mblk_t *mp;

	SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));

	flags = msg->msg_flags;
	error = 0;
	dontblock = (flags & MSG_DONTWAIT) ||
	    (uiop->uio_fmode & (FNONBLOCK|FNDELAY));

	if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
		/*
		 * The old way of passing file descriptors is not supported.
		 */
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	if ((so->so_mode & SM_ATOMIC) &&
	    uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

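	/*
	 * Example (illustrative): on an atomic (datagram-style) socket
	 * whose protocol advertises a sopp_maxpsz of 65507 bytes, a
	 * 70000-byte sendmsg() fails the check above with EMSGSIZE
	 * instead of being split across multiple datagrams.
	 */
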
	/*
	 * For atomic sends we will only do one iteration.
	 */
	do {
		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}

		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}

		/*
		 * Send down OOB messages even if the send path is being
		 * flow controlled (assuming the protocol supports OOB data).
		 */
		if (flags & MSG_OOB) {
			if ((so->so_mode & SM_EXDATA) == 0) {
				error = EOPNOTSUPP;
				break;
			}
		} else if (SO_SND_FLOWCTRLD(so)) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * Time to send data to the protocol. We either copy the
		 * data into mblks or pass the uio directly to the protocol.
		 * We decide what to do based on the available down calls.
		 */
		if (so->so_downcalls->sd_send_uio != NULL) {
			error = (*so->so_downcalls->sd_send_uio)
			    (so->so_proto_handle, uiop, msg, cr);
			if (error != 0)
				break;
		} else {
			/* save the resid in case of failure */
			orig_resid = uiop->uio_resid;

			if ((mp = socopyinuio(uiop,
			    so->so_proto_props.sopp_maxpsz,
			    so->so_proto_props.sopp_wroff,
			    so->so_proto_props.sopp_maxblk,
			    so->so_proto_props.sopp_tail, &error)) == NULL) {
				break;
			}
			ASSERT(uiop->uio_resid >= 0);

			if (so->so_filter_active > 0 &&
			    ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
			    &error)) == NULL)) {
				if (error != 0)
					break;
				continue;
			}
			error = (*so->so_downcalls->sd_send)
			    (so->so_proto_handle, mp, msg, cr);
			if (error != 0) {
				/*
				 * The send failed. We do not have to free the
				 * mblks, because that is the protocol's
				 * responsibility. However, uio_resid must
				 * remain accurate, so adjust that here.
				 */
				uiop->uio_resid = orig_resid;
				break;
			}
		}
	} while (uiop->uio_resid > 0);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
    boolean_t fil_inject)
{
	int error;
	boolean_t dontblock;
	size_t size;
	mblk_t *mp = *mpp;

	if (so->so_downcalls->sd_send == NULL)
		return (EOPNOTSUPP);

	error = 0;
	dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
	    (fflag & (FNONBLOCK|FNDELAY));
	size = msgdsize(mp);

	if ((so->so_mode & SM_ATOMIC) &&
	    size > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

	while (mp != NULL) {
		mblk_t *nmp, *last_mblk;
		size_t mlen;

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}
		/* Socket filters are not flow controlled */
		if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * We only allow so_maxpsz of data to be sent down to
		 * the protocol at a time.
		 */
		mlen = MBLKL(mp);
		nmp = mp->b_cont;
		last_mblk = mp;
		while (nmp != NULL) {
			mlen += MBLKL(nmp);
			if (mlen > so->so_proto_props.sopp_maxpsz) {
				last_mblk->b_cont = NULL;
				break;
			}
			last_mblk = nmp;
			nmp = nmp->b_cont;
		}

		if (so->so_filter_active > 0 &&
		    (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
		    cr, &error)) == NULL) {
			*mpp = mp = nmp;
			if (error != 0)
				break;
			continue;
		}
		error = (*so->so_downcalls->sd_send)
		    (so->so_proto_handle, mp, msg, cr);
		if (error != 0) {
			/*
			 * The send failed. The protocol will free the mblks
			 * that were sent down. Let the caller deal with the
			 * rest.
			 */
			*mpp = nmp;
			break;
		}

		*mpp = mp = nmp;
	}
	/* Let the filter know whether the protocol is flow controlled */
	if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
		error = ENOSPC;

	return (error);
}

#pragma inline(so_sendmblk_impl)

int
so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));

	if ((so->so_mode & SM_SENDFILESUPP) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
	    B_FALSE);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

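/*
 * Note (assumption based on the SM_SENDFILESUPP check above): the
 * so_sendmblk() entry point is the mblk-based transmit path used when
 * the data is already in kernel buffers, as with sendfile(3C); sockets
 * whose protocols do not advertise SM_SENDFILESUPP must use the
 * uio-based so_sendmsg() path instead.
 */
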
int
so_shutdown(struct sonode *so, int how, struct cred *cr)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));

	/*
	 * SunOS 4.X has no check for datagram sockets.
	 * 5.X checks that it is connected (ENOTCONN).
	 * X/Open requires that we check the connected state.
	 */
	if (!(so->so_state & SS_ISCONNECTED)) {
		if (!xnet_skip_checks) {
			error = ENOTCONN;
			if (xnet_check_print) {
				printf("sockfs: X/Open shutdown check "
				    "caused ENOTCONN\n");
			}
		}
		goto done;
	}

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_shutdown(so, &how, cr)) < 0)
		error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
		    how, cr));

	/*
	 * The protocol agreed to shut down. We need to flush the
	 * receive buffer if the receive side is being shut down.
	 */
	if (error == 0 && how != SHUT_WR) {
		mutex_enter(&so->so_lock);
		/* wait for active reader to finish */
		(void) so_lock_read(so, 0);

		so_rcv_flush(so);

		so_unlock_read(so);
		mutex_exit(&so->so_lock);
	}

done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_getsockname(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
		error = (*so->so_downcalls->sd_getsockname)
		    (so->so_proto_handle, addr, addrlen, cr);

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_getpeername(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));

	if (accept) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	} else if (!(so->so_state & SS_ISCONNECTED)) {
		error = ENOTCONN;
	} else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		/* Added this check for X/Open */
		error = EINVAL;
		if (xnet_check_print) {
			printf("sockfs: X/Open getpeername check => EINVAL\n");
		}
	} else if (so->so_filter_active == 0 ||
	    (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

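/*
 * Generic getsockopt path: the request is offered to the socket filters
 * first, then to sockfs itself via socket_getopt_common(), and it only
 * reaches the protocol's sd_getsockopt downcall if neither of those
 * consumed it (each signals "not handled" with a negative return).
 */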
int
so_getsockopt(struct sonode *so, int level, int option_name,
    void *optval, socklen_t *optlenp, int flags, struct cred *cr)
{
	int error = 0;

	if (level == SOL_FILTER)
		return (sof_getsockopt(so, option_name, optval, optlenp, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));

	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_getsockopt(so, level, option_name, optval,
	    optlenp, cr)) < 0) &&
	    (error = socket_getopt_common(so, level, option_name, optval,
	    optlenp, flags)) < 0) {
		error = (*so->so_downcalls->sd_getsockopt)
		    (so->so_proto_handle, level, option_name, optval, optlenp,
		    cr);
		if (error == ENOPROTOOPT) {
			if (level == SOL_SOCKET) {
				/*
				 * If a protocol does not support a particular
				 * socket option, set can fail (not allowed)
				 * but get cannot fail. This is the previous
				 * sockfs behavior.
				 */
				switch (option_name) {
				case SO_LINGER:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct linger)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct linger));
					*optlenp = sizeof (struct linger);
					break;
				case SO_RCVTIMEO:
				case SO_SNDTIMEO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct timeval)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct timeval));
					*optlenp = sizeof (struct timeval);
					break;
				case SO_SND_BUFINFO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct so_snd_bufinfo)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval,
					    sizeof (struct so_snd_bufinfo));
					*optlenp =
					    sizeof (struct so_snd_bufinfo);
					break;
				case SO_DEBUG:
				case SO_REUSEADDR:
				case SO_REUSEPORT:
				case SO_KEEPALIVE:
				case SO_DONTROUTE:
				case SO_BROADCAST:
				case SO_USELOOPBACK:
				case SO_OOBINLINE:
				case SO_DGRAM_ERRIND:
				case SO_SNDBUF:
				case SO_RCVBUF:
					error = 0;
					*((int32_t *)optval) = 0;
					*optlenp = sizeof (int32_t);
					break;
				default:
					break;
				}
			}
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_setsockopt(struct sonode *so, int level, int option_name,
    const void *optval, socklen_t optlen, struct cred *cr)
{
	int error = 0;
	struct timeval tl;
	const void *opt = optval;

	if (level == SOL_FILTER)
		return (sof_setsockopt(so, option_name, optval, optlen, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));

	/* X/Open requires this check */
	if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
		SO_UNBLOCK_FALLBACK(so);
		if (xnet_check_print)
			printf("sockfs: X/Open setsockopt check => EINVAL\n");
		return (EINVAL);
	}

	if (so->so_filter_active > 0 &&
	    (error = sof_filter_setsockopt(so, level, option_name,
	    (void *)optval, &optlen, cr)) >= 0)
		goto done;

	if (level == SOL_SOCKET) {
		switch (option_name) {
		case SO_RCVTIMEO:
		case SO_SNDTIMEO: {
			/*
			 * We pass these two options down to the protocol in
			 * order to support third-party protocols that need
			 * to know them; protocols that don't care simply
			 * ignore them and return 0.
			 */
			clock_t t_usec;

			if (get_udatamodel() == DATAMODEL_NONE ||
			    get_udatamodel() == DATAMODEL_NATIVE) {
				if (optlen != sizeof (struct timeval)) {
					error = EINVAL;
					goto done;
				}
				bcopy((struct timeval *)optval, &tl,
				    sizeof (struct timeval));
			} else {
				if (optlen != sizeof (struct timeval32)) {
					error = EINVAL;
					goto done;
				}
				TIMEVAL32_TO_TIMEVAL(&tl,
				    (struct timeval32 *)optval);
			}
			opt = &tl;
			optlen = sizeof (tl);
			t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
			mutex_enter(&so->so_lock);
			if (option_name == SO_RCVTIMEO)
				so->so_rcvtimeo = drv_usectohz(t_usec);
			else
				so->so_sndtimeo = drv_usectohz(t_usec);
			mutex_exit(&so->so_lock);
			break;
		}
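		/*
		 * Note: the data-model check above matters because a
		 * 32-bit application hands in a struct timeval32 (two
		 * 32-bit fields) while the native layout uses longs;
		 * copying the wrong size would misinterpret or overrun
		 * the user-supplied option buffer.
		 */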
		case SO_RCVBUF:
			/*
			 * XXX XPG 4.2 applications retrieve SO_RCVBUF from
			 * sockfs since the transport might adjust the value
			 * and not return exactly what was set by the
			 * application.
			 */
			so->so_xpg_rcvbuf = *(int32_t *)optval;
			break;
		}
	}
	error = (*so->so_downcalls->sd_setsockopt)
	    (so->so_proto_handle, level, option_name, opt, optlen, cr);
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));

	/*
	 * If there is a pending error, return it.
	 * This can happen if a non-blocking operation caused an error.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	/*
	 * Calling strioc can result in the socket falling back to TPI,
	 * if that is supported.
	 */
	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_ioctl(so, cmd, arg, mode,
	    rvalp, cr)) < 0) &&
	    (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
	    (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
		error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
		    cmd, arg, mode, rvalp, cr);
	}

done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int state = so->so_state, mask;
	*reventsp = 0;

	/*
	 * In sockets, errors are represented as input/output events.
	 */
	if (so->so_error != 0 &&
	    ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
		return (0);
	}

	/*
	 * If the socket is in a state where it can send data,
	 * turn on POLLWRBAND and POLLOUT events.
	 */
	if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
		/*
		 * Out-of-band data is allowed even if the connection
		 * is flow controlled.
		 */
		*reventsp |= POLLWRBAND & events;
		if (!SO_SND_FLOWCTRLD(so)) {
			/*
			 * As long as there is buffer space to send data,
			 * turn on POLLOUT events.
			 */
			*reventsp |= POLLOUT & events;
		}
	}

	/*
	 * Turn on POLLIN whenever there is data on the receive queue,
	 * or the socket is in a state where no more data will be received.
	 * Also, if the socket is accepting connections, flip the bit if
	 * there is something on the queue.
	 *
	 * We do an initial check for events without holding locks. However,
	 * if there are no events available, then we redo the check for POLLIN
	 * events under the lock.
	 */

	/* Pending connections */
	if (!list_is_empty(&so->so_acceptq_list))
		*reventsp |= (POLLIN|POLLRDNORM) & events;

	/*
	 * If we're looking for POLLRDHUP, indicate it if we have sent the
	 * last rx signal for the socket.
	 */
	if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG))
		*reventsp |= POLLRDHUP;

	/* Data */
	/* so_downcalls is null for sctp */
	if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
		*reventsp |= (*so->so_downcalls->sd_poll)
		    (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
		    CRED()) & events;
		ASSERT((*reventsp & ~events) == 0);
		/* do not recheck events */
		events &= ~SO_PROTO_POLLEV;
	} else {
		if (SO_HAVE_DATA(so))
			*reventsp |= (POLLIN|POLLRDNORM) & events;

		/* Urgent data */
		if ((state & SS_OOBPEND) != 0) {
			*reventsp |= (POLLRDBAND | POLLPRI) & events;
		}

		/*
		 * If the socket has become disconnected, we set POLLHUP.
		 * Note that if we are in this state, we will have set POLLIN
		 * (SO_HAVE_DATA() is true on a disconnected socket), but not
		 * POLLOUT (SS_ISCONNECTED is false). This is in keeping with
		 * the semantics of POLLHUP, which is defined to be mutually
		 * exclusive with respect to POLLOUT but not POLLIN. We are
		 * therefore setting POLLHUP primarily for the benefit of
		 * those not polling on POLLIN, as they have no other way of
		 * knowing that the socket has been disconnected.
		 */
		mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;

		if ((state & (mask | SS_ISCONNECTED)) == mask)
			*reventsp |= POLLHUP;
	}

	if ((!*reventsp && !anyyet) || (events & POLLET)) {
		/* Check for read events again, but this time under lock */
		if (events & (POLLIN|POLLRDNORM)) {
			mutex_enter(&so->so_lock);
			if (SO_HAVE_DATA(so) ||
			    !list_is_empty(&so->so_acceptq_list)) {
				if (events & POLLET) {
					so->so_pollev |= SO_POLLEV_IN;
					*phpp = &so->so_poll_list;
				}

				mutex_exit(&so->so_lock);
				*reventsp |= (POLLIN|POLLRDNORM) & events;

				return (0);
			} else {
				so->so_pollev |= SO_POLLEV_IN;
				mutex_exit(&so->so_lock);
			}
		}
		*phpp = &so->so_poll_list;
	}
	return (0);
}

/*
 * Generic Upcalls
 */
void
so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
    cred_t *peer_cred, pid_t peer_cpid)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	ASSERT(so->so_proto_handle != NULL);

	if (peer_cred != NULL) {
		if (so->so_peercred != NULL)
			crfree(so->so_peercred);
		crhold(peer_cred);
		so->so_peercred = peer_cred;
		so->so_cpid = peer_cpid;
	}

	so->so_proto_connid = id;
	soisconnected(so);
	/*
	 * Wake threads waiting for the connection to become established.
	 */
	so_notify_connected(so);
}

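/*
 * Illustrative sketch (assumption, not part of this file): a protocol
 * that completed a handshake reports it through the upcall table it was
 * given, which lands in so_connected() above and wakes any thread
 * blocked in so_connect()/so_wait_connected():
 *
 *	(*upcalls->su_connected)(upper_handle, connid, peer_cred,
 *	    peer_pid);
 */
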
int
so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;
	boolean_t connect_failed;

	mutex_enter(&so->so_lock);

	/*
	 * If we aren't currently connected, then this isn't a disconnect but
	 * rather a failure to connect.
	 */
	connect_failed = !(so->so_state & SS_ISCONNECTED);

	so->so_proto_connid = id;
	soisdisconnected(so, error);
	so_notify_disconnected(so, connect_failed, error);

	return (0);
}

void
so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
    uintptr_t arg)
{
	struct sonode *so = (struct sonode *)sock_handle;

	switch (action) {
	case SOCK_OPCTL_SHUT_SEND:
		mutex_enter(&so->so_lock);
		socantsendmore(so);
		so_notify_disconnecting(so);
		break;
	case SOCK_OPCTL_SHUT_RECV:
		mutex_enter(&so->so_lock);
		socantrcvmore(so);
		so_notify_eof(so);
		break;
	case SOCK_OPCTL_ENAB_ACCEPT:
		mutex_enter(&so->so_lock);
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = (unsigned int)arg;
		/*
		 * The protocol can stop generating newconn upcalls when
		 * the backlog is full, so to make sure the listener does
		 * not end up with a queue full of deferred connections
		 * we reduce the backlog by one. Thus the listener will
		 * start closing deferred connections before the backlog
		 * is full.
		 */
		if (so->so_filter_active > 0)
			so->so_backlog = MAX(1, so->so_backlog - 1);
		mutex_exit(&so->so_lock);
		break;
	default:
		ASSERT(0);
		break;
	}
}

void
so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
{
	struct sonode *so = (struct sonode *)sock_handle;

	if (qfull) {
		so_snd_qfull(so);
	} else {
		so_snd_qnotfull(so);
		mutex_enter(&so->so_lock);
		/* so_notify_writable drops so_lock */
		so_notify_writable(so);
	}
}

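/*
 * Example (illustrative): a protocol would invoke this upcall with
 * qfull == B_TRUE when its transmit queue fills, which makes
 * SO_SND_FLOWCTRLD() true so senders block in so_snd_wait_qnotfull();
 * a later call with qfull == B_FALSE wakes those senders and lets
 * so_poll() report POLLOUT again.
 */
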
sock_upper_handle_t
so_newconn(sock_upper_handle_t parenthandle,
    sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
    struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
{
	struct sonode *so = (struct sonode *)parenthandle;
	struct sonode *nso;
	int error;

	ASSERT(proto_handle != NULL);

	if ((so->so_state & SS_ACCEPTCONN) == 0 ||
	    (so->so_acceptq_len >= so->so_backlog &&
	    (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
		return (NULL);
	}

	nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
	    &error);
	if (nso == NULL)
		return (NULL);

	if (peer_cred != NULL) {
		crhold(peer_cred);
		nso->so_peercred = peer_cred;
		nso->so_cpid = peer_cpid;
	}
	nso->so_listener = so;

	/*
	 * The new socket (nso), proto_handle and sock_upcallsp are all
	 * valid at this point. But as soon as nso is placed in the accept
	 * queue that can no longer be assumed (since an accept() thread may
	 * pull it off the queue and close the socket).
	 */
	*sock_upcallsp = &so_upcalls;

	mutex_enter(&so->so_acceptq_lock);
	if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
		mutex_exit(&so->so_acceptq_lock);
		ASSERT(nso->so_count == 1);
		nso->so_count--;
		nso->so_listener = NULL;
		/* drop proto ref */
		VN_RELE(SOTOV(nso));
		socket_destroy(nso);
		return (NULL);
	} else {
		so->so_acceptq_len++;
		if (nso->so_state & SS_FIL_DEFER) {
			list_insert_tail(&so->so_acceptq_defer, nso);
			mutex_exit(&so->so_acceptq_lock);
		} else {
			list_insert_tail(&so->so_acceptq_list, nso);
			cv_signal(&so->so_acceptq_cv);
			mutex_exit(&so->so_acceptq_lock);
			mutex_enter(&so->so_lock);
			so_notify_newconn(so);
		}

		return ((sock_upper_handle_t)nso);
	}
}

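/*
 * Note on the deferred list above: connections flagged SS_FIL_DEFER by
 * a socket filter are parked on so_acceptq_defer, invisible to accept()
 * until the filter releases them, yet they still count against
 * so_acceptq_len and therefore against the listener's backlog.
 */
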
void
so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
{
	struct sonode *so;

	so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	if (soppp->sopp_flags & SOCKOPT_MAXBLK)
		so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
	if (soppp->sopp_flags & SOCKOPT_WROFF)
		so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
	if (soppp->sopp_flags & SOCKOPT_TAIL)
		so->so_proto_props.sopp_tail = soppp->sopp_tail;
	if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
		so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
	if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
		so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
	if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
		so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
	if (soppp->sopp_flags & SOCKOPT_MINPSZ)
		so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
	if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
		if (soppp->sopp_zcopyflag & ZCVMSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
		} else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
		}

		if (soppp->sopp_zcopyflag & COPYCACHED) {
			so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
		}
	}
	if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
		so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
	if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
		so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
	if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
		so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
	if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
		so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
	if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
		so->so_proto_props.sopp_loopback = soppp->sopp_loopback;

	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0) {
		sof_instance_t *inst;
		ssize_t maxblk;
		ushort_t wroff, tail;

		maxblk = so->so_proto_props.sopp_maxblk;
		wroff = so->so_proto_props.sopp_wroff;
		tail = so->so_proto_props.sopp_tail;
		for (inst = so->so_filter_bottom; inst != NULL;
		    inst = inst->sofi_prev) {
			if (SOF_INTERESTED(inst, mblk_prop)) {
				(*inst->sofi_ops->sofop_mblk_prop)(
				    (sof_handle_t)inst, inst->sofi_cookie,
				    &maxblk, &wroff, &tail);
			}
		}
		mutex_enter(&so->so_lock);
		so->so_proto_props.sopp_maxblk = maxblk;
		so->so_proto_props.sopp_wroff = wroff;
		so->so_proto_props.sopp_tail = tail;
		mutex_exit(&so->so_lock);
	}
#ifdef DEBUG
	soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
	    SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
	    SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
	    SOCKOPT_LOOPBACK);
	ASSERT(soppp->sopp_flags == 0);
#endif
}

/* ARGSUSED */
ssize_t
so_queue_msg_impl(struct sonode *so, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp,
    sof_instance_t *filter)
{
	boolean_t force_push = B_TRUE;
	int space_left;
	sodirect_t *sodp = so->so_direct;

	ASSERT(errorp != NULL);
	*errorp = 0;
	if (mp == NULL) {
		if (so->so_downcalls->sd_recv_uio != NULL) {
			mutex_enter(&so->so_lock);
			/* the notify functions will drop the lock */
			if (flags & MSG_OOB)
				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
			else
				so_notify_data(so, msg_size);
			return (0);
		}
		ASSERT(msg_size == 0);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	ASSERT(mp->b_next == NULL);
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
	ASSERT(msg_size == msgdsize(mp));

	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		mutex_enter(&so->so_lock);
		if (sodp != NULL)
			SOD_UIOAFINI(sodp);
		goto space_check;
	}

	if (so->so_filter_active > 0) {
		for (; filter != NULL; filter = filter->sofi_prev) {
			if (!SOF_INTERESTED(filter, data_in))
				continue;
			mp = (*filter->sofi_ops->sofop_data_in)(
			    (sof_handle_t)filter, filter->sofi_cookie, mp,
			    flags, &msg_size);
			ASSERT(msgdsize(mp) == msg_size);
			DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
			    (mblk_t *), mp);
			/* Data was consumed/dropped, just do space check */
			if (msg_size == 0) {
				mutex_enter(&so->so_lock);
				goto space_check;
			}
		}
	}

	if (flags & MSG_OOB) {
		so_queue_oob(so, mp, msg_size);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	if (force_pushp != NULL)
		force_push = *force_pushp;

	mutex_enter(&so->so_lock);
	if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		*errorp = EOPNOTSUPP;
		return (-1);
	}
	if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
		freemsg(mp);
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* process the mblk via I/OAT if capable */
	if (sodp != NULL && sodp->sod_enabled) {
		if (DB_TYPE(mp) == M_DATA) {
			sod_uioa_mblk_init(sodp, mp, msg_size);
		} else {
			SOD_UIOAFINI(sodp);
		}
	}

	if (mp->b_next == NULL) {
		so_enqueue_msg(so, mp, msg_size);
	} else {
		do {
			mblk_t *nmp;

			if ((nmp = mp->b_next) != NULL) {
				mp->b_next = NULL;
			}
			so_enqueue_msg(so, mp, msgdsize(mp));
			mp = nmp;
		} while (mp != NULL);
	}

	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}

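	/*
	 * Decide whether to wake readers now or to batch: a forced push,
	 * or enough queued data (so_rcv_thresh / so_rcv_wanted), delivers
	 * the notification immediately; otherwise the receive push timer
	 * is armed below so queued data is eventually pushed even if
	 * nothing further arrives.
	 */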
	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
	    so->so_rcv_queued >= so->so_rcv_wanted) {
		SOCKET_TIMER_CANCEL(so);
		/*
		 * so_notify_data will release the lock
		 */
		so_notify_data(so, so->so_rcv_queued);

		if (force_pushp != NULL)
			*force_pushp = B_TRUE;
		goto done;
	} else if (so->so_rcv_timer_tid == 0) {
		/* Make sure the recv push timer is running */
		SOCKET_TIMER_START(so);
	}

done_unlock:
	mutex_exit(&so->so_lock);
done:
	return (space_left);

space_check:
	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}
	goto done_unlock;
}

#pragma inline(so_queue_msg_impl)

ssize_t
so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp)
{
	struct sonode *so = (struct sonode *)sock_handle;

	return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
	    so->so_filter_bottom));
}

/*
 * Set the offset of the oob data relative to the bytes already
 * queued, and generate SIGURG.
 */
void
so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
{
	struct sonode *so;

	ASSERT(offset >= 0);
	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	/*
	 * New urgent data on the way, so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

	/*
	 * Record that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;

	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}

	/*
	 * Set the offset where the urgent byte is.
	 */
	so->so_oobmark = so->so_rcv_queued + offset;
	if (so->so_oobmark == 0)
		so->so_state |= SS_RCVATMARK;
	else
		so->so_state &= ~SS_RCVATMARK;

	so_notify_oobsig(so);
}

/*
 * Queue the OOB byte
 */
static void
so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
{
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	ASSERT(mp != NULL);
	if (!IS_SO_OOB_INLINE(so)) {
		so->so_oobmsg = mp;
		so->so_state |= SS_HAVEOOBDATA;
	} else {
		so_enqueue_msg(so, mp, len);
	}

	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
}

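/*
 * Example (illustrative): with SO_OOBINLINE off, a single urgent byte
 * arrives via so_queue_msg() with MSG_OOB set, is parked in so_oobmsg
 * by so_queue_oob() above, and must be fetched with recv(..., MSG_OOB);
 * with SO_OOBINLINE on, the byte is enqueued in the normal data stream
 * at the mark recorded by so_signal_oob().
 */
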
int
so_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	/*
	 * No new data will be enqueued once the CLOSING flag is set.
	 */
	mutex_enter(&so->so_lock);
	so->so_state |= SS_CLOSING;
	ASSERT(so_verify_oobstate(so));
	so_rcv_flush(so);
	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0)
		sof_sonode_closing(so);

	if (so->so_state & SS_ACCEPTCONN) {
		/*
		 * We grab and release the accept lock to ensure that any
		 * thread about to insert a socket in so_newconn completes
		 * before we flush the queue. Any thread calling so_newconn
		 * after we drop the lock will observe the SS_CLOSING flag,
		 * which will stop it from inserting the socket in the queue.
		 */
		mutex_enter(&so->so_acceptq_lock);
		mutex_exit(&so->so_acceptq_lock);

		so_acceptq_flush(so, B_TRUE);
	}

	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
	switch (error) {
	default:
		/* Protocol made a synchronous close; remove proto ref */
		VN_RELE(SOTOV(so));
		break;
	case EINPROGRESS:
		/*
		 * Protocol is in the process of closing, it will make a
		 * 'closed' upcall to remove the reference.
		 */
		error = 0;
		break;
	}

	return (error);
}

/*
 * Upcall made by the protocol when it's doing an asynchronous close. It
 * will drop the protocol's reference on the socket.
 */
void
so_closed(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	VN_RELE(SOTOV(so));
}

void
so_zcopy_notify(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	so->so_copyflag |= STZCNOTIFY;
	cv_broadcast(&so->so_copy_cv);
	mutex_exit(&so->so_lock);
}

void
so_set_error(sock_upper_handle_t sock_handle, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	soseterror(so, error);

	so_notify_error(so);
}

/*
 * so_recvmsg - read data from the socket
 *
 * There are two ways of obtaining data; either we ask the protocol to
 * copy directly into the supplied buffer, or we copy data from the
 * sonode's receive queue. The decision which one to use depends on
 * whether the protocol has a sd_recv_uio down call.
 */
int
so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	rval_t rval;
	int flags = 0;
	t_uscalar_t controllen, namelen;
	int error = 0;
	int ret;
	mblk_t *mctlp = NULL;
	union T_primitives *tpr;
	void *control;
	ssize_t saved_resid;
	struct uio *suiop;

	SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));

	if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		SO_UNBLOCK_FALLBACK(so);
		return (ENOTCONN);
	}

	if (msg->msg_flags & MSG_PEEK)
		msg->msg_flags &= ~MSG_WAITALL;

	if (so->so_mode & SM_ATOMIC)
		msg->msg_flags |= MSG_TRUNC;

	if (msg->msg_flags & MSG_OOB) {
		if ((so->so_mode & SM_EXDATA) == 0) {
			error = EOPNOTSUPP;
		} else if (so->so_downcalls->sd_recv_uio != NULL) {
			error = (*so->so_downcalls->sd_recv_uio)
			    (so->so_proto_handle, uiop, msg, cr);
		} else {
			error = sorecvoob(so, msg, uiop, msg->msg_flags,
			    IS_SO_OOB_INLINE(so));
		}
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * If the protocol has the recv down call, then pass the request
	 * down.
	 */
	if (so->so_downcalls->sd_recv_uio != NULL) {
		error = (*so->so_downcalls->sd_recv_uio)
		    (so->so_proto_handle, uiop, msg, cr);
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * Reading data from the socket buffer
	 */
	flags = msg->msg_flags;
	msg->msg_flags = 0;

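	/*
	 * Note: any control mblk returned by so_dequeue_msg() below is a
	 * TPI primitive (T_DATA_IND, T_UNITDATA_IND or T_OPTDATA_IND);
	 * it is decoded in the switch statement further down.
	 */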
	/*
	 * Set msg_controllen and msg_namelen to zero here to make it
	 * simpler in the cases that no control or name is returned.
	 */
	controllen = msg->msg_controllen;
	namelen = msg->msg_namelen;
	msg->msg_controllen = 0;
	msg->msg_namelen = 0;

	mutex_enter(&so->so_lock);
	/* Set SOREADLOCKED */
	error = so_lock_read_intr(so,
	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
	mutex_exit(&so->so_lock);
	if (error) {
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	suiop = sod_rcv_init(so, flags, &uiop);
retry:
	saved_resid = uiop->uio_resid;
	error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
	if (error != 0) {
		goto out;
	}
	/*
	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
	 * For non-datagrams MOREDATA is used to set MSG_EOR.
	 */
	ASSERT(!(rval.r_val1 & MORECTL));
	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
		msg->msg_flags |= MSG_TRUNC;
	if (mctlp == NULL) {
		dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));

		mutex_enter(&so->so_lock);
		/* Set MSG_EOR based on MOREDATA */
		if (!(rval.r_val1 & MOREDATA)) {
			if (so->so_state & SS_SAVEDEOR) {
				msg->msg_flags |= MSG_EOR;
				so->so_state &= ~SS_SAVEDEOR;
			}
		}
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}

		goto out_locked;
	}
	/* so_queue_msg has already verified length and alignment */
	tpr = (union T_primitives *)mctlp->b_rptr;
	dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
	switch (tpr->type) {
	case T_DATA_IND: {
		/*
		 * Set msg_flags to MSG_EOR based on
		 * MORE_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
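	/*
	 * The T_UNITDATA_IND case below is the datagram path: the
	 * primitive carries the source address and any ancillary options,
	 * which are copied out to msg_name and msg_control respectively.
	 */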
	case T_UNITDATA_IND: {
		void *addr;
		t_uscalar_t addrlen;
		void *abuf;
		t_uscalar_t optlen;
		void *opt;

		if (namelen != 0) {
			/* Caller wants source address */
			addrlen = tpr->unitdata_ind.SRC_length;
			addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
			    addrlen, 1);
			if (addr == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			ASSERT(so->so_family != AF_UNIX);
		}
		optlen = tpr->unitdata_ind.OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Extract any source address option.
			 * Determine how large a cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			if (so->so_family == AF_UNIX)
				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (namelen != 0) {
			/*
			 * Return address to caller.
			 * Caller handles truncation if length
			 * exceeds msg_namelen.
			 * NOTE: AF_UNIX NUL termination is ensured by
			 * the sender's copyin_name().
			 */
			abuf = kmem_alloc(addrlen, KM_SLEEP);

			bcopy(addr, abuf, addrlen);
			msg->msg_name = abuf;
			msg->msg_namelen = addrlen;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				if (msg->msg_namelen != 0)
					kmem_free(msg->msg_name,
					    msg->msg_namelen);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		freemsg(mctlp);
		goto out;
	}
	case T_OPTDATA_IND: {
		struct T_optdata_req *tdr;
		void *opt;
		t_uscalar_t optlen;

		tdr = (struct T_optdata_req *)mctlp->b_rptr;
		optlen = tdr->OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Determine how large a cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp,
			    tpr->optdata_ind.OPT_offset, optlen,
			    __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}

			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * DATA_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 * Waiting is not possible if control info was received.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    controllen == 0 &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	default:
		cmn_err(CE_CONT, "so_recvmsg bad type %x\n",
		    tpr->type);
		freemsg(mctlp);
		error = EPROTO;
		ASSERT(0);
	}
out:
	mutex_enter(&so->so_lock);
out_locked:
	ret = sod_rcv_done(so, suiop, uiop);
	if (ret != 0 && error == 0)
		error = ret;

	so_unlock_read(so);	/* Clear SOREADLOCKED */
	mutex_exit(&so->so_lock);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

sonodeops_t so_sonodeops = {
	so_init,		/* sop_init */
	so_accept,		/* sop_accept */
	so_bind,		/* sop_bind */
	so_listen,		/* sop_listen */
	so_connect,		/* sop_connect */
	so_recvmsg,		/* sop_recvmsg */
	so_sendmsg,		/* sop_sendmsg */
	so_sendmblk,		/* sop_sendmblk */
	so_getpeername,		/* sop_getpeername */
	so_getsockname,		/* sop_getsockname */
	so_shutdown,		/* sop_shutdown */
	so_getsockopt,		/* sop_getsockopt */
	so_setsockopt,		/* sop_setsockopt */
	so_ioctl,		/* sop_ioctl */
	so_poll,		/* sop_poll */
	so_close,		/* sop_close */
};

sock_upcalls_t so_upcalls = {
	so_newconn,
	so_connected,
	so_disconnected,
	so_opctl,
	so_queue_msg,
	so_set_prop,
	so_txq_full,
	so_signal_oob,
	so_zcopy_notify,
	so_set_error,
	so_closed
};
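
/*
 * Illustrative sketch (assumption, not part of this file): a protocol
 * hands a new connection to sockfs roughly as follows, receiving the
 * so_upcalls table above through sock_upcallsp:
 *
 *	sock_upcalls_t *upcalls;
 *	sock_upper_handle_t uh;
 *
 *	uh = so_newconn(listener_handle, proto_handle, &proto_downcalls,
 *	    peer_cred, peer_pid, &upcalls);
 *	if (uh == NULL)
 *		... listener closing or backlog full: the protocol
 *		    must drop/reset the new connection ...
 */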