/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>

#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/sockio.h>
#include <sys/kmem_impl.h>

#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/ddi.h>
#include <netinet/in.h>
#include <inet/ip.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/sockfilter_impl.h>

#include <sys/socket_proto.h>

#include <fs/sockfs/socktpi_impl.h>
#include <fs/sockfs/sodirect.h>
#include <fs/sockfs/nl7c.h>

extern int xnet_skip_checks;
extern int xnet_check_print;

static void so_queue_oob(struct sonode *, mblk_t *, size_t);

/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag,
    struct cred *cr, struct sonode **nsop)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
    socklen_t *len, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	return (EOPNOTSUPP);
}

/*
 * Generic Socket Ops
 */

/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
	return (socket_init_common(so, pso, flags, cr));
}
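
/*
 * A note on the pattern used by the ops below: SO_BLOCK_FALLBACK()
 * keeps the socket from falling back to TPI while an operation is in
 * progress (re-dispatching through the supplied SOP_*() call if a
 * fallback has already taken place), so every return path must pair it
 * with SO_UNBLOCK_FALLBACK(). Likewise, a negative return value from a
 * sof_filter_*() call means that no filter claimed the operation, and
 * the request is then passed on to the protocol's downcall.
 */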
int
so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));

	ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);

	/* X/Open requires this check */
	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		if (xnet_check_print) {
			printf("sockfs: X/Open bind state check "
			    "caused EINVAL\n");
		}
		error = EINVAL;
		goto done;
	}

	/*
	 * A bind to a NULL address is interpreted as an unbind, so just
	 * do the downcall.
	 */
	if (name == NULL)
		goto dobind;

	switch (so->so_family) {
	case AF_INET:
		if ((size_t)namelen != sizeof (sin_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if ((flags & _SOBIND_XPG4_2) &&
		    (name->sa_family != so->so_family)) {
			/*
			 * This check has to be made for X/Open
			 * sockets; however, application failures have
			 * been observed when it is applied to
			 * all sockets.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
		/*
		 * Force a zero sa_family to match so_family.
		 *
		 * Some programs like inetd(1M) don't set the
		 * family field. Other programs leave
		 * sin_family set to garbage - SunOS 4.X does
		 * not check the family field on a bind.
		 * We use the family field that
		 * was passed in to the socket() call.
		 */
		name->sa_family = so->so_family;
		break;

	case AF_INET6: {
#ifdef DEBUG
		sin6_t *sin6 = (sin6_t *)name;
#endif
		if ((size_t)namelen != sizeof (sin6_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if (name->sa_family != so->so_family) {
			/*
			 * With IPv6 we require the family to match,
			 * unlike in IPv4.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
#ifdef DEBUG
		/*
		 * Verify that apps don't forget to clear
		 * sin6_scope_id etc.
		 */
		if (sin6->sin6_scope_id != 0 &&
		    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized sin6_scope_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->sin6_scope_id,
			    (int)curproc->p_pid);
		}
		if (sin6->__sin6_src_id != 0) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized __sin6_src_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->__sin6_src_id,
			    (int)curproc->p_pid);
		}
#endif /* DEBUG */

		break;
	}
	default:
		/* Just pass the request to the protocol */
		goto dobind;
	}

	/*
	 * Check whether NL7C (NCA) has been enabled for the requested
	 * address; if so, fall back to TPI so that NCA can service the
	 * socket. Otherwise the request is simply passed to the protocol.
	 */
	if (nl7c_enabled && ((so->so_family == AF_INET ||
	    so->so_family == AF_INET6) &&
	    nl7c_lookup_addr(name, namelen) != NULL)) {
		/*
		 * NL7C is not supported in non-global zones;
		 * we enforce this restriction here.
		 */
		if (so->so_zoneid == GLOBAL_ZONEID) {
			/* NCA should be used, so fall back to TPI */
			error = so_tpi_fallback(so, cr);
			SO_UNBLOCK_FALLBACK(so);
			if (error)
				return (error);
			else
				return (SOP_BIND(so, name, namelen, flags, cr));
		}
	}

dobind:
	if (so->so_filter_active == 0 ||
	    (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_bind)
		    (so->so_proto_handle, name, namelen, cr);
	}
done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_listen(struct sonode *so, int backlog, struct cred *cr)
{
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_listen(so, &backlog, cr)) < 0)
		error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
		    backlog, cr);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_connect(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int fflag, int flags, struct cred *cr)
{
	int error = 0;
	sock_connid_t id;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));

	/*
	 * If there is a pending error, return it. This can happen if a
	 * non-blocking operation caused an error.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_connect(so, (struct sockaddr *)name,
	    &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
		    name, namelen, &id, cr);

		if (error == EINPROGRESS)
			error = so_wait_connected(so,
			    fflag & (FNONBLOCK|FNDELAY), id);
	}
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
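
/*
 * Dequeue the next connection from the accept queue (blocking unless
 * FNONBLOCK/FNDELAY is set) and let any filters and the protocol
 * finish the accept. On failure the new socket is closed and
 * destroyed, and the error is returned to the caller.
 */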
/*ARGSUSED*/
int
so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
{
	int error = 0;
	struct sonode *nso;

	*nsop = NULL;

	SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
	if ((so->so_state & SS_ACCEPTCONN) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
		    EOPNOTSUPP : EINVAL);
	}

	if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
	    &nso)) == 0) {
		ASSERT(nso != NULL);

		/* finish the accept */
		if ((so->so_filter_active > 0 &&
		    (error = sof_filter_accept(nso, cr)) > 0) ||
		    (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
		    nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
			(void) socket_close(nso, 0, cr);
			socket_destroy(nso);
		} else {
			*nsop = nso;
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	int error, flags;
	boolean_t dontblock;
	ssize_t orig_resid;
	mblk_t *mp;

	SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));

	flags = msg->msg_flags;
	error = 0;
	dontblock = (flags & MSG_DONTWAIT) ||
	    (uiop->uio_fmode & (FNONBLOCK|FNDELAY));

	if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
		/*
		 * The old way of passing fd's is not supported.
		 */
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	if ((so->so_mode & SM_ATOMIC) &&
	    uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

	/*
	 * For atomic sends we will only do one iteration.
	 */
	do {
		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}

		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}

		/*
		 * Send down OOB messages even if the send path is being
		 * flow controlled (assuming the protocol supports OOB data).
		 */
		if (flags & MSG_OOB) {
			if ((so->so_mode & SM_EXDATA) == 0) {
				error = EOPNOTSUPP;
				break;
			}
		} else if (SO_SND_FLOWCTRLD(so)) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * Time to send data to the protocol. We either copy the
		 * data into mblks or pass the uio directly to the protocol.
		 * We decide what to do based on the available down calls.
		 */
		if (so->so_downcalls->sd_send_uio != NULL) {
			error = (*so->so_downcalls->sd_send_uio)
			    (so->so_proto_handle, uiop, msg, cr);
			if (error != 0)
				break;
		} else {
			/* save the resid in case of failure */
			orig_resid = uiop->uio_resid;

			if ((mp = socopyinuio(uiop,
			    so->so_proto_props.sopp_maxpsz,
			    so->so_proto_props.sopp_wroff,
			    so->so_proto_props.sopp_maxblk,
			    so->so_proto_props.sopp_tail, &error)) == NULL) {
				break;
			}
			ASSERT(uiop->uio_resid >= 0);

			if (so->so_filter_active > 0 &&
			    ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
			    &error)) == NULL)) {
				if (error != 0)
					break;
				continue;
			}
			error = (*so->so_downcalls->sd_send)
			    (so->so_proto_handle, mp, msg, cr);
			if (error != 0) {
				/*
				 * The send failed. We do not have to free the
				 * mblks, because that is the protocol's
				 * responsibility. However, uio_resid must
				 * remain accurate, so adjust that here.
				 */
				uiop->uio_resid = orig_resid;
				break;
			}
		}
	} while (uiop->uio_resid > 0);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
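
/*
 * Send a chain of mblks down to the protocol, at most sopp_maxpsz
 * bytes at a time. 'fil' identifies the filter (if any) from which the
 * data originates; data injected by a filter ('fil_inject') bypasses
 * flow control, and such a filter is told via ENOSPC when the protocol
 * has become flow controlled. On failure, *mpp points at the unsent
 * remainder of the chain.
 */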
int
so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
    boolean_t fil_inject)
{
	int error;
	boolean_t dontblock;
	size_t size;
	mblk_t *mp = *mpp;

	if (so->so_downcalls->sd_send == NULL)
		return (EOPNOTSUPP);

	error = 0;
	dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
	    (fflag & (FNONBLOCK|FNDELAY));
	size = msgdsize(mp);

	if ((so->so_mode & SM_ATOMIC) &&
	    size > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

	while (mp != NULL) {
		mblk_t *nmp, *last_mblk;
		size_t mlen;

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}
		/* Socket filters are not flow controlled */
		if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * We only allow so_maxpsz of data to be sent down to
		 * the protocol at a time.
		 */
		mlen = MBLKL(mp);
		nmp = mp->b_cont;
		last_mblk = mp;
		while (nmp != NULL) {
			mlen += MBLKL(nmp);
			if (mlen > so->so_proto_props.sopp_maxpsz) {
				last_mblk->b_cont = NULL;
				break;
			}
			last_mblk = nmp;
			nmp = nmp->b_cont;
		}

		if (so->so_filter_active > 0 &&
		    (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
		    cr, &error)) == NULL) {
			*mpp = mp = nmp;
			if (error != 0)
				break;
			continue;
		}
		error = (*so->so_downcalls->sd_send)
		    (so->so_proto_handle, mp, msg, cr);
		if (error != 0) {
			/*
			 * The send failed. The protocol will free the mblks
			 * that were sent down. Let the caller deal with the
			 * rest.
			 */
			*mpp = nmp;
			break;
		}

		*mpp = mp = nmp;
	}
	/* Let the filter know whether the protocol is flow controlled */
	if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
		error = ENOSPC;

	return (error);
}

#pragma inline(so_sendmblk_impl)

int
so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));

	if ((so->so_mode & SM_SENDFILESUPP) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
	    B_FALSE);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
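
/*
 * Shut down one or both directions of a connection. Once the protocol
 * has agreed to the shutdown, the receive buffer is flushed if the
 * receive side is being shut down.
 */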
int
so_shutdown(struct sonode *so, int how, struct cred *cr)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));

	/*
	 * SunOS 4.X has no check for datagram sockets.
	 * 5.X checks that it is connected (ENOTCONN)
	 * X/Open requires that we check the connected state.
	 */
	if (!(so->so_state & SS_ISCONNECTED)) {
		if (!xnet_skip_checks) {
			error = ENOTCONN;
			if (xnet_check_print) {
				printf("sockfs: X/Open shutdown check "
				    "caused ENOTCONN\n");
			}
		}
		goto done;
	}

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_shutdown(so, &how, cr)) < 0)
		error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
		    how, cr));

	/*
	 * The protocol agreed to shut down. We need to flush the
	 * receive buffer if the receive side is being shut down.
	 */
	if (error == 0 && how != SHUT_WR) {
		mutex_enter(&so->so_lock);
		/* wait for active reader to finish */
		(void) so_lock_read(so, 0);

		so_rcv_flush(so);

		so_unlock_read(so);
		mutex_exit(&so->so_lock);
	}

done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_getsockname(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
		error = (*so->so_downcalls->sd_getsockname)
		    (so->so_proto_handle, addr, addrlen, cr);

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_getpeername(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));

	if (accept) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	} else if (!(so->so_state & SS_ISCONNECTED)) {
		error = ENOTCONN;
	} else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		/* Added this check for X/Open */
		error = EINVAL;
		if (xnet_check_print) {
			printf("sockfs: X/Open getpeername check => EINVAL\n");
		}
	} else if (so->so_filter_active == 0 ||
	    (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
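
/*
 * Get a socket option. SOL_FILTER options are handled by sockfs
 * itself; for everything else the filters get the first shot, then
 * socket_getopt_common(), and finally the protocol. If the protocol
 * returns ENOPROTOOPT for a common SOL_SOCKET option, a zeroed default
 * is returned instead so that the get does not fail.
 */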
int
so_getsockopt(struct sonode *so, int level, int option_name,
    void *optval, socklen_t *optlenp, int flags, struct cred *cr)
{
	int error = 0;

	if (level == SOL_FILTER)
		return (sof_getsockopt(so, option_name, optval, optlenp, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));

	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_getsockopt(so, level, option_name, optval,
	    optlenp, cr)) < 0) &&
	    (error = socket_getopt_common(so, level, option_name, optval,
	    optlenp, flags)) < 0) {
		error = (*so->so_downcalls->sd_getsockopt)
		    (so->so_proto_handle, level, option_name, optval, optlenp,
		    cr);
		if (error == ENOPROTOOPT) {
			if (level == SOL_SOCKET) {
				/*
				 * If a protocol does not support a particular
				 * socket option, setting it can fail (not
				 * allowed) but getting it cannot fail. This
				 * is the historical sockfs behavior.
				 */
				switch (option_name) {
				case SO_LINGER:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct linger)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct linger));
					*optlenp = sizeof (struct linger);
					break;
				case SO_RCVTIMEO:
				case SO_SNDTIMEO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct timeval)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct timeval));
					*optlenp = sizeof (struct timeval);
					break;
				case SO_SND_BUFINFO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct so_snd_bufinfo)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval,
					    sizeof (struct so_snd_bufinfo));
					*optlenp =
					    sizeof (struct so_snd_bufinfo);
					break;
				case SO_DEBUG:
				case SO_REUSEADDR:
				case SO_KEEPALIVE:
				case SO_DONTROUTE:
				case SO_BROADCAST:
				case SO_USELOOPBACK:
				case SO_OOBINLINE:
				case SO_DGRAM_ERRIND:
				case SO_SNDBUF:
				case SO_RCVBUF:
					error = 0;
					*((int32_t *)optval) = 0;
					*optlenp = sizeof (int32_t);
					break;
				default:
					break;
				}
			}
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_setsockopt(struct sonode *so, int level, int option_name,
    const void *optval, socklen_t optlen, struct cred *cr)
{
	int error = 0;
	struct timeval tl;
	const void *opt = optval;

	if (level == SOL_FILTER)
		return (sof_setsockopt(so, option_name, optval, optlen, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));

	/* X/Open requires this check */
	if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
		SO_UNBLOCK_FALLBACK(so);
		if (xnet_check_print)
			printf("sockfs: X/Open setsockopt check => EINVAL\n");
		return (EINVAL);
	}

	if (so->so_filter_active > 0 &&
	    (error = sof_filter_setsockopt(so, level, option_name,
	    (void *)optval, &optlen, cr)) >= 0)
		goto done;

	if (level == SOL_SOCKET) {
		switch (option_name) {
		case SO_RCVTIMEO:
		case SO_SNDTIMEO: {
			/*
			 * We pass these two options down to the protocol in
			 * order to support third-party protocols that need
			 * to know them; protocols that don't care about
			 * these two options simply return 0.
			 */
			clock_t t_usec;

			if (get_udatamodel() == DATAMODEL_NONE ||
			    get_udatamodel() == DATAMODEL_NATIVE) {
				if (optlen != sizeof (struct timeval)) {
					error = EINVAL;
					goto done;
				}
				bcopy((struct timeval *)optval, &tl,
				    sizeof (struct timeval));
			} else {
				if (optlen != sizeof (struct timeval32)) {
					error = EINVAL;
					goto done;
				}
				TIMEVAL32_TO_TIMEVAL(&tl,
				    (struct timeval32 *)optval);
			}
			opt = &tl;
			optlen = sizeof (tl);
			t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
			mutex_enter(&so->so_lock);
			if (option_name == SO_RCVTIMEO)
				so->so_rcvtimeo = drv_usectohz(t_usec);
			else
				so->so_sndtimeo = drv_usectohz(t_usec);
			mutex_exit(&so->so_lock);
			break;
		}
		case SO_RCVBUF:
			/*
			 * XXX XPG 4.2 applications retrieve SO_RCVBUF from
			 * sockfs since the transport might adjust the value
			 * and not return exactly what was set by the
			 * application.
			 */
			so->so_xpg_rcvbuf = *(int32_t *)optval;
			break;
		}
	}
	error = (*so->so_downcalls->sd_setsockopt)
	    (so->so_proto_handle, level, option_name, opt, optlen, cr);
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
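
/*
 * Process an ioctl. As elsewhere in this file, a negative return value
 * from a handler means "not handled here": the filters are tried
 * first, then the common socket and STREAMS ioctl handlers, and
 * finally the protocol itself.
 */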
int
so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));

	/*
	 * If there is a pending error, return it. This can happen if a
	 * non-blocking operation caused an error.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	/*
	 * Calling strioc can result in the socket falling back to TPI,
	 * if that is supported.
	 */
	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_ioctl(so, cmd, arg, mode,
	    rvalp, cr)) < 0) &&
	    (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
	    (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
		error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
		    cmd, arg, mode, rvalp, cr);
	}

done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
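
/*
 * Poll for socket events. Events are first computed without taking
 * so_lock; if none are found and the caller may wait, read events are
 * rechecked under so_lock before the pollhead is armed so that a
 * concurrent arrival is not missed.
 */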
int
so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int state = so->so_state, mask;
	*reventsp = 0;

	/*
	 * In sockets, errors are represented as input/output events.
	 */
	if (so->so_error != 0 &&
	    ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
		return (0);
	}

	/*
	 * If the socket is in a state where it can send data,
	 * turn on POLLWRBAND and POLLOUT events.
	 */
	if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
		/*
		 * Out-of-band data is allowed even if the connection
		 * is flow controlled.
		 */
		*reventsp |= POLLWRBAND & events;
		if (!SO_SND_FLOWCTRLD(so)) {
			/*
			 * As long as there is buffer space to send data,
			 * turn on POLLOUT events.
			 */
			*reventsp |= POLLOUT & events;
		}
	}

	/*
	 * Turn on POLLIN whenever there is data on the receive queue,
	 * or the socket is in a state where no more data will be received.
	 * Also, if the socket is accepting connections, flip the bit if
	 * there is something on the queue.
	 *
	 * We do an initial check for events without holding locks. However,
	 * if there are no events available, then we redo the check for
	 * POLLIN events under the lock.
	 */

	/* Pending connections */
	if (!list_is_empty(&so->so_acceptq_list))
		*reventsp |= (POLLIN|POLLRDNORM) & events;

	/* Data */
	/* so_downcalls is null for sctp */
	if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
		*reventsp |= (*so->so_downcalls->sd_poll)
		    (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
		    CRED()) & events;
		ASSERT((*reventsp & ~events) == 0);
		/* do not recheck events */
		events &= ~SO_PROTO_POLLEV;
	} else {
		if (SO_HAVE_DATA(so))
			*reventsp |= (POLLIN|POLLRDNORM) & events;

		/* Urgent data */
		if ((state & SS_OOBPEND) != 0) {
			*reventsp |= (POLLRDBAND | POLLPRI) & events;
		}

		/*
		 * If the socket has become disconnected, we set POLLHUP.
		 * Note that if we are in this state, we will have set POLLIN
		 * (SO_HAVE_DATA() is true on a disconnected socket), but not
		 * POLLOUT (SS_ISCONNECTED is false). This is in keeping with
		 * the semantics of POLLHUP, which is defined to be mutually
		 * exclusive with respect to POLLOUT but not POLLIN. We are
		 * therefore setting POLLHUP primarily for the benefit of
		 * those not polling on POLLIN, as they have no other way of
		 * knowing that the socket has been disconnected.
		 */
		mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;

		if ((state & (mask | SS_ISCONNECTED)) == mask)
			*reventsp |= POLLHUP;
	}

	if (!*reventsp && !anyyet) {
		/* Check for read events again, but this time under lock */
		if (events & (POLLIN|POLLRDNORM)) {
			mutex_enter(&so->so_lock);
			if (SO_HAVE_DATA(so) ||
			    !list_is_empty(&so->so_acceptq_list)) {
				mutex_exit(&so->so_lock);
				*reventsp |= (POLLIN|POLLRDNORM) & events;
				return (0);
			} else {
				so->so_pollev |= SO_POLLEV_IN;
				mutex_exit(&so->so_lock);
			}
		}
		*phpp = &so->so_poll_list;
	}
	return (0);
}

/*
 * Generic Upcalls
 */
void
so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
    cred_t *peer_cred, pid_t peer_cpid)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	ASSERT(so->so_proto_handle != NULL);

	if (peer_cred != NULL) {
		if (so->so_peercred != NULL)
			crfree(so->so_peercred);
		crhold(peer_cred);
		so->so_peercred = peer_cred;
		so->so_cpid = peer_cpid;
	}

	so->so_proto_connid = id;
	soisconnected(so);
	/*
	 * Wake threads waiting for the connection to become established.
	 */
	so_notify_connected(so);
}

int
so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;
	boolean_t connect_failed;

	mutex_enter(&so->so_lock);

	/*
	 * If we aren't currently connected, then this isn't a disconnect but
	 * rather a failure to connect.
	 */
	connect_failed = !(so->so_state & SS_ISCONNECTED);

	so->so_proto_connid = id;
	soisdisconnected(so, error);
	so_notify_disconnected(so, connect_failed, error);

	return (0);
}

void
so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
    uintptr_t arg)
{
	struct sonode *so = (struct sonode *)sock_handle;

	switch (action) {
	case SOCK_OPCTL_SHUT_SEND:
		mutex_enter(&so->so_lock);
		socantsendmore(so);
		so_notify_disconnecting(so);
		break;
	case SOCK_OPCTL_SHUT_RECV:
		mutex_enter(&so->so_lock);
		socantrcvmore(so);
		so_notify_eof(so);
		break;
	case SOCK_OPCTL_ENAB_ACCEPT:
		mutex_enter(&so->so_lock);
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = (unsigned int)arg;
		/*
		 * The protocol can stop generating newconn upcalls when
		 * the backlog is full, so to make sure the listener does
		 * not end up with a queue full of deferred connections
		 * we reduce the backlog by one. Thus the listener will
		 * start closing deferred connections before the backlog
		 * is full.
		 */
		if (so->so_filter_active > 0)
			so->so_backlog = MAX(1, so->so_backlog - 1);
		mutex_exit(&so->so_lock);
		break;
	default:
		ASSERT(0);
		break;
	}
}

void
so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
{
	struct sonode *so = (struct sonode *)sock_handle;

	if (qfull) {
		so_snd_qfull(so);
	} else {
		so_snd_qnotfull(so);
		mutex_enter(&so->so_lock);
		/* so_notify_writable drops so_lock */
		so_notify_writable(so);
	}
}
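
/*
 * Upcall made by the protocol for each incoming connection. Returning
 * NULL rejects the connection (the listener is not accepting
 * connections, the accept queue is full, or the socket is closing),
 * and the protocol is expected to drop it. Otherwise the new socket is
 * placed on the accept queue (or the deferred queue if a filter has
 * deferred the connection) and its upper handle is returned along with
 * the socket upcalls.
 */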
sock_upper_handle_t
so_newconn(sock_upper_handle_t parenthandle,
    sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
    struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
{
	struct sonode *so = (struct sonode *)parenthandle;
	struct sonode *nso;
	int error;

	ASSERT(proto_handle != NULL);

	if ((so->so_state & SS_ACCEPTCONN) == 0 ||
	    (so->so_acceptq_len >= so->so_backlog &&
	    (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
		return (NULL);
	}

	nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
	    &error);
	if (nso == NULL)
		return (NULL);

	if (peer_cred != NULL) {
		crhold(peer_cred);
		nso->so_peercred = peer_cred;
		nso->so_cpid = peer_cpid;
	}
	nso->so_listener = so;

	/*
	 * The new socket (nso), proto_handle and sock_upcallsp are all
	 * valid at this point. But as soon as nso is placed in the accept
	 * queue that can no longer be assumed (since an accept() thread may
	 * pull it off the queue and close the socket).
	 */
	*sock_upcallsp = &so_upcalls;

	mutex_enter(&so->so_acceptq_lock);
	if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
		mutex_exit(&so->so_acceptq_lock);
		ASSERT(nso->so_count == 1);
		nso->so_count--;
		nso->so_listener = NULL;
		/* drop proto ref */
		VN_RELE(SOTOV(nso));
		socket_destroy(nso);
		return (NULL);
	} else {
		so->so_acceptq_len++;
		if (nso->so_state & SS_FIL_DEFER) {
			list_insert_tail(&so->so_acceptq_defer, nso);
			mutex_exit(&so->so_acceptq_lock);
		} else {
			list_insert_tail(&so->so_acceptq_list, nso);
			cv_signal(&so->so_acceptq_cv);
			mutex_exit(&so->so_acceptq_lock);
			mutex_enter(&so->so_lock);
			so_notify_newconn(so);
		}

		return ((sock_upper_handle_t)nso);
	}
}
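
/*
 * Upcall by which the protocol publishes its properties (buffer
 * limits, write offset, maximum packet size, and so on). Filters that
 * are interested in mblk properties get a chance to adjust maxblk,
 * wroff and tail before the values are recorded.
 */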
void
so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
{
	struct sonode *so;

	so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	if (soppp->sopp_flags & SOCKOPT_MAXBLK)
		so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
	if (soppp->sopp_flags & SOCKOPT_WROFF)
		so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
	if (soppp->sopp_flags & SOCKOPT_TAIL)
		so->so_proto_props.sopp_tail = soppp->sopp_tail;
	if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
		so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
	if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
		so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
	if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
		so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
	if (soppp->sopp_flags & SOCKOPT_MINPSZ)
		so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
	if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
		if (soppp->sopp_zcopyflag & ZCVMSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
		} else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
		}

		if (soppp->sopp_zcopyflag & COPYCACHED) {
			so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
		}
	}
	if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
		so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
	if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
		so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
	if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
		so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
	if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
		so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
	if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
		so->so_proto_props.sopp_loopback = soppp->sopp_loopback;

	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0) {
		sof_instance_t *inst;
		ssize_t maxblk;
		ushort_t wroff, tail;
		maxblk = so->so_proto_props.sopp_maxblk;
		wroff = so->so_proto_props.sopp_wroff;
		tail = so->so_proto_props.sopp_tail;
		for (inst = so->so_filter_bottom; inst != NULL;
		    inst = inst->sofi_prev) {
			if (SOF_INTERESTED(inst, mblk_prop)) {
				(*inst->sofi_ops->sofop_mblk_prop)(
				    (sof_handle_t)inst, inst->sofi_cookie,
				    &maxblk, &wroff, &tail);
			}
		}
		mutex_enter(&so->so_lock);
		so->so_proto_props.sopp_maxblk = maxblk;
		so->so_proto_props.sopp_wroff = wroff;
		so->so_proto_props.sopp_tail = tail;
		mutex_exit(&so->so_lock);
	}
#ifdef DEBUG
	soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
	    SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
	    SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
	    SOCKOPT_LOOPBACK);
	ASSERT(soppp->sopp_flags == 0);
#endif
}
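
/*
 * Receive upcall: queue incoming data on the socket. The data is first
 * run through any data_in filters, OOB data is diverted to
 * so_queue_oob(), and everything else is placed on the receive queue
 * (via I/OAT when enabled). Returns the space left in the receive
 * buffer, or -1 with *errorp set to ENOSPC once the socket is flow
 * controlled.
 */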
/* ARGSUSED */
ssize_t
so_queue_msg_impl(struct sonode *so, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp,
    sof_instance_t *filter)
{
	boolean_t force_push = B_TRUE;
	int space_left;
	sodirect_t *sodp = so->so_direct;

	ASSERT(errorp != NULL);
	*errorp = 0;
	if (mp == NULL) {
		if (so->so_downcalls->sd_recv_uio != NULL) {
			mutex_enter(&so->so_lock);
			/* the notify functions will drop the lock */
			if (flags & MSG_OOB)
				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
			else
				so_notify_data(so, msg_size);
			return (0);
		}
		ASSERT(msg_size == 0);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	ASSERT(mp->b_next == NULL);
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
	ASSERT(msg_size == msgdsize(mp));

	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		mutex_enter(&so->so_lock);
		if (sodp != NULL)
			SOD_UIOAFINI(sodp);
		goto space_check;
	}

	if (so->so_filter_active > 0) {
		for (; filter != NULL; filter = filter->sofi_prev) {
			if (!SOF_INTERESTED(filter, data_in))
				continue;
			mp = (*filter->sofi_ops->sofop_data_in)(
			    (sof_handle_t)filter, filter->sofi_cookie, mp,
			    flags, &msg_size);
			ASSERT(msgdsize(mp) == msg_size);
			DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
			    (mblk_t *), mp);
			/* Data was consumed/dropped, just do space check */
			if (msg_size == 0) {
				mutex_enter(&so->so_lock);
				goto space_check;
			}
		}
	}

	if (flags & MSG_OOB) {
		so_queue_oob(so, mp, msg_size);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	if (force_pushp != NULL)
		force_push = *force_pushp;

	mutex_enter(&so->so_lock);
	if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		*errorp = EOPNOTSUPP;
		return (-1);
	}
	if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
		freemsg(mp);
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* process the mblk via I/OAT if capable */
	if (sodp != NULL && sodp->sod_enabled) {
		if (DB_TYPE(mp) == M_DATA) {
			sod_uioa_mblk_init(sodp, mp, msg_size);
		} else {
			SOD_UIOAFINI(sodp);
		}
	}

	if (mp->b_next == NULL) {
		so_enqueue_msg(so, mp, msg_size);
	} else {
		do {
			mblk_t *nmp;

			if ((nmp = mp->b_next) != NULL) {
				mp->b_next = NULL;
			}
			so_enqueue_msg(so, mp, msgdsize(mp));
			mp = nmp;
		} while (mp != NULL);
	}

	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}

	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
	    so->so_rcv_queued >= so->so_rcv_wanted) {
		SOCKET_TIMER_CANCEL(so);
		/*
		 * so_notify_data will release the lock
		 */
		so_notify_data(so, so->so_rcv_queued);

		if (force_pushp != NULL)
			*force_pushp = B_TRUE;
		goto done;
	} else if (so->so_rcv_timer_tid == 0) {
		/* Make sure the recv push timer is running */
		SOCKET_TIMER_START(so);
	}

done_unlock:
	mutex_exit(&so->so_lock);
done:
	return (space_left);

space_check:
	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}
	goto done_unlock;
}

#pragma inline(so_queue_msg_impl)

ssize_t
so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp)
{
	struct sonode *so = (struct sonode *)sock_handle;

	return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
	    so->so_filter_bottom));
}

/*
 * Set the offset of where the OOB data is relative to the bytes
 * already queued, and generate SIGURG.
 */
void
so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
{
	struct sonode *so;

	ASSERT(offset >= 0);
	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	/*
	 * New urgent data on the way, so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

	/*
	 * Record that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;

	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}

	/*
	 * Set the offset where the urgent byte is.
	 */
	so->so_oobmark = so->so_rcv_queued + offset;
	if (so->so_oobmark == 0)
		so->so_state |= SS_RCVATMARK;
	else
		so->so_state &= ~SS_RCVATMARK;

	so_notify_oobsig(so);
}

/*
 * Queue the OOB byte
 */
static void
so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
{
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	ASSERT(mp != NULL);
	if (!IS_SO_OOB_INLINE(so)) {
		so->so_oobmsg = mp;
		so->so_state |= SS_HAVEOOBDATA;
	} else {
		so_enqueue_msg(so, mp, len);
	}

	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
}

int
so_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	/*
	 * No new data will be enqueued once the CLOSING flag is set.
	 */
	mutex_enter(&so->so_lock);
	so->so_state |= SS_CLOSING;
	ASSERT(so_verify_oobstate(so));
	so_rcv_flush(so);
	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0)
		sof_sonode_closing(so);

	if (so->so_state & SS_ACCEPTCONN) {
		/*
		 * We grab and release the accept lock to ensure that any
		 * thread about to insert a socket in so_newconn completes
		 * before we flush the queue. Any thread calling so_newconn
		 * after we drop the lock will observe the SS_CLOSING flag,
		 * which will stop it from inserting the socket in the queue.
		 */
		mutex_enter(&so->so_acceptq_lock);
		mutex_exit(&so->so_acceptq_lock);

		so_acceptq_flush(so, B_TRUE);
	}

	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
	switch (error) {
	default:
		/* Protocol made a synchronous close; remove proto ref */
		VN_RELE(SOTOV(so));
		break;
	case EINPROGRESS:
		/*
		 * Protocol is in the process of closing, it will make a
		 * 'closed' upcall to remove the reference.
		 */
		error = 0;
		break;
	}

	return (error);
}

/*
 * Upcall made by the protocol when it's doing an asynchronous close. It
 * will drop the protocol's reference on the socket.
 */
void
so_closed(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	VN_RELE(SOTOV(so));
}

void
so_zcopy_notify(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	so->so_copyflag |= STZCNOTIFY;
	cv_broadcast(&so->so_copy_cv);
	mutex_exit(&so->so_lock);
}

void
so_set_error(sock_upper_handle_t sock_handle, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	soseterror(so, error);

	/* so_notify_error drops so_lock */
	so_notify_error(so);
}
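
/*
 * A note on the receive path below: control information carried in
 * T_UNITDATA_IND and T_OPTDATA_IND messages is converted to cmsgs via
 * so_opt2cmsg(), and MSG_WAITALL causes the dequeue to be retried
 * until the request is satisfied, unless MSG_EOR is set or control
 * data was received.
 */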
/*
 * so_recvmsg - read data from the socket
 *
 * There are two ways of obtaining data: either we ask the protocol to
 * copy it directly into the supplied buffer, or we copy it from the
 * sonode's receive queue. Which one is used depends on whether the
 * protocol has an sd_recv_uio down call.
 */
int
so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	rval_t rval;
	int flags = 0;
	t_uscalar_t controllen, namelen;
	int error = 0;
	int ret;
	mblk_t *mctlp = NULL;
	union T_primitives *tpr;
	void *control;
	ssize_t saved_resid;
	struct uio *suiop;

	SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));

	if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		SO_UNBLOCK_FALLBACK(so);
		return (ENOTCONN);
	}

	if (msg->msg_flags & MSG_PEEK)
		msg->msg_flags &= ~MSG_WAITALL;

	if (so->so_mode & SM_ATOMIC)
		msg->msg_flags |= MSG_TRUNC;

	if (msg->msg_flags & MSG_OOB) {
		if ((so->so_mode & SM_EXDATA) == 0) {
			error = EOPNOTSUPP;
		} else if (so->so_downcalls->sd_recv_uio != NULL) {
			error = (*so->so_downcalls->sd_recv_uio)
			    (so->so_proto_handle, uiop, msg, cr);
		} else {
			error = sorecvoob(so, msg, uiop, msg->msg_flags,
			    IS_SO_OOB_INLINE(so));
		}
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * If the protocol has the recv down call, then pass the request
	 * down.
	 */
	if (so->so_downcalls->sd_recv_uio != NULL) {
		error = (*so->so_downcalls->sd_recv_uio)
		    (so->so_proto_handle, uiop, msg, cr);
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * Reading data from the socket buffer
	 */
	flags = msg->msg_flags;
	msg->msg_flags = 0;

	/*
	 * Set msg_controllen and msg_namelen to zero here to make it
	 * simpler in the cases that no control or name is returned.
	 */
	controllen = msg->msg_controllen;
	namelen = msg->msg_namelen;
	msg->msg_controllen = 0;
	msg->msg_namelen = 0;

	mutex_enter(&so->so_lock);
	/* Set SOREADLOCKED */
	error = so_lock_read_intr(so,
	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
	mutex_exit(&so->so_lock);
	if (error) {
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	suiop = sod_rcv_init(so, flags, &uiop);
retry:
	saved_resid = uiop->uio_resid;
	error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
	if (error != 0) {
		goto out;
	}
	/*
	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
	 * For non-datagrams MOREDATA is used to set MSG_EOR.
	 */
	ASSERT(!(rval.r_val1 & MORECTL));
	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
		msg->msg_flags |= MSG_TRUNC;
	if (mctlp == NULL) {
		dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));

		mutex_enter(&so->so_lock);
		/* Set MSG_EOR based on MOREDATA */
		if (!(rval.r_val1 & MOREDATA)) {
			if (so->so_state & SS_SAVEDEOR) {
				msg->msg_flags |= MSG_EOR;
				so->so_state &= ~SS_SAVEDEOR;
			}
		}
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}

		goto out_locked;
	}
	/* so_queue_msg has already verified length and alignment */
	tpr = (union T_primitives *)mctlp->b_rptr;
	dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
	switch (tpr->type) {
	case T_DATA_IND: {
		/*
		 * Set msg_flags to MSG_EOR based on
		 * MORE_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	case T_UNITDATA_IND: {
		void *addr;
		t_uscalar_t addrlen;
		void *abuf;
		t_uscalar_t optlen;
		void *opt;

		if (namelen != 0) {
			/* Caller wants source address */
			addrlen = tpr->unitdata_ind.SRC_length;
			addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
			    addrlen, 1);
			if (addr == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			ASSERT(so->so_family != AF_UNIX);
		}
		optlen = tpr->unitdata_ind.OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Extract any source address option.
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			if (so->so_family == AF_UNIX)
				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (namelen != 0) {
			/*
			 * Return address to caller.
			 * Caller handles truncation if length
			 * exceeds msg_namelen.
			 * NOTE: AF_UNIX NUL termination is ensured by
			 * the sender's copyin_name().
			 */
			abuf = kmem_alloc(addrlen, KM_SLEEP);

			bcopy(addr, abuf, addrlen);
			msg->msg_name = abuf;
			msg->msg_namelen = addrlen;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				if (msg->msg_namelen != 0)
					kmem_free(msg->msg_name,
					    msg->msg_namelen);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		freemsg(mctlp);
		goto out;
	}
	case T_OPTDATA_IND: {
		struct T_optdata_req *tdr;
		void *opt;
		t_uscalar_t optlen;

		tdr = (struct T_optdata_req *)mctlp->b_rptr;
		optlen = tdr->OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;
			/*
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp,
			    tpr->optdata_ind.OPT_offset, optlen,
			    __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}

			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * DATA_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 * Waiting is not possible if control info was received.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    controllen == 0 &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	default:
		cmn_err(CE_CONT, "so_recvmsg bad type %x\n",
		    tpr->type);
		freemsg(mctlp);
		error = EPROTO;
		ASSERT(0);
	}
out:
	mutex_enter(&so->so_lock);
out_locked:
	ret = sod_rcv_done(so, suiop, uiop);
	if (ret != 0 && error == 0)
		error = ret;

	so_unlock_read(so);	/* Clear SOREADLOCKED */
	mutex_exit(&so->so_lock);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

sonodeops_t so_sonodeops = {
	so_init,		/* sop_init */
	so_accept,		/* sop_accept */
	so_bind,		/* sop_bind */
	so_listen,		/* sop_listen */
	so_connect,		/* sop_connect */
	so_recvmsg,		/* sop_recvmsg */
	so_sendmsg,		/* sop_sendmsg */
	so_sendmblk,		/* sop_sendmblk */
	so_getpeername,		/* sop_getpeername */
	so_getsockname,		/* sop_getsockname */
	so_shutdown,		/* sop_shutdown */
	so_getsockopt,		/* sop_getsockopt */
	so_setsockopt,		/* sop_setsockopt */
	so_ioctl,		/* sop_ioctl */
	so_poll,		/* sop_poll */
	so_close,		/* sop_close */
};

sock_upcalls_t so_upcalls = {
	so_newconn,
	so_connected,
	so_disconnected,
	so_opctl,
	so_queue_msg,
	so_set_prop,
	so_txq_full,
	so_signal_oob,
	so_zcopy_notify,
	so_set_error,
	so_closed
};