/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>

#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/sockio.h>
#include <sys/kmem_impl.h>

#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/ddi.h>
#include <netinet/in.h>
#include <inet/ip.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/sockfilter_impl.h>

#include <sys/socket_proto.h>

#include <fs/sockfs/socktpi_impl.h>
#include <fs/sockfs/sodirect.h>
#include <sys/tihdr.h>
#include <fs/sockfs/nl7c.h>
#include <inet/kssl/ksslapi.h>


extern int xnet_skip_checks;
extern int xnet_check_print;

static void so_queue_oob(struct sonode *, mblk_t *, size_t);


/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag,
    struct cred *cr, struct sonode **nsop)
{
    return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
{
    return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
    socklen_t *len, struct cred *cr)
{
    return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
    return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
{
    return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
    return (EOPNOTSUPP);
}

/*
 * Generic Socket Ops
 */

/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
    return (socket_init_common(so, pso, flags, cr));
}
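
/*
 * A note on the common shape of the operations that follow: each entry
 * point is bracketed by SO_BLOCK_FALLBACK()/SO_UNBLOCK_FALLBACK() so
 * that the socket cannot fall back to TPI in the middle of the
 * operation, and each one first offers the call to any active socket
 * filters (the sof_filter_*() checks) before invoking the protocol's
 * sd_* downcall.
 */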

int
so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags, struct cred *cr)
{
    int error;

    SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));

    ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);

    /* X/Open requires this check */
    if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
        if (xnet_check_print) {
            printf("sockfs: X/Open bind state check "
                "caused EINVAL\n");
        }
        error = EINVAL;
        goto done;
    }

    /*
     * A bind to a NULL address is interpreted as an unbind, so just
     * do the downcall.
     */
    if (name == NULL)
        goto dobind;

    switch (so->so_family) {
    case AF_INET:
        if ((size_t)namelen != sizeof (sin_t)) {
            error = name->sa_family != so->so_family ?
                EAFNOSUPPORT : EINVAL;
            eprintsoline(so, error);
            goto done;
        }

        if ((flags & _SOBIND_XPG4_2) &&
            (name->sa_family != so->so_family)) {
            /*
             * This check has to be made for X/Open sockets;
             * however, application failures have been observed
             * when it is applied to all sockets.
             */
            error = EAFNOSUPPORT;
            eprintsoline(so, error);
            goto done;
        }
        /*
         * Force a zero sa_family to match so_family.
         *
         * Some programs like inetd(1M) don't set the
         * family field. Other programs leave
         * sin_family set to garbage - SunOS 4.X does
         * not check the family field on a bind.
         * We use the family field that
         * was passed in to the socket() call.
         */
        name->sa_family = so->so_family;
        break;

    case AF_INET6: {
#ifdef DEBUG
        sin6_t *sin6 = (sin6_t *)name;
#endif
        if ((size_t)namelen != sizeof (sin6_t)) {
            error = name->sa_family != so->so_family ?
                EAFNOSUPPORT : EINVAL;
            eprintsoline(so, error);
            goto done;
        }

        if (name->sa_family != so->so_family) {
            /*
             * With IPv6 we require the family to match
             * unlike in IPv4.
             */
            error = EAFNOSUPPORT;
            eprintsoline(so, error);
            goto done;
        }
#ifdef DEBUG
        /*
         * Verify that apps don't forget to clear
         * sin6_scope_id etc
         */
        if (sin6->sin6_scope_id != 0 &&
            !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
            zcmn_err(getzoneid(), CE_WARN,
                "bind with uninitialized sin6_scope_id "
                "(%d) on socket. Pid = %d\n",
                (int)sin6->sin6_scope_id,
                (int)curproc->p_pid);
        }
        if (sin6->__sin6_src_id != 0) {
            zcmn_err(getzoneid(), CE_WARN,
                "bind with uninitialized __sin6_src_id "
                "(%d) on socket. Pid = %d\n",
                (int)sin6->__sin6_src_id,
                (int)curproc->p_pid);
        }
#endif /* DEBUG */

        break;
    }
    default:
        /* Just pass the request to the protocol */
        goto dobind;
    }

    /*
     * First we check if either NCA or KSSL has been enabled for
     * the requested address, and if so, we fall back to TPI.
     * If neither of those two services are enabled, then we just
     * pass the request to the protocol.
     *
     * Note that KSSL can only be enabled on a socket if NCA is NOT
     * enabled for that socket, hence the else-statement below.
     */
    if (nl7c_enabled && ((so->so_family == AF_INET ||
        so->so_family == AF_INET6) &&
        nl7c_lookup_addr(name, namelen) != NULL)) {
        /*
         * NL7C is not supported in non-global zones,
         * we enforce this restriction here.
         */
        if (so->so_zoneid == GLOBAL_ZONEID) {
            /* NCA should be used, so fall back to TPI */
            error = so_tpi_fallback(so, cr);
            SO_UNBLOCK_FALLBACK(so);
            if (error)
                return (error);
            else
                return (SOP_BIND(so, name, namelen, flags, cr));
        }
    } else if (so->so_type == SOCK_STREAM) {
        /* Check if KSSL has been configured for this address */
        kssl_ent_t ent;
        kssl_endpt_type_t type;
        struct T_bind_req bind_req;
        mblk_t *mp;

        /*
         * TODO: Check with KSSL team if we could add a function call
         * that only queries whether KSSL is enabled for the given
         * address.
         */
        bind_req.PRIM_type = T_BIND_REQ;
        bind_req.ADDR_length = namelen;
        bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
        mp = soallocproto2(&bind_req, sizeof (bind_req),
            name, namelen, 0, _ALLOC_SLEEP, cr);

        type = kssl_check_proxy(mp, so, &ent);
        freemsg(mp);

        if (type != KSSL_NO_PROXY) {
            /*
             * KSSL has been configured for this address, so
             * we must fall back to TPI.
             */
            kssl_release_ent(ent, so, type);
            error = so_tpi_fallback(so, cr);
            SO_UNBLOCK_FALLBACK(so);
            if (error)
                return (error);
            else
                return (SOP_BIND(so, name, namelen, flags, cr));
        }
    }

dobind:
    if (so->so_filter_active == 0 ||
        (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
        error = (*so->so_downcalls->sd_bind)
            (so->so_proto_handle, name, namelen, cr);
    }
done:
    SO_UNBLOCK_FALLBACK(so);

    return (error);
}
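
/*
 * so_listen() below is the simplest example of the filter convention
 * used throughout this file: a sof_filter_*() routine returns a
 * negative value when no filter claims the operation, in which case
 * the request is passed to the protocol through the corresponding
 * downcall; a return value >= 0 (zero or an errno) is the filter's
 * final answer and the downcall is skipped.
 */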

int
so_listen(struct sonode *so, int backlog, struct cred *cr)
{
    int error = 0;

    ASSERT(MUTEX_NOT_HELD(&so->so_lock));
    SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));

    if (so->so_filter_active == 0 ||
        (error = sof_filter_listen(so, &backlog, cr)) < 0)
        error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
            backlog, cr);

    SO_UNBLOCK_FALLBACK(so);

    return (error);
}


int
so_connect(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int fflag, int flags, struct cred *cr)
{
    int error = 0;
    sock_connid_t id;

    ASSERT(MUTEX_NOT_HELD(&so->so_lock));
    SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));

    /*
     * If there is a pending error, return it. This can happen if a
     * non-blocking operation caused an error.
     */
    if (so->so_error != 0) {
        mutex_enter(&so->so_lock);
        error = sogeterr(so, B_TRUE);
        mutex_exit(&so->so_lock);
        if (error != 0)
            goto done;
    }

    if (so->so_filter_active == 0 ||
        (error = sof_filter_connect(so, (struct sockaddr *)name,
        &namelen, cr)) < 0) {
        error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
            name, namelen, &id, cr);

        if (error == EINPROGRESS)
            error = so_wait_connected(so,
                fflag & (FNONBLOCK|FNDELAY), id);
    }
done:
    SO_UNBLOCK_FALLBACK(so);
    return (error);
}

/*ARGSUSED*/
int
so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
{
    int error = 0;
    struct sonode *nso;

    *nsop = NULL;

    SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
    if ((so->so_state & SS_ACCEPTCONN) == 0) {
        SO_UNBLOCK_FALLBACK(so);
        return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
            EOPNOTSUPP : EINVAL);
    }

    if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
        &nso)) == 0) {
        ASSERT(nso != NULL);

        /* finish the accept */
        if ((so->so_filter_active > 0 &&
            (error = sof_filter_accept(nso, cr)) > 0) ||
            (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
            nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
            (void) socket_close(nso, 0, cr);
            socket_destroy(nso);
        } else {
            *nsop = nso;
        }
    }

    SO_UNBLOCK_FALLBACK(so);
    return (error);
}
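
/*
 * so_sendmsg() moves data to the protocol in one of two ways: if the
 * protocol supplies an sd_send_uio downcall the uio is handed down
 * directly, otherwise the data is copied into mblks with socopyinuio()
 * and passed to sd_send. For SM_ATOMIC (datagram-style) sockets the
 * send loop runs exactly once, and a message larger than sopp_maxpsz
 * is rejected up front with EMSGSIZE.
 */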

int
so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
    int error, flags;
    boolean_t dontblock;
    ssize_t orig_resid;
    mblk_t *mp;

    SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));

    flags = msg->msg_flags;
    error = 0;
    dontblock = (flags & MSG_DONTWAIT) ||
        (uiop->uio_fmode & (FNONBLOCK|FNDELAY));

    if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
        /*
         * Old way of passing fd's is not supported
         */
        SO_UNBLOCK_FALLBACK(so);
        return (EOPNOTSUPP);
    }

    if ((so->so_mode & SM_ATOMIC) &&
        uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
        so->so_proto_props.sopp_maxpsz != -1) {
        SO_UNBLOCK_FALLBACK(so);
        return (EMSGSIZE);
    }

    /*
     * For atomic sends we will only do one iteration.
     */
    do {
        if (so->so_state & SS_CANTSENDMORE) {
            error = EPIPE;
            break;
        }

        if (so->so_error != 0) {
            mutex_enter(&so->so_lock);
            error = sogeterr(so, B_TRUE);
            mutex_exit(&so->so_lock);
            if (error != 0)
                break;
        }

        /*
         * Send down OOB messages even if the send path is being
         * flow controlled (assuming the protocol supports OOB data).
         */
        if (flags & MSG_OOB) {
            if ((so->so_mode & SM_EXDATA) == 0) {
                error = EOPNOTSUPP;
                break;
            }
        } else if (SO_SND_FLOWCTRLD(so)) {
            /*
             * Need to wait until the protocol is ready to receive
             * more data for transmission.
             */
            if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
                break;
        }

        /*
         * Time to send data to the protocol. We either copy the
         * data into mblks or pass the uio directly to the protocol.
         * We decide what to do based on the available down calls.
         */
        if (so->so_downcalls->sd_send_uio != NULL) {
            error = (*so->so_downcalls->sd_send_uio)
                (so->so_proto_handle, uiop, msg, cr);
            if (error != 0)
                break;
        } else {
            /* save the resid in case of failure */
            orig_resid = uiop->uio_resid;

            if ((mp = socopyinuio(uiop,
                so->so_proto_props.sopp_maxpsz,
                so->so_proto_props.sopp_wroff,
                so->so_proto_props.sopp_maxblk,
                so->so_proto_props.sopp_tail, &error)) == NULL) {
                break;
            }
            ASSERT(uiop->uio_resid >= 0);

            if (so->so_filter_active > 0 &&
                ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
                &error)) == NULL)) {
                if (error != 0)
                    break;
                continue;
            }
            error = (*so->so_downcalls->sd_send)
                (so->so_proto_handle, mp, msg, cr);
            if (error != 0) {
                /*
                 * The send failed. We do not have to free the
                 * mblks, because that is the protocol's
                 * responsibility. However, uio_resid must
                 * remain accurate, so adjust that here.
                 */
                uiop->uio_resid = orig_resid;
                break;
            }
        }
    } while (uiop->uio_resid > 0);

    SO_UNBLOCK_FALLBACK(so);

    return (error);
}
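
/*
 * so_sendmblk_impl() is the mblk-chain counterpart of so_sendmsg(). It
 * is reached from so_sendmblk() for sendfile-style transmits (guarded
 * by SM_SENDFILESUPP) and from socket filters injecting data
 * (fil_inject set). The chain is carved into pieces of at most
 * sopp_maxpsz bytes before each sd_send downcall.
 */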

int
so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
    boolean_t fil_inject)
{
    int error;
    boolean_t dontblock;
    size_t size;
    mblk_t *mp = *mpp;

    if (so->so_downcalls->sd_send == NULL)
        return (EOPNOTSUPP);

    error = 0;
    dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
        (fflag & (FNONBLOCK|FNDELAY));
    size = msgdsize(mp);

    if ((so->so_mode & SM_ATOMIC) &&
        size > so->so_proto_props.sopp_maxpsz &&
        so->so_proto_props.sopp_maxpsz != -1) {
        SO_UNBLOCK_FALLBACK(so);
        return (EMSGSIZE);
    }

    while (mp != NULL) {
        mblk_t *nmp, *last_mblk;
        size_t mlen;

        if (so->so_state & SS_CANTSENDMORE) {
            error = EPIPE;
            break;
        }
        if (so->so_error != 0) {
            mutex_enter(&so->so_lock);
            error = sogeterr(so, B_TRUE);
            mutex_exit(&so->so_lock);
            if (error != 0)
                break;
        }
        /* Socket filters are not flow controlled */
        if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
            /*
             * Need to wait until the protocol is ready to receive
             * more data for transmission.
             */
            if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
                break;
        }

        /*
         * We only allow so_maxpsz of data to be sent down to
         * the protocol at a time.
         */
        mlen = MBLKL(mp);
        nmp = mp->b_cont;
        last_mblk = mp;
        while (nmp != NULL) {
            mlen += MBLKL(nmp);
            if (mlen > so->so_proto_props.sopp_maxpsz) {
                last_mblk->b_cont = NULL;
                break;
            }
            last_mblk = nmp;
            nmp = nmp->b_cont;
        }

        if (so->so_filter_active > 0 &&
            (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
            cr, &error)) == NULL) {
            *mpp = mp = nmp;
            if (error != 0)
                break;
            continue;
        }
        error = (*so->so_downcalls->sd_send)
            (so->so_proto_handle, mp, msg, cr);
        if (error != 0) {
            /*
             * The send failed. The protocol will free the mblks
             * that were sent down. Let the caller deal with the
             * rest.
             */
            *mpp = nmp;
            break;
        }

        *mpp = mp = nmp;
    }
    /* Let the filter know whether the protocol is flow controlled */
    if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
        error = ENOSPC;

    return (error);
}

#pragma inline(so_sendmblk_impl)

int
so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
    int error;

    SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));

    if ((so->so_mode & SM_SENDFILESUPP) == 0) {
        SO_UNBLOCK_FALLBACK(so);
        return (EOPNOTSUPP);
    }

    error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
        B_FALSE);

    SO_UNBLOCK_FALLBACK(so);

    return (error);
}
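
/*
 * The xnet_skip_checks and xnet_check_print variables (declared at the
 * top of this file) govern the X/Open conformance checks made here in
 * so_shutdown() as well as in so_bind(), so_getpeername() and
 * so_setsockopt(): xnet_skip_checks suppresses the checks, and
 * xnet_check_print logs a message whenever one of them fails.
 */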

int
so_shutdown(struct sonode *so, int how, struct cred *cr)
{
    int error = 0;

    SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));

    /*
     * SunOS 4.X has no check for datagram sockets.
     * 5.X checks that it is connected (ENOTCONN).
     * X/Open requires that we check the connected state.
     */
    if (!(so->so_state & SS_ISCONNECTED)) {
        if (!xnet_skip_checks) {
            error = ENOTCONN;
            if (xnet_check_print) {
                printf("sockfs: X/Open shutdown check "
                    "caused ENOTCONN\n");
            }
        }
        goto done;
    }

    if (so->so_filter_active == 0 ||
        (error = sof_filter_shutdown(so, &how, cr)) < 0)
        error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
            how, cr));

    /*
     * Protocol agreed to shutdown. We need to flush the
     * receive buffer if the receive side is being shutdown.
     */
    if (error == 0 && how != SHUT_WR) {
        mutex_enter(&so->so_lock);
        /* wait for active reader to finish */
        (void) so_lock_read(so, 0);

        so_rcv_flush(so);

        so_unlock_read(so);
        mutex_exit(&so->so_lock);
    }

done:
    SO_UNBLOCK_FALLBACK(so);
    return (error);
}

int
so_getsockname(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, struct cred *cr)
{
    int error;

    SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));

    if (so->so_filter_active == 0 ||
        (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
        error = (*so->so_downcalls->sd_getsockname)
            (so->so_proto_handle, addr, addrlen, cr);

    SO_UNBLOCK_FALLBACK(so);
    return (error);
}

int
so_getpeername(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
    int error;

    SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));

    if (accept) {
        error = (*so->so_downcalls->sd_getpeername)
            (so->so_proto_handle, addr, addrlen, cr);
    } else if (!(so->so_state & SS_ISCONNECTED)) {
        error = ENOTCONN;
    } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
        /* Added this check for X/Open */
        error = EINVAL;
        if (xnet_check_print) {
            printf("sockfs: X/Open getpeername check => EINVAL\n");
        }
    } else if (so->so_filter_active == 0 ||
        (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
        error = (*so->so_downcalls->sd_getpeername)
            (so->so_proto_handle, addr, addrlen, cr);
    }

    SO_UNBLOCK_FALLBACK(so);
    return (error);
}

int
so_getsockopt(struct sonode *so, int level, int option_name,
    void *optval, socklen_t *optlenp, int flags, struct cred *cr)
{
    int error = 0;

    if (level == SOL_FILTER)
        return (sof_getsockopt(so, option_name, optval, optlenp, cr));

    SO_BLOCK_FALLBACK(so,
        SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));

    if ((so->so_filter_active == 0 ||
        (error = sof_filter_getsockopt(so, level, option_name, optval,
        optlenp, cr)) < 0) &&
        (error = socket_getopt_common(so, level, option_name, optval,
        optlenp, flags)) < 0) {
        error = (*so->so_downcalls->sd_getsockopt)
            (so->so_proto_handle, level, option_name, optval, optlenp,
            cr);
        if (error == ENOPROTOOPT) {
            if (level == SOL_SOCKET) {
                /*
                 * If a protocol does not support a particular
                 * socket option, set can fail (not allowed)
                 * but get cannot fail. This is the previous
                 * sockfs behavior.
                 */
                switch (option_name) {
                case SO_LINGER:
                    if (*optlenp < (t_uscalar_t)
                        sizeof (struct linger)) {
                        error = EINVAL;
                        break;
                    }
                    error = 0;
                    bzero(optval, sizeof (struct linger));
                    *optlenp = sizeof (struct linger);
                    break;
                case SO_RCVTIMEO:
                case SO_SNDTIMEO:
                    if (*optlenp < (t_uscalar_t)
                        sizeof (struct timeval)) {
                        error = EINVAL;
                        break;
                    }
                    error = 0;
                    bzero(optval, sizeof (struct timeval));
                    *optlenp = sizeof (struct timeval);
                    break;
                case SO_SND_BUFINFO:
                    if (*optlenp < (t_uscalar_t)
                        sizeof (struct so_snd_bufinfo)) {
                        error = EINVAL;
                        break;
                    }
                    error = 0;
                    bzero(optval,
                        sizeof (struct so_snd_bufinfo));
                    *optlenp =
                        sizeof (struct so_snd_bufinfo);
                    break;
                case SO_DEBUG:
                case SO_REUSEADDR:
                case SO_KEEPALIVE:
                case SO_DONTROUTE:
                case SO_BROADCAST:
                case SO_USELOOPBACK:
                case SO_OOBINLINE:
                case SO_DGRAM_ERRIND:
                case SO_SNDBUF:
                case SO_RCVBUF:
                    error = 0;
                    *((int32_t *)optval) = 0;
                    *optlenp = sizeof (int32_t);
                    break;
                default:
                    break;
                }
            }
        }
    }

    SO_UNBLOCK_FALLBACK(so);
    return (error);
}

int
so_setsockopt(struct sonode *so, int level, int option_name,
    const void *optval, socklen_t optlen, struct cred *cr)
{
    int error = 0;
    struct timeval tl;
    const void *opt = optval;

    if (level == SOL_FILTER)
        return (sof_setsockopt(so, option_name, optval, optlen, cr));

    SO_BLOCK_FALLBACK(so,
        SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));

    /* X/Open requires this check */
    if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
        SO_UNBLOCK_FALLBACK(so);
        if (xnet_check_print)
            printf("sockfs: X/Open setsockopt check => EINVAL\n");
        return (EINVAL);
    }

    if (so->so_filter_active > 0 &&
        (error = sof_filter_setsockopt(so, level, option_name,
        (void *)optval, &optlen, cr)) >= 0)
        goto done;

    if (level == SOL_SOCKET) {
        switch (option_name) {
        case SO_RCVTIMEO:
        case SO_SNDTIMEO: {
            /*
             * We pass these two options down to the protocol in
             * order to support third-party protocols that need to
             * know them. Protocols that don't care about these
             * two options simply return 0.
             */
            clock_t t_usec;

            if (get_udatamodel() == DATAMODEL_NONE ||
                get_udatamodel() == DATAMODEL_NATIVE) {
                if (optlen != sizeof (struct timeval)) {
                    error = EINVAL;
                    goto done;
                }
                bcopy((struct timeval *)optval, &tl,
                    sizeof (struct timeval));
            } else {
                if (optlen != sizeof (struct timeval32)) {
                    error = EINVAL;
                    goto done;
                }
                TIMEVAL32_TO_TIMEVAL(&tl,
                    (struct timeval32 *)optval);
            }
            opt = &tl;
            optlen = sizeof (tl);
            t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
            mutex_enter(&so->so_lock);
            if (option_name == SO_RCVTIMEO)
                so->so_rcvtimeo = drv_usectohz(t_usec);
            else
                so->so_sndtimeo = drv_usectohz(t_usec);
            mutex_exit(&so->so_lock);
            break;
        }
        case SO_RCVBUF:
            /*
             * XXX XPG 4.2 applications retrieve SO_RCVBUF from
             * sockfs since the transport might adjust the value
             * and not return exactly what was set by the
             * application.
             */
            so->so_xpg_rcvbuf = *(int32_t *)optval;
            break;
        }
    }
    error = (*so->so_downcalls->sd_setsockopt)
        (so->so_proto_handle, level, option_name, opt, optlen, cr);
done:
    SO_UNBLOCK_FALLBACK(so);
    return (error);
}

int
so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
    int error = 0;

    SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));

    /*
     * If there is a pending error, return it. This can happen if a
     * non-blocking operation caused an error.
     */
    if (so->so_error != 0) {
        mutex_enter(&so->so_lock);
        error = sogeterr(so, B_TRUE);
        mutex_exit(&so->so_lock);
        if (error != 0)
            goto done;
    }

    /*
     * Calling strioc can result in the socket falling back to TPI,
     * if that is supported.
     */
    if ((so->so_filter_active == 0 ||
        (error = sof_filter_ioctl(so, cmd, arg, mode,
        rvalp, cr)) < 0) &&
        (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
        (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
        error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
            cmd, arg, mode, rvalp, cr);
    }

done:
    SO_UNBLOCK_FALLBACK(so);

    return (error);
}

int
so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
    int state = so->so_state;
    *reventsp = 0;

    /*
     * In sockets the errors are represented as input/output events.
     */
    if (so->so_error != 0 &&
        ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
        *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
        return (0);
    }

    /*
     * If the socket is in a state where it can send data,
     * turn on POLLWRBAND and POLLOUT events.
     */
    if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
        /*
         * Out-of-band data is allowed even if the connection
         * is flow controlled.
         */
        *reventsp |= POLLWRBAND & events;
        if (!SO_SND_FLOWCTRLD(so)) {
            /*
             * As long as there is buffer to send data,
             * turn on POLLOUT events.
             */
            *reventsp |= POLLOUT & events;
        }
    }

    /*
     * Turn on POLLIN whenever there is data on the receive queue,
     * or the socket is in a state where no more data will be received.
     * Also, if the socket is accepting connections, flip the bit if
     * there is something on the queue.
     *
     * We do an initial check for events without holding locks. However,
     * if there are no events available, then we redo the check for
     * POLLIN events under the lock.
     */

    /* Pending connections */
    if (!list_is_empty(&so->so_acceptq_list))
        *reventsp |= (POLLIN|POLLRDNORM) & events;

    /* Data */
    /* so_downcalls is null for sctp */
    if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
        *reventsp |= (*so->so_downcalls->sd_poll)
            (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
            CRED()) & events;
        ASSERT((*reventsp & ~events) == 0);
        /* do not recheck events */
        events &= ~SO_PROTO_POLLEV;
    } else {
        if (SO_HAVE_DATA(so))
            *reventsp |= (POLLIN|POLLRDNORM) & events;

        /* Urgent data */
        if ((state & SS_OOBPEND) != 0) {
            *reventsp |= (POLLRDBAND | POLLPRI) & events;
        }
    }

    if (!*reventsp && !anyyet) {
        /* Check for read events again, but this time under lock */
        if (events & (POLLIN|POLLRDNORM)) {
            mutex_enter(&so->so_lock);
            if (SO_HAVE_DATA(so) ||
                !list_is_empty(&so->so_acceptq_list)) {
                mutex_exit(&so->so_lock);
                *reventsp |= (POLLIN|POLLRDNORM) & events;
                return (0);
            } else {
                so->so_pollev |= SO_POLLEV_IN;
                mutex_exit(&so->so_lock);
            }
        }
        *phpp = &so->so_poll_list;
    }
    return (0);
}
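
/*
 * The operations above make up the downward-facing sonode interface
 * (collected in so_sonodeops at the bottom of this file). What follows
 * are the generic upcalls, i.e. the calls the protocol makes into
 * sockfs; they are registered through the so_upcalls table, also
 * defined at the bottom of this file.
 */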

/*
 * Generic Upcalls
 */
void
so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
    cred_t *peer_cred, pid_t peer_cpid)
{
    struct sonode *so = (struct sonode *)sock_handle;

    mutex_enter(&so->so_lock);
    ASSERT(so->so_proto_handle != NULL);

    if (peer_cred != NULL) {
        if (so->so_peercred != NULL)
            crfree(so->so_peercred);
        crhold(peer_cred);
        so->so_peercred = peer_cred;
        so->so_cpid = peer_cpid;
    }

    so->so_proto_connid = id;
    soisconnected(so);
    /*
     * Wake those waiting for the connection to become established.
     */
    so_notify_connected(so);
}

int
so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
{
    struct sonode *so = (struct sonode *)sock_handle;
    boolean_t connect_failed;

    mutex_enter(&so->so_lock);
    connect_failed = so->so_state & SS_ISCONNECTED;
    so->so_proto_connid = id;
    soisdisconnected(so, error);
    so_notify_disconnected(so, connect_failed, error);

    return (0);
}

void
so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
    uintptr_t arg)
{
    struct sonode *so = (struct sonode *)sock_handle;

    switch (action) {
    case SOCK_OPCTL_SHUT_SEND:
        mutex_enter(&so->so_lock);
        socantsendmore(so);
        so_notify_disconnecting(so);
        break;
    case SOCK_OPCTL_SHUT_RECV: {
        mutex_enter(&so->so_lock);
        socantrcvmore(so);
        so_notify_eof(so);
        break;
    }
    case SOCK_OPCTL_ENAB_ACCEPT:
        mutex_enter(&so->so_lock);
        so->so_state |= SS_ACCEPTCONN;
        so->so_backlog = (unsigned int)arg;
        /*
         * The protocol can stop generating newconn upcalls when
         * the backlog is full, so to make sure the listener does
         * not end up with a queue full of deferred connections
         * we reduce the backlog by one. Thus the listener will
         * start closing deferred connections before the backlog
         * is full.
         */
        if (so->so_filter_active > 0)
            so->so_backlog = MAX(1, so->so_backlog - 1);
        mutex_exit(&so->so_lock);
        break;
    default:
        ASSERT(0);
        break;
    }
}
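
/*
 * so_txq_full() is the flow-control handshake with the protocol: the
 * protocol raises transmit-side flow control via so_snd_qfull() and
 * clears it via so_snd_qnotfull(), which is the state the send paths
 * above test with SO_SND_FLOWCTRLD().
 */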

void
so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
{
    struct sonode *so = (struct sonode *)sock_handle;

    if (qfull) {
        so_snd_qfull(so);
    } else {
        so_snd_qnotfull(so);
        mutex_enter(&so->so_lock);
        /* so_notify_writable drops so_lock */
        so_notify_writable(so);
    }
}

sock_upper_handle_t
so_newconn(sock_upper_handle_t parenthandle,
    sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
    struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
{
    struct sonode *so = (struct sonode *)parenthandle;
    struct sonode *nso;
    int error;

    ASSERT(proto_handle != NULL);

    if ((so->so_state & SS_ACCEPTCONN) == 0 ||
        (so->so_acceptq_len >= so->so_backlog &&
        (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
        return (NULL);
    }

    nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
        &error);
    if (nso == NULL)
        return (NULL);

    if (peer_cred != NULL) {
        crhold(peer_cred);
        nso->so_peercred = peer_cred;
        nso->so_cpid = peer_cpid;
    }
    nso->so_listener = so;

    /*
     * The new socket (nso), proto_handle and sock_upcallsp are all
     * valid at this point. But as soon as nso is placed in the accept
     * queue that can no longer be assumed (since an accept() thread may
     * pull it off the queue and close the socket).
     */
    *sock_upcallsp = &so_upcalls;

    mutex_enter(&so->so_acceptq_lock);
    if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
        mutex_exit(&so->so_acceptq_lock);
        ASSERT(nso->so_count == 1);
        nso->so_count--;
        /* drop proto ref */
        VN_RELE(SOTOV(nso));
        socket_destroy(nso);
        return (NULL);
    } else {
        so->so_acceptq_len++;
        if (nso->so_state & SS_FIL_DEFER) {
            list_insert_tail(&so->so_acceptq_defer, nso);
            mutex_exit(&so->so_acceptq_lock);
        } else {
            list_insert_tail(&so->so_acceptq_list, nso);
            cv_signal(&so->so_acceptq_cv);
            mutex_exit(&so->so_acceptq_lock);
            mutex_enter(&so->so_lock);
            so_notify_newconn(so);
        }

        return ((sock_upper_handle_t)nso);
    }
}
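
/*
 * so_set_prop() is how the protocol publishes its properties (write
 * offset, maximum packet size, receive watermarks, etc.). The values
 * stored in so_proto_props are the ones consulted by the send path in
 * so_sendmsg() and so_sendmblk_impl(); active filters may adjust
 * maxblk, wroff and tail through their mblk_prop entry point.
 */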

void
so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
{
    struct sonode *so;

    so = (struct sonode *)sock_handle;

    mutex_enter(&so->so_lock);

    if (soppp->sopp_flags & SOCKOPT_MAXBLK)
        so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
    if (soppp->sopp_flags & SOCKOPT_WROFF)
        so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
    if (soppp->sopp_flags & SOCKOPT_TAIL)
        so->so_proto_props.sopp_tail = soppp->sopp_tail;
    if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
        so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
    if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
        so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
    if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
        so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
    if (soppp->sopp_flags & SOCKOPT_MINPSZ)
        so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
    if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
        if (soppp->sopp_zcopyflag & ZCVMSAFE) {
            so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
            so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
        } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
            so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
            so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
        }

        if (soppp->sopp_zcopyflag & COPYCACHED) {
            so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
        }
    }
    if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
        so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
    if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
        so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
    if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
        so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
    if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
        so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
    if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
        so->so_proto_props.sopp_loopback = soppp->sopp_loopback;

    mutex_exit(&so->so_lock);

    if (so->so_filter_active > 0) {
        sof_instance_t *inst;
        ssize_t maxblk;
        ushort_t wroff, tail;
        maxblk = so->so_proto_props.sopp_maxblk;
        wroff = so->so_proto_props.sopp_wroff;
        tail = so->so_proto_props.sopp_tail;
        for (inst = so->so_filter_bottom; inst != NULL;
            inst = inst->sofi_prev) {
            if (SOF_INTERESTED(inst, mblk_prop)) {
                (*inst->sofi_ops->sofop_mblk_prop)(
                    (sof_handle_t)inst, inst->sofi_cookie,
                    &maxblk, &wroff, &tail);
            }
        }
        mutex_enter(&so->so_lock);
        so->so_proto_props.sopp_maxblk = maxblk;
        so->so_proto_props.sopp_wroff = wroff;
        so->so_proto_props.sopp_tail = tail;
        mutex_exit(&so->so_lock);
    }
#ifdef DEBUG
    soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
        SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
        SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
        SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
        SOCKOPT_LOOPBACK);
    ASSERT(soppp->sopp_flags == 0);
#endif
}
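
/*
 * so_queue_msg_impl() returns the amount of space left in the receive
 * buffer, or -1 with *errorp set to ENOSPC once the socket becomes
 * flow controlled (so_flowctrld). A NULL mp amounts to a pure space
 * check. Inbound data is offered to the filter stack (data_in) before
 * being enqueued, and OOB data is diverted to so_queue_oob().
 */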

/* ARGSUSED */
ssize_t
so_queue_msg_impl(struct sonode *so, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp,
    sof_instance_t *filter)
{
    boolean_t force_push = B_TRUE;
    int space_left;
    sodirect_t *sodp = so->so_direct;

    ASSERT(errorp != NULL);
    *errorp = 0;
    if (mp == NULL) {
        if (so->so_downcalls->sd_recv_uio != NULL) {
            mutex_enter(&so->so_lock);
            /* the notify functions will drop the lock */
            if (flags & MSG_OOB)
                so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
            else
                so_notify_data(so, msg_size);
            return (0);
        }
        ASSERT(msg_size == 0);
        mutex_enter(&so->so_lock);
        goto space_check;
    }

    ASSERT(mp->b_next == NULL);
    ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
    ASSERT(msg_size == msgdsize(mp));

    if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
        /* The read pointer is not aligned correctly for TPI */
        zcmn_err(getzoneid(), CE_WARN,
            "sockfs: Unaligned TPI message received. rptr = %p\n",
            (void *)mp->b_rptr);
        freemsg(mp);
        mutex_enter(&so->so_lock);
        if (sodp != NULL)
            SOD_UIOAFINI(sodp);
        goto space_check;
    }

    if (so->so_filter_active > 0) {
        for (; filter != NULL; filter = filter->sofi_prev) {
            if (!SOF_INTERESTED(filter, data_in))
                continue;
            mp = (*filter->sofi_ops->sofop_data_in)(
                (sof_handle_t)filter, filter->sofi_cookie, mp,
                flags, &msg_size);
            ASSERT(msgdsize(mp) == msg_size);
            DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
                (mblk_t *), mp);
            /* Data was consumed/dropped, just do space check */
            if (msg_size == 0) {
                mutex_enter(&so->so_lock);
                goto space_check;
            }
        }
    }

    if (flags & MSG_OOB) {
        so_queue_oob(so, mp, msg_size);
        mutex_enter(&so->so_lock);
        goto space_check;
    }

    if (force_pushp != NULL)
        force_push = *force_pushp;

    mutex_enter(&so->so_lock);
    if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
        if (sodp != NULL)
            SOD_DISABLE(sodp);
        mutex_exit(&so->so_lock);
        *errorp = EOPNOTSUPP;
        return (-1);
    }
    if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
        freemsg(mp);
        if (sodp != NULL)
            SOD_DISABLE(sodp);
        mutex_exit(&so->so_lock);
        return (0);
    }

    /* process the mblk via I/OAT if capable */
    if (sodp != NULL && sodp->sod_enabled) {
        if (DB_TYPE(mp) == M_DATA) {
            sod_uioa_mblk_init(sodp, mp, msg_size);
        } else {
            SOD_UIOAFINI(sodp);
        }
    }

    if (mp->b_next == NULL) {
        so_enqueue_msg(so, mp, msg_size);
    } else {
        do {
            mblk_t *nmp;

            if ((nmp = mp->b_next) != NULL) {
                mp->b_next = NULL;
            }
            so_enqueue_msg(so, mp, msgdsize(mp));
            mp = nmp;
        } while (mp != NULL);
    }

    space_left = so->so_rcvbuf - so->so_rcv_queued;
    if (space_left <= 0) {
        so->so_flowctrld = B_TRUE;
        *errorp = ENOSPC;
        space_left = -1;
    }

    if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
        so->so_rcv_queued >= so->so_rcv_wanted) {
        SOCKET_TIMER_CANCEL(so);
        /*
         * so_notify_data will release the lock
         */
        so_notify_data(so, so->so_rcv_queued);

        if (force_pushp != NULL)
            *force_pushp = B_TRUE;
        goto done;
    } else if (so->so_rcv_timer_tid == 0) {
        /* Make sure the recv push timer is running */
        SOCKET_TIMER_START(so);
    }

done_unlock:
    mutex_exit(&so->so_lock);
done:
    return (space_left);

space_check:
    space_left = so->so_rcvbuf - so->so_rcv_queued;
    if (space_left <= 0) {
        so->so_flowctrld = B_TRUE;
        *errorp = ENOSPC;
        space_left = -1;
    }
    goto done_unlock;
}

#pragma inline(so_queue_msg_impl)

ssize_t
so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp)
{
    struct sonode *so = (struct sonode *)sock_handle;

    return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
        so->so_filter_bottom));
}
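
/*
 * Urgent (out-of-band) data is handled in two steps: so_signal_oob()
 * records the mark and posts the SIGURG notification when the protocol
 * announces urgent data, and so_queue_oob() later queues the urgent
 * byte itself, either inline or in so_oobmsg depending on whether
 * SO_OOBINLINE is set.
 */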

/*
 * Set the offset of where the oob data is relative to the bytes
 * already queued. Also generate SIGURG.
 */
void
so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
{
    struct sonode *so;

    ASSERT(offset >= 0);
    so = (struct sonode *)sock_handle;
    mutex_enter(&so->so_lock);
    if (so->so_direct != NULL)
        SOD_UIOAFINI(so->so_direct);

    /*
     * New urgent data on the way so forget about any old
     * urgent data.
     */
    so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

    /*
     * Record that urgent data is pending.
     */
    so->so_state |= SS_OOBPEND;

    if (so->so_oobmsg != NULL) {
        dprintso(so, 1, ("sock: discarding old oob\n"));
        freemsg(so->so_oobmsg);
        so->so_oobmsg = NULL;
    }

    /*
     * Set the offset where the urgent byte is.
     */
    so->so_oobmark = so->so_rcv_queued + offset;
    if (so->so_oobmark == 0)
        so->so_state |= SS_RCVATMARK;
    else
        so->so_state &= ~SS_RCVATMARK;

    so_notify_oobsig(so);
}

/*
 * Queue the OOB byte
 */
static void
so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
{
    mutex_enter(&so->so_lock);
    if (so->so_direct != NULL)
        SOD_UIOAFINI(so->so_direct);

    ASSERT(mp != NULL);
    if (!IS_SO_OOB_INLINE(so)) {
        so->so_oobmsg = mp;
        so->so_state |= SS_HAVEOOBDATA;
    } else {
        so_enqueue_msg(so, mp, len);
    }

    so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
}
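
/*
 * Closing is a two-phase handshake with the protocol: sd_close may
 * complete synchronously, in which case so_close() drops the
 * protocol's reference on the socket right away, or it may return
 * EINPROGRESS and drop the reference later through the so_closed()
 * upcall.
 */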

int
so_close(struct sonode *so, int flag, struct cred *cr)
{
    int error;

    /*
     * No new data will be enqueued once the CLOSING flag is set.
     */
    mutex_enter(&so->so_lock);
    so->so_state |= SS_CLOSING;
    ASSERT(so_verify_oobstate(so));
    so_rcv_flush(so);
    mutex_exit(&so->so_lock);

    if (so->so_state & SS_ACCEPTCONN) {
        /*
         * We grab and release the accept lock to ensure that any
         * thread about to insert a socket in so_newconn completes
         * before we flush the queue. Any thread calling so_newconn
         * after we drop the lock will observe the SS_CLOSING flag,
         * which will stop it from inserting the socket in the queue.
         */
        mutex_enter(&so->so_acceptq_lock);
        mutex_exit(&so->so_acceptq_lock);

        so_acceptq_flush(so, B_TRUE);
    }

    if (so->so_filter_active > 0)
        sof_sonode_closing(so);

    error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
    switch (error) {
    default:
        /* Protocol made a synchronous close; remove proto ref */
        VN_RELE(SOTOV(so));
        break;
    case EINPROGRESS:
        /*
         * Protocol is in the process of closing, it will make a
         * 'closed' upcall to remove the reference.
         */
        error = 0;
        break;
    }

    return (error);
}

/*
 * Upcall made by the protocol when it's doing an asynchronous close. It
 * will drop the protocol's reference on the socket.
 */
void
so_closed(sock_upper_handle_t sock_handle)
{
    struct sonode *so = (struct sonode *)sock_handle;

    VN_RELE(SOTOV(so));
}

void
so_zcopy_notify(sock_upper_handle_t sock_handle)
{
    struct sonode *so = (struct sonode *)sock_handle;

    mutex_enter(&so->so_lock);
    so->so_copyflag |= STZCNOTIFY;
    cv_broadcast(&so->so_copy_cv);
    mutex_exit(&so->so_lock);
}

void
so_set_error(sock_upper_handle_t sock_handle, int error)
{
    struct sonode *so = (struct sonode *)sock_handle;

    mutex_enter(&so->so_lock);

    soseterror(so, error);

    so_notify_error(so);
}
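
/*
 * When reading from the sonode's receive queue, so_recvmsg() may
 * dequeue a TPI control message along with the data. The switch below
 * handles T_DATA_IND, T_UNITDATA_IND (source address and options for
 * datagrams) and T_OPTDATA_IND (ancillary data), translating TPI
 * options into control messages with so_opt2cmsg().
 */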

/*
 * so_recvmsg - read data from the socket
 *
 * There are two ways of obtaining data; either we ask the protocol to
 * copy directly into the supplied buffer, or we copy data from the
 * sonode's receive queue. The decision of which one to use depends on
 * whether the protocol has a sd_recv_uio down call.
 */
int
so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
    rval_t rval;
    int flags = 0;
    t_uscalar_t controllen, namelen;
    int error = 0;
    int ret;
    mblk_t *mctlp = NULL;
    union T_primitives *tpr;
    void *control;
    ssize_t saved_resid;
    struct uio *suiop;

    SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));

    if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
        (so->so_mode & SM_CONNREQUIRED)) {
        SO_UNBLOCK_FALLBACK(so);
        return (ENOTCONN);
    }

    if (msg->msg_flags & MSG_PEEK)
        msg->msg_flags &= ~MSG_WAITALL;

    if (so->so_mode & SM_ATOMIC)
        msg->msg_flags |= MSG_TRUNC;

    if (msg->msg_flags & MSG_OOB) {
        if ((so->so_mode & SM_EXDATA) == 0) {
            error = EOPNOTSUPP;
        } else if (so->so_downcalls->sd_recv_uio != NULL) {
            error = (*so->so_downcalls->sd_recv_uio)
                (so->so_proto_handle, uiop, msg, cr);
        } else {
            error = sorecvoob(so, msg, uiop, msg->msg_flags,
                IS_SO_OOB_INLINE(so));
        }
        SO_UNBLOCK_FALLBACK(so);
        return (error);
    }

    /*
     * If the protocol has the recv down call, then pass the request
     * down.
     */
    if (so->so_downcalls->sd_recv_uio != NULL) {
        error = (*so->so_downcalls->sd_recv_uio)
            (so->so_proto_handle, uiop, msg, cr);
        SO_UNBLOCK_FALLBACK(so);
        return (error);
    }

    /*
     * Reading data from the socket buffer
     */
    flags = msg->msg_flags;
    msg->msg_flags = 0;

    /*
     * Set msg_controllen and msg_namelen to zero here to make it
     * simpler in the cases that no control or name is returned.
     */
    controllen = msg->msg_controllen;
    namelen = msg->msg_namelen;
    msg->msg_controllen = 0;
    msg->msg_namelen = 0;

    mutex_enter(&so->so_lock);
    /* Set SOREADLOCKED */
    error = so_lock_read_intr(so,
        uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
    mutex_exit(&so->so_lock);
    if (error) {
        SO_UNBLOCK_FALLBACK(so);
        return (error);
    }

    suiop = sod_rcv_init(so, flags, &uiop);
retry:
    saved_resid = uiop->uio_resid;
    error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
    if (error != 0) {
        goto out;
    }
    /*
     * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
     * For non-datagrams MOREDATA is used to set MSG_EOR.
     */
    ASSERT(!(rval.r_val1 & MORECTL));
    if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
        msg->msg_flags |= MSG_TRUNC;
    if (mctlp == NULL) {
        dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));

        mutex_enter(&so->so_lock);
        /* Set MSG_EOR based on MOREDATA */
        if (!(rval.r_val1 & MOREDATA)) {
            if (so->so_state & SS_SAVEDEOR) {
                msg->msg_flags |= MSG_EOR;
                so->so_state &= ~SS_SAVEDEOR;
            }
        }
        /*
         * If some data was received (i.e. not EOF) and the
         * read/recv* has not been satisfied wait for some more.
         */
        if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
            uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
            mutex_exit(&so->so_lock);
            flags |= MSG_NOMARK;
            goto retry;
        }

        goto out_locked;
    }
    /* so_queue_msg has already verified length and alignment */
    tpr = (union T_primitives *)mctlp->b_rptr;
    dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
    switch (tpr->type) {
    case T_DATA_IND: {
        /*
         * Set msg_flags to MSG_EOR based on
         * MORE_flag and MOREDATA.
         */
        mutex_enter(&so->so_lock);
        so->so_state &= ~SS_SAVEDEOR;
        if (!(tpr->data_ind.MORE_flag & 1)) {
            if (!(rval.r_val1 & MOREDATA))
                msg->msg_flags |= MSG_EOR;
            else
                so->so_state |= SS_SAVEDEOR;
        }
        freemsg(mctlp);
        /*
         * If some data was received (i.e. not EOF) and the
         * read/recv* has not been satisfied wait for some more.
         */
        if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
            uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
            mutex_exit(&so->so_lock);
            flags |= MSG_NOMARK;
            goto retry;
        }
        goto out_locked;
    }
    case T_UNITDATA_IND: {
        void *addr;
        t_uscalar_t addrlen;
        void *abuf;
        t_uscalar_t optlen;
        void *opt;

        if (namelen != 0) {
            /* Caller wants source address */
            addrlen = tpr->unitdata_ind.SRC_length;
            addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
                addrlen, 1);
            if (addr == NULL) {
                freemsg(mctlp);
                error = EPROTO;
                eprintsoline(so, error);
                goto out;
            }
            ASSERT(so->so_family != AF_UNIX);
        }
        optlen = tpr->unitdata_ind.OPT_length;
        if (optlen != 0) {
            t_uscalar_t ncontrollen;

            /*
             * Extract any source address option.
             * Determine how large cmsg buffer is needed.
             */
            opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
                optlen, __TPI_ALIGN_SIZE);

            if (opt == NULL) {
                freemsg(mctlp);
                error = EPROTO;
                eprintsoline(so, error);
                goto out;
            }
            if (so->so_family == AF_UNIX)
                so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
            ncontrollen = so_cmsglen(mctlp, opt, optlen,
                !(flags & MSG_XPG4_2));
            if (controllen != 0)
                controllen = ncontrollen;
            else if (ncontrollen != 0)
                msg->msg_flags |= MSG_CTRUNC;
        } else {
            controllen = 0;
        }

        if (namelen != 0) {
            /*
             * Return address to caller.
             * Caller handles truncation if length
             * exceeds msg_namelen.
             * NOTE: AF_UNIX NUL termination is ensured by
             * the sender's copyin_name().
             */
            abuf = kmem_alloc(addrlen, KM_SLEEP);

            bcopy(addr, abuf, addrlen);
            msg->msg_name = abuf;
            msg->msg_namelen = addrlen;
        }

        if (controllen != 0) {
            /*
             * Return control msg to caller.
             * Caller handles truncation if length
             * exceeds msg_controllen.
             */
            control = kmem_zalloc(controllen, KM_SLEEP);

            error = so_opt2cmsg(mctlp, opt, optlen,
                !(flags & MSG_XPG4_2), control, controllen);
            if (error) {
                freemsg(mctlp);
                if (msg->msg_namelen != 0)
                    kmem_free(msg->msg_name,
                        msg->msg_namelen);
                kmem_free(control, controllen);
                eprintsoline(so, error);
                goto out;
            }
            msg->msg_control = control;
            msg->msg_controllen = controllen;
        }

        freemsg(mctlp);
        goto out;
    }
    case T_OPTDATA_IND: {
        struct T_optdata_req *tdr;
        void *opt;
        t_uscalar_t optlen;

        tdr = (struct T_optdata_req *)mctlp->b_rptr;
        optlen = tdr->OPT_length;
        if (optlen != 0) {
            t_uscalar_t ncontrollen;
            /*
             * Determine how large cmsg buffer is needed.
             */
            opt = sogetoff(mctlp,
                tpr->optdata_ind.OPT_offset, optlen,
                __TPI_ALIGN_SIZE);

            if (opt == NULL) {
                freemsg(mctlp);
                error = EPROTO;
                eprintsoline(so, error);
                goto out;
            }

            ncontrollen = so_cmsglen(mctlp, opt, optlen,
                !(flags & MSG_XPG4_2));
            if (controllen != 0)
                controllen = ncontrollen;
            else if (ncontrollen != 0)
                msg->msg_flags |= MSG_CTRUNC;
        } else {
            controllen = 0;
        }

        if (controllen != 0) {
            /*
             * Return control msg to caller.
             * Caller handles truncation if length
             * exceeds msg_controllen.
             */
            control = kmem_zalloc(controllen, KM_SLEEP);

            error = so_opt2cmsg(mctlp, opt, optlen,
                !(flags & MSG_XPG4_2), control, controllen);
            if (error) {
                freemsg(mctlp);
                kmem_free(control, controllen);
                eprintsoline(so, error);
                goto out;
            }
            msg->msg_control = control;
            msg->msg_controllen = controllen;
        }

        /*
         * Set msg_flags to MSG_EOR based on
         * DATA_flag and MOREDATA.
         */
        mutex_enter(&so->so_lock);
        so->so_state &= ~SS_SAVEDEOR;
        if (!(tpr->data_ind.MORE_flag & 1)) {
            if (!(rval.r_val1 & MOREDATA))
                msg->msg_flags |= MSG_EOR;
            else
                so->so_state |= SS_SAVEDEOR;
        }
        freemsg(mctlp);
        /*
         * If some data was received (i.e. not EOF) and the
         * read/recv* has not been satisfied wait for some more.
         * Not possible to wait if control info was received.
         */
        if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
            controllen == 0 &&
            uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
            mutex_exit(&so->so_lock);
            flags |= MSG_NOMARK;
            goto retry;
        }
        goto out_locked;
    }
    default:
        cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
            tpr->type);
        freemsg(mctlp);
        error = EPROTO;
        ASSERT(0);
    }
out:
    mutex_enter(&so->so_lock);
out_locked:
    ret = sod_rcv_done(so, suiop, uiop);
    if (ret != 0 && error == 0)
        error = ret;

    so_unlock_read(so);    /* Clear SOREADLOCKED */
    mutex_exit(&so->so_lock);

    SO_UNBLOCK_FALLBACK(so);

    return (error);
}

sonodeops_t so_sonodeops = {
    so_init,            /* sop_init */
    so_accept,          /* sop_accept */
    so_bind,            /* sop_bind */
    so_listen,          /* sop_listen */
    so_connect,         /* sop_connect */
    so_recvmsg,         /* sop_recvmsg */
    so_sendmsg,         /* sop_sendmsg */
    so_sendmblk,        /* sop_sendmblk */
    so_getpeername,     /* sop_getpeername */
    so_getsockname,     /* sop_getsockname */
    so_shutdown,        /* sop_shutdown */
    so_getsockopt,      /* sop_getsockopt */
    so_setsockopt,      /* sop_setsockopt */
    so_ioctl,           /* sop_ioctl */
    so_poll,            /* sop_poll */
    so_close,           /* sop_close */
};

sock_upcalls_t so_upcalls = {
    so_newconn,
    so_connected,
    so_disconnected,
    so_opctl,
    so_queue_msg,
    so_set_prop,
    so_txq_full,
    so_signal_oob,
    so_zcopy_notify,
    so_set_error,
    so_closed
};