/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>

#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/sockio.h>
#include <sys/kmem_impl.h>

#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/ddi.h>
#include <netinet/in.h>
#include <inet/ip.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/sockfilter_impl.h>

#include <sys/socket_proto.h>

#include <fs/sockfs/socktpi_impl.h>
#include <fs/sockfs/sodirect.h>
#include <sys/tihdr.h>
#include <fs/sockfs/nl7c.h>

/* Tunables consulted by the X/Open state checks in this file. */
extern int xnet_skip_checks;
extern int xnet_check_print;

static void so_queue_oob(struct sonode *, mblk_t *, size_t);


/*
 * accept() stub for sockets that do not support the operation.
 * Ignores all of its arguments and always returns EOPNOTSUPP.
 */
/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag,
    struct cred *cr, struct sonode **nsop)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/ 73 int 74 so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr) 75 { 76 return (EOPNOTSUPP); 77 } 78 79 /*ARGSUSED*/ 80 int 81 so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa, 82 socklen_t *len, struct cred *cr) 83 { 84 return (EOPNOTSUPP); 85 } 86 87 /*ARGSUSED*/ 88 int 89 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr, 90 socklen_t *addrlen, boolean_t accept, struct cred *cr) 91 { 92 return (EOPNOTSUPP); 93 } 94 95 /*ARGSUSED*/ 96 int 97 so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr) 98 { 99 return (EOPNOTSUPP); 100 } 101 102 /*ARGSUSED*/ 103 int 104 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag, 105 struct cred *cr, mblk_t **mpp) 106 { 107 return (EOPNOTSUPP); 108 } 109 110 /* 111 * Generic Socket Ops 112 */ 113 114 /* ARGSUSED */ 115 int 116 so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags) 117 { 118 return (socket_init_common(so, pso, flags, cr)); 119 } 120 121 int 122 so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 123 int flags, struct cred *cr) 124 { 125 int error; 126 127 SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr)); 128 129 ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD); 130 131 /* X/Open requires this check */ 132 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 133 if (xnet_check_print) { 134 printf("sockfs: X/Open bind state check " 135 "caused EINVAL\n"); 136 } 137 error = EINVAL; 138 goto done; 139 } 140 141 /* 142 * a bind to a NULL address is interpreted as unbind. So just 143 * do the downcall. 144 */ 145 if (name == NULL) 146 goto dobind; 147 148 switch (so->so_family) { 149 case AF_INET: 150 if ((size_t)namelen != sizeof (sin_t)) { 151 error = name->sa_family != so->so_family ? 
152 EAFNOSUPPORT : EINVAL; 153 eprintsoline(so, error); 154 goto done; 155 } 156 157 if ((flags & _SOBIND_XPG4_2) && 158 (name->sa_family != so->so_family)) { 159 /* 160 * This check has to be made for X/Open 161 * sockets however application failures have 162 * been observed when it is applied to 163 * all sockets. 164 */ 165 error = EAFNOSUPPORT; 166 eprintsoline(so, error); 167 goto done; 168 } 169 /* 170 * Force a zero sa_family to match so_family. 171 * 172 * Some programs like inetd(1M) don't set the 173 * family field. Other programs leave 174 * sin_family set to garbage - SunOS 4.X does 175 * not check the family field on a bind. 176 * We use the family field that 177 * was passed in to the socket() call. 178 */ 179 name->sa_family = so->so_family; 180 break; 181 182 case AF_INET6: { 183 #ifdef DEBUG 184 sin6_t *sin6 = (sin6_t *)name; 185 #endif 186 if ((size_t)namelen != sizeof (sin6_t)) { 187 error = name->sa_family != so->so_family ? 188 EAFNOSUPPORT : EINVAL; 189 eprintsoline(so, error); 190 goto done; 191 } 192 193 if (name->sa_family != so->so_family) { 194 /* 195 * With IPv6 we require the family to match 196 * unlike in IPv4. 197 */ 198 error = EAFNOSUPPORT; 199 eprintsoline(so, error); 200 goto done; 201 } 202 #ifdef DEBUG 203 /* 204 * Verify that apps don't forget to clear 205 * sin6_scope_id etc 206 */ 207 if (sin6->sin6_scope_id != 0 && 208 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 209 zcmn_err(getzoneid(), CE_WARN, 210 "bind with uninitialized sin6_scope_id " 211 "(%d) on socket. Pid = %d\n", 212 (int)sin6->sin6_scope_id, 213 (int)curproc->p_pid); 214 } 215 if (sin6->__sin6_src_id != 0) { 216 zcmn_err(getzoneid(), CE_WARN, 217 "bind with uninitialized __sin6_src_id " 218 "(%d) on socket. 
Pid = %d\n", 219 (int)sin6->__sin6_src_id, 220 (int)curproc->p_pid); 221 } 222 #endif /* DEBUG */ 223 224 break; 225 } 226 default: 227 /* Just pass the request to the protocol */ 228 goto dobind; 229 } 230 231 /* 232 * First we check if either NCA or KSSL has been enabled for 233 * the requested address, and if so, we fall back to TPI. 234 * If neither of those two services are enabled, then we just 235 * pass the request to the protocol. 236 * 237 * Note that KSSL can only be enabled on a socket if NCA is NOT 238 * enabled for that socket, hence the else-statement below. 239 */ 240 if (nl7c_enabled && ((so->so_family == AF_INET || 241 so->so_family == AF_INET6) && 242 nl7c_lookup_addr(name, namelen) != NULL)) { 243 /* 244 * NL7C is not supported in non-global zones, 245 * we enforce this restriction here. 246 */ 247 if (so->so_zoneid == GLOBAL_ZONEID) { 248 /* NCA should be used, so fall back to TPI */ 249 error = so_tpi_fallback(so, cr); 250 SO_UNBLOCK_FALLBACK(so); 251 if (error) 252 return (error); 253 else 254 return (SOP_BIND(so, name, namelen, flags, cr)); 255 } 256 } 257 258 dobind: 259 if (so->so_filter_active == 0 || 260 (error = sof_filter_bind(so, name, &namelen, cr)) < 0) { 261 error = (*so->so_downcalls->sd_bind) 262 (so->so_proto_handle, name, namelen, cr); 263 } 264 done: 265 SO_UNBLOCK_FALLBACK(so); 266 267 return (error); 268 } 269 270 int 271 so_listen(struct sonode *so, int backlog, struct cred *cr) 272 { 273 int error = 0; 274 275 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 276 SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr)); 277 278 if ((so)->so_filter_active == 0 || 279 (error = sof_filter_listen(so, &backlog, cr)) < 0) 280 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle, 281 backlog, cr); 282 283 SO_UNBLOCK_FALLBACK(so); 284 285 return (error); 286 } 287 288 289 int 290 so_connect(struct sonode *so, struct sockaddr *name, 291 socklen_t namelen, int fflag, int flags, struct cred *cr) 292 { 293 int error = 0; 294 sock_connid_t id; 295 
296 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 297 SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr)); 298 299 /* 300 * If there is a pending error, return error 301 * This can happen if a non blocking operation caused an error. 302 */ 303 304 if (so->so_error != 0) { 305 mutex_enter(&so->so_lock); 306 error = sogeterr(so, B_TRUE); 307 mutex_exit(&so->so_lock); 308 if (error != 0) 309 goto done; 310 } 311 312 if (so->so_filter_active == 0 || 313 (error = sof_filter_connect(so, (struct sockaddr *)name, 314 &namelen, cr)) < 0) { 315 error = (*so->so_downcalls->sd_connect)(so->so_proto_handle, 316 name, namelen, &id, cr); 317 318 if (error == EINPROGRESS) 319 error = so_wait_connected(so, 320 fflag & (FNONBLOCK|FNDELAY), id); 321 } 322 done: 323 SO_UNBLOCK_FALLBACK(so); 324 return (error); 325 } 326 327 /*ARGSUSED*/ 328 int 329 so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop) 330 { 331 int error = 0; 332 struct sonode *nso; 333 334 *nsop = NULL; 335 336 SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop)); 337 if ((so->so_state & SS_ACCEPTCONN) == 0) { 338 SO_UNBLOCK_FALLBACK(so); 339 return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ? 
340 EOPNOTSUPP : EINVAL); 341 } 342 343 if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)), 344 &nso)) == 0) { 345 ASSERT(nso != NULL); 346 347 /* finish the accept */ 348 if ((so->so_filter_active > 0 && 349 (error = sof_filter_accept(nso, cr)) > 0) || 350 (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle, 351 nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) { 352 (void) socket_close(nso, 0, cr); 353 socket_destroy(nso); 354 } else { 355 *nsop = nso; 356 } 357 } 358 359 SO_UNBLOCK_FALLBACK(so); 360 return (error); 361 } 362 363 int 364 so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 365 struct cred *cr) 366 { 367 int error, flags; 368 boolean_t dontblock; 369 ssize_t orig_resid; 370 mblk_t *mp; 371 372 SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr)); 373 374 flags = msg->msg_flags; 375 error = 0; 376 dontblock = (flags & MSG_DONTWAIT) || 377 (uiop->uio_fmode & (FNONBLOCK|FNDELAY)); 378 379 if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) { 380 /* 381 * Old way of passing fd's is not supported 382 */ 383 SO_UNBLOCK_FALLBACK(so); 384 return (EOPNOTSUPP); 385 } 386 387 if ((so->so_mode & SM_ATOMIC) && 388 uiop->uio_resid > so->so_proto_props.sopp_maxpsz && 389 so->so_proto_props.sopp_maxpsz != -1) { 390 SO_UNBLOCK_FALLBACK(so); 391 return (EMSGSIZE); 392 } 393 394 /* 395 * For atomic sends we will only do one iteration. 396 */ 397 do { 398 if (so->so_state & SS_CANTSENDMORE) { 399 error = EPIPE; 400 break; 401 } 402 403 if (so->so_error != 0) { 404 mutex_enter(&so->so_lock); 405 error = sogeterr(so, B_TRUE); 406 mutex_exit(&so->so_lock); 407 if (error != 0) 408 break; 409 } 410 411 /* 412 * Send down OOB messages even if the send path is being 413 * flow controlled (assuming the protocol supports OOB data). 
414 */ 415 if (flags & MSG_OOB) { 416 if ((so->so_mode & SM_EXDATA) == 0) { 417 error = EOPNOTSUPP; 418 break; 419 } 420 } else if (SO_SND_FLOWCTRLD(so)) { 421 /* 422 * Need to wait until the protocol is ready to receive 423 * more data for transmission. 424 */ 425 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 426 break; 427 } 428 429 /* 430 * Time to send data to the protocol. We either copy the 431 * data into mblks or pass the uio directly to the protocol. 432 * We decide what to do based on the available down calls. 433 */ 434 if (so->so_downcalls->sd_send_uio != NULL) { 435 error = (*so->so_downcalls->sd_send_uio) 436 (so->so_proto_handle, uiop, msg, cr); 437 if (error != 0) 438 break; 439 } else { 440 /* save the resid in case of failure */ 441 orig_resid = uiop->uio_resid; 442 443 if ((mp = socopyinuio(uiop, 444 so->so_proto_props.sopp_maxpsz, 445 so->so_proto_props.sopp_wroff, 446 so->so_proto_props.sopp_maxblk, 447 so->so_proto_props.sopp_tail, &error)) == NULL) { 448 break; 449 } 450 ASSERT(uiop->uio_resid >= 0); 451 452 if (so->so_filter_active > 0 && 453 ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr, 454 &error)) == NULL)) { 455 if (error != 0) 456 break; 457 continue; 458 } 459 error = (*so->so_downcalls->sd_send) 460 (so->so_proto_handle, mp, msg, cr); 461 if (error != 0) { 462 /* 463 * The send failed. We do not have to free the 464 * mblks, because that is the protocol's 465 * responsibility. However, uio_resid must 466 * remain accurate, so adjust that here. 
467 */ 468 uiop->uio_resid = orig_resid; 469 break; 470 } 471 } 472 } while (uiop->uio_resid > 0); 473 474 SO_UNBLOCK_FALLBACK(so); 475 476 return (error); 477 } 478 479 int 480 so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag, 481 struct cred *cr, mblk_t **mpp, sof_instance_t *fil, 482 boolean_t fil_inject) 483 { 484 int error; 485 boolean_t dontblock; 486 size_t size; 487 mblk_t *mp = *mpp; 488 489 if (so->so_downcalls->sd_send == NULL) 490 return (EOPNOTSUPP); 491 492 error = 0; 493 dontblock = (msg->msg_flags & MSG_DONTWAIT) || 494 (fflag & (FNONBLOCK|FNDELAY)); 495 size = msgdsize(mp); 496 497 if ((so->so_mode & SM_ATOMIC) && 498 size > so->so_proto_props.sopp_maxpsz && 499 so->so_proto_props.sopp_maxpsz != -1) { 500 SO_UNBLOCK_FALLBACK(so); 501 return (EMSGSIZE); 502 } 503 504 while (mp != NULL) { 505 mblk_t *nmp, *last_mblk; 506 size_t mlen; 507 508 if (so->so_state & SS_CANTSENDMORE) { 509 error = EPIPE; 510 break; 511 } 512 if (so->so_error != 0) { 513 mutex_enter(&so->so_lock); 514 error = sogeterr(so, B_TRUE); 515 mutex_exit(&so->so_lock); 516 if (error != 0) 517 break; 518 } 519 /* Socket filters are not flow controlled */ 520 if (SO_SND_FLOWCTRLD(so) && !fil_inject) { 521 /* 522 * Need to wait until the protocol is ready to receive 523 * more data for transmission. 524 */ 525 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 526 break; 527 } 528 529 /* 530 * We only allow so_maxpsz of data to be sent down to 531 * the protocol at time. 
532 */ 533 mlen = MBLKL(mp); 534 nmp = mp->b_cont; 535 last_mblk = mp; 536 while (nmp != NULL) { 537 mlen += MBLKL(nmp); 538 if (mlen > so->so_proto_props.sopp_maxpsz) { 539 last_mblk->b_cont = NULL; 540 break; 541 } 542 last_mblk = nmp; 543 nmp = nmp->b_cont; 544 } 545 546 if (so->so_filter_active > 0 && 547 (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg, 548 cr, &error)) == NULL) { 549 *mpp = mp = nmp; 550 if (error != 0) 551 break; 552 continue; 553 } 554 error = (*so->so_downcalls->sd_send) 555 (so->so_proto_handle, mp, msg, cr); 556 if (error != 0) { 557 /* 558 * The send failed. The protocol will free the mblks 559 * that were sent down. Let the caller deal with the 560 * rest. 561 */ 562 *mpp = nmp; 563 break; 564 } 565 566 *mpp = mp = nmp; 567 } 568 /* Let the filter know whether the protocol is flow controlled */ 569 if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so)) 570 error = ENOSPC; 571 572 return (error); 573 } 574 575 #pragma inline(so_sendmblk_impl) 576 577 int 578 so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 579 struct cred *cr, mblk_t **mpp) 580 { 581 int error; 582 583 SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp)); 584 585 if ((so->so_mode & SM_SENDFILESUPP) == 0) { 586 SO_UNBLOCK_FALLBACK(so); 587 return (EOPNOTSUPP); 588 } 589 590 error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top, 591 B_FALSE); 592 593 SO_UNBLOCK_FALLBACK(so); 594 595 return (error); 596 } 597 598 int 599 so_shutdown(struct sonode *so, int how, struct cred *cr) 600 { 601 int error; 602 603 SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr)); 604 605 /* 606 * SunOS 4.X has no check for datagram sockets. 607 * 5.X checks that it is connected (ENOTCONN) 608 * X/Open requires that we check the connected state. 
609 */ 610 if (!(so->so_state & SS_ISCONNECTED)) { 611 if (!xnet_skip_checks) { 612 error = ENOTCONN; 613 if (xnet_check_print) { 614 printf("sockfs: X/Open shutdown check " 615 "caused ENOTCONN\n"); 616 } 617 } 618 goto done; 619 } 620 621 if (so->so_filter_active == 0 || 622 (error = sof_filter_shutdown(so, &how, cr)) < 0) 623 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle, 624 how, cr)); 625 626 /* 627 * Protocol agreed to shutdown. We need to flush the 628 * receive buffer if the receive side is being shutdown. 629 */ 630 if (error == 0 && how != SHUT_WR) { 631 mutex_enter(&so->so_lock); 632 /* wait for active reader to finish */ 633 (void) so_lock_read(so, 0); 634 635 so_rcv_flush(so); 636 637 so_unlock_read(so); 638 mutex_exit(&so->so_lock); 639 } 640 641 done: 642 SO_UNBLOCK_FALLBACK(so); 643 return (error); 644 } 645 646 int 647 so_getsockname(struct sonode *so, struct sockaddr *addr, 648 socklen_t *addrlen, struct cred *cr) 649 { 650 int error; 651 652 SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr)); 653 654 if (so->so_filter_active == 0 || 655 (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0) 656 error = (*so->so_downcalls->sd_getsockname) 657 (so->so_proto_handle, addr, addrlen, cr); 658 659 SO_UNBLOCK_FALLBACK(so); 660 return (error); 661 } 662 663 int 664 so_getpeername(struct sonode *so, struct sockaddr *addr, 665 socklen_t *addrlen, boolean_t accept, struct cred *cr) 666 { 667 int error; 668 669 SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr)); 670 671 if (accept) { 672 error = (*so->so_downcalls->sd_getpeername) 673 (so->so_proto_handle, addr, addrlen, cr); 674 } else if (!(so->so_state & SS_ISCONNECTED)) { 675 error = ENOTCONN; 676 } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 677 /* Added this check for X/Open */ 678 error = EINVAL; 679 if (xnet_check_print) { 680 printf("sockfs: X/Open getpeername check => EINVAL\n"); 681 } 682 } else if (so->so_filter_active == 0 
|| 683 (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) { 684 error = (*so->so_downcalls->sd_getpeername) 685 (so->so_proto_handle, addr, addrlen, cr); 686 } 687 688 SO_UNBLOCK_FALLBACK(so); 689 return (error); 690 } 691 692 int 693 so_getsockopt(struct sonode *so, int level, int option_name, 694 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 695 { 696 int error = 0; 697 698 if (level == SOL_FILTER) 699 return (sof_getsockopt(so, option_name, optval, optlenp, cr)); 700 701 SO_BLOCK_FALLBACK(so, 702 SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr)); 703 704 if ((so->so_filter_active == 0 || 705 (error = sof_filter_getsockopt(so, level, option_name, optval, 706 optlenp, cr)) < 0) && 707 (error = socket_getopt_common(so, level, option_name, optval, 708 optlenp, flags)) < 0) { 709 error = (*so->so_downcalls->sd_getsockopt) 710 (so->so_proto_handle, level, option_name, optval, optlenp, 711 cr); 712 if (error == ENOPROTOOPT) { 713 if (level == SOL_SOCKET) { 714 /* 715 * If a protocol does not support a particular 716 * socket option, set can fail (not allowed) 717 * but get can not fail. This is the previous 718 * sockfs bahvior. 
719 */ 720 switch (option_name) { 721 case SO_LINGER: 722 if (*optlenp < (t_uscalar_t) 723 sizeof (struct linger)) { 724 error = EINVAL; 725 break; 726 } 727 error = 0; 728 bzero(optval, sizeof (struct linger)); 729 *optlenp = sizeof (struct linger); 730 break; 731 case SO_RCVTIMEO: 732 case SO_SNDTIMEO: 733 if (*optlenp < (t_uscalar_t) 734 sizeof (struct timeval)) { 735 error = EINVAL; 736 break; 737 } 738 error = 0; 739 bzero(optval, sizeof (struct timeval)); 740 *optlenp = sizeof (struct timeval); 741 break; 742 case SO_SND_BUFINFO: 743 if (*optlenp < (t_uscalar_t) 744 sizeof (struct so_snd_bufinfo)) { 745 error = EINVAL; 746 break; 747 } 748 error = 0; 749 bzero(optval, 750 sizeof (struct so_snd_bufinfo)); 751 *optlenp = 752 sizeof (struct so_snd_bufinfo); 753 break; 754 case SO_DEBUG: 755 case SO_REUSEADDR: 756 case SO_KEEPALIVE: 757 case SO_DONTROUTE: 758 case SO_BROADCAST: 759 case SO_USELOOPBACK: 760 case SO_OOBINLINE: 761 case SO_DGRAM_ERRIND: 762 case SO_SNDBUF: 763 case SO_RCVBUF: 764 error = 0; 765 *((int32_t *)optval) = 0; 766 *optlenp = sizeof (int32_t); 767 break; 768 default: 769 break; 770 } 771 } 772 } 773 } 774 775 SO_UNBLOCK_FALLBACK(so); 776 return (error); 777 } 778 779 int 780 so_setsockopt(struct sonode *so, int level, int option_name, 781 const void *optval, socklen_t optlen, struct cred *cr) 782 { 783 int error = 0; 784 struct timeval tl; 785 const void *opt = optval; 786 787 if (level == SOL_FILTER) 788 return (sof_setsockopt(so, option_name, optval, optlen, cr)); 789 790 SO_BLOCK_FALLBACK(so, 791 SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr)); 792 793 /* X/Open requires this check */ 794 if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) { 795 SO_UNBLOCK_FALLBACK(so); 796 if (xnet_check_print) 797 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 798 return (EINVAL); 799 } 800 801 if (so->so_filter_active > 0 && 802 (error = sof_filter_setsockopt(so, level, option_name, 803 (void *)optval, &optlen, cr)) >= 0) 804 
goto done; 805 806 if (level == SOL_SOCKET) { 807 switch (option_name) { 808 case SO_RCVTIMEO: 809 case SO_SNDTIMEO: { 810 /* 811 * We pass down these two options to protocol in order 812 * to support some third part protocols which need to 813 * know them. For those protocols which don't care 814 * these two options, simply return 0. 815 */ 816 clock_t t_usec; 817 818 if (get_udatamodel() == DATAMODEL_NONE || 819 get_udatamodel() == DATAMODEL_NATIVE) { 820 if (optlen != sizeof (struct timeval)) { 821 error = EINVAL; 822 goto done; 823 } 824 bcopy((struct timeval *)optval, &tl, 825 sizeof (struct timeval)); 826 } else { 827 if (optlen != sizeof (struct timeval32)) { 828 error = EINVAL; 829 goto done; 830 } 831 TIMEVAL32_TO_TIMEVAL(&tl, 832 (struct timeval32 *)optval); 833 } 834 opt = &tl; 835 optlen = sizeof (tl); 836 t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 837 mutex_enter(&so->so_lock); 838 if (option_name == SO_RCVTIMEO) 839 so->so_rcvtimeo = drv_usectohz(t_usec); 840 else 841 so->so_sndtimeo = drv_usectohz(t_usec); 842 mutex_exit(&so->so_lock); 843 break; 844 } 845 case SO_RCVBUF: 846 /* 847 * XXX XPG 4.2 applications retrieve SO_RCVBUF from 848 * sockfs since the transport might adjust the value 849 * and not return exactly what was set by the 850 * application. 851 */ 852 so->so_xpg_rcvbuf = *(int32_t *)optval; 853 break; 854 } 855 } 856 error = (*so->so_downcalls->sd_setsockopt) 857 (so->so_proto_handle, level, option_name, opt, optlen, cr); 858 done: 859 SO_UNBLOCK_FALLBACK(so); 860 return (error); 861 } 862 863 int 864 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 865 struct cred *cr, int32_t *rvalp) 866 { 867 int error = 0; 868 869 SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 870 871 /* 872 * If there is a pending error, return error 873 * This can happen if a non blocking operation caused an error. 
874 */ 875 if (so->so_error != 0) { 876 mutex_enter(&so->so_lock); 877 error = sogeterr(so, B_TRUE); 878 mutex_exit(&so->so_lock); 879 if (error != 0) 880 goto done; 881 } 882 883 /* 884 * calling strioc can result in the socket falling back to TPI, 885 * if that is supported. 886 */ 887 if ((so->so_filter_active == 0 || 888 (error = sof_filter_ioctl(so, cmd, arg, mode, 889 rvalp, cr)) < 0) && 890 (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 && 891 (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) { 892 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle, 893 cmd, arg, mode, rvalp, cr); 894 } 895 896 done: 897 SO_UNBLOCK_FALLBACK(so); 898 899 return (error); 900 } 901 902 int 903 so_poll(struct sonode *so, short events, int anyyet, short *reventsp, 904 struct pollhead **phpp) 905 { 906 int state = so->so_state; 907 *reventsp = 0; 908 909 /* 910 * In sockets the errors are represented as input/output events 911 */ 912 if (so->so_error != 0 && 913 ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) { 914 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events; 915 return (0); 916 } 917 918 /* 919 * If the socket is in a state where it can send data 920 * turn on POLLWRBAND and POLLOUT events. 921 */ 922 if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) { 923 /* 924 * out of band data is allowed even if the connection 925 * is flow controlled 926 */ 927 *reventsp |= POLLWRBAND & events; 928 if (!SO_SND_FLOWCTRLD(so)) { 929 /* 930 * As long as there is buffer to send data 931 * turn on POLLOUT events 932 */ 933 *reventsp |= POLLOUT & events; 934 } 935 } 936 937 /* 938 * Turn on POLLIN whenever there is data on the receive queue, 939 * or the socket is in a state where no more data will be received. 940 * Also, if the socket is accepting connections, flip the bit if 941 * there is something on the queue. 942 * 943 * We do an initial check for events without holding locks. 
However, 944 * if there are no event available, then we redo the check for POLLIN 945 * events under the lock. 946 */ 947 948 /* Pending connections */ 949 if (!list_is_empty(&so->so_acceptq_list)) 950 *reventsp |= (POLLIN|POLLRDNORM) & events; 951 952 /* Data */ 953 /* so_downcalls is null for sctp */ 954 if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) { 955 *reventsp |= (*so->so_downcalls->sd_poll) 956 (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet, 957 CRED()) & events; 958 ASSERT((*reventsp & ~events) == 0); 959 /* do not recheck events */ 960 events &= ~SO_PROTO_POLLEV; 961 } else { 962 if (SO_HAVE_DATA(so)) 963 *reventsp |= (POLLIN|POLLRDNORM) & events; 964 965 /* Urgent data */ 966 if ((state & SS_OOBPEND) != 0) { 967 *reventsp |= (POLLRDBAND | POLLPRI) & events; 968 } 969 } 970 971 if (!*reventsp && !anyyet) { 972 /* Check for read events again, but this time under lock */ 973 if (events & (POLLIN|POLLRDNORM)) { 974 mutex_enter(&so->so_lock); 975 if (SO_HAVE_DATA(so) || 976 !list_is_empty(&so->so_acceptq_list)) { 977 mutex_exit(&so->so_lock); 978 *reventsp |= (POLLIN|POLLRDNORM) & events; 979 return (0); 980 } else { 981 so->so_pollev |= SO_POLLEV_IN; 982 mutex_exit(&so->so_lock); 983 } 984 } 985 *phpp = &so->so_poll_list; 986 } 987 return (0); 988 } 989 990 /* 991 * Generic Upcalls 992 */ 993 void 994 so_connected(sock_upper_handle_t sock_handle, sock_connid_t id, 995 cred_t *peer_cred, pid_t peer_cpid) 996 { 997 struct sonode *so = (struct sonode *)sock_handle; 998 999 mutex_enter(&so->so_lock); 1000 ASSERT(so->so_proto_handle != NULL); 1001 1002 if (peer_cred != NULL) { 1003 if (so->so_peercred != NULL) 1004 crfree(so->so_peercred); 1005 crhold(peer_cred); 1006 so->so_peercred = peer_cred; 1007 so->so_cpid = peer_cpid; 1008 } 1009 1010 so->so_proto_connid = id; 1011 soisconnected(so); 1012 /* 1013 * Wake ones who're waiting for conn to become established. 
1014 */ 1015 so_notify_connected(so); 1016 } 1017 1018 int 1019 so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error) 1020 { 1021 struct sonode *so = (struct sonode *)sock_handle; 1022 boolean_t connect_failed; 1023 1024 mutex_enter(&so->so_lock); 1025 connect_failed = so->so_state & SS_ISCONNECTED; 1026 so->so_proto_connid = id; 1027 soisdisconnected(so, error); 1028 so_notify_disconnected(so, connect_failed, error); 1029 1030 return (0); 1031 } 1032 1033 void 1034 so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action, 1035 uintptr_t arg) 1036 { 1037 struct sonode *so = (struct sonode *)sock_handle; 1038 1039 switch (action) { 1040 case SOCK_OPCTL_SHUT_SEND: 1041 mutex_enter(&so->so_lock); 1042 socantsendmore(so); 1043 so_notify_disconnecting(so); 1044 break; 1045 case SOCK_OPCTL_SHUT_RECV: { 1046 mutex_enter(&so->so_lock); 1047 socantrcvmore(so); 1048 so_notify_eof(so); 1049 break; 1050 } 1051 case SOCK_OPCTL_ENAB_ACCEPT: 1052 mutex_enter(&so->so_lock); 1053 so->so_state |= SS_ACCEPTCONN; 1054 so->so_backlog = (unsigned int)arg; 1055 /* 1056 * The protocol can stop generating newconn upcalls when 1057 * the backlog is full, so to make sure the listener does 1058 * not end up with a queue full of deferred connections 1059 * we reduce the backlog by one. Thus the listener will 1060 * start closing deferred connections before the backlog 1061 * is full. 
1062 */ 1063 if (so->so_filter_active > 0) 1064 so->so_backlog = MAX(1, so->so_backlog - 1); 1065 mutex_exit(&so->so_lock); 1066 break; 1067 default: 1068 ASSERT(0); 1069 break; 1070 } 1071 } 1072 1073 void 1074 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull) 1075 { 1076 struct sonode *so = (struct sonode *)sock_handle; 1077 1078 if (qfull) { 1079 so_snd_qfull(so); 1080 } else { 1081 so_snd_qnotfull(so); 1082 mutex_enter(&so->so_lock); 1083 /* so_notify_writable drops so_lock */ 1084 so_notify_writable(so); 1085 } 1086 } 1087 1088 sock_upper_handle_t 1089 so_newconn(sock_upper_handle_t parenthandle, 1090 sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls, 1091 struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp) 1092 { 1093 struct sonode *so = (struct sonode *)parenthandle; 1094 struct sonode *nso; 1095 int error; 1096 1097 ASSERT(proto_handle != NULL); 1098 1099 if ((so->so_state & SS_ACCEPTCONN) == 0 || 1100 (so->so_acceptq_len >= so->so_backlog && 1101 (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) { 1102 return (NULL); 1103 } 1104 1105 nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP, 1106 &error); 1107 if (nso == NULL) 1108 return (NULL); 1109 1110 if (peer_cred != NULL) { 1111 crhold(peer_cred); 1112 nso->so_peercred = peer_cred; 1113 nso->so_cpid = peer_cpid; 1114 } 1115 nso->so_listener = so; 1116 1117 /* 1118 * The new socket (nso), proto_handle and sock_upcallsp are all 1119 * valid at this point. But as soon as nso is placed in the accept 1120 * queue that can no longer be assumed (since an accept() thread may 1121 * pull it off the queue and close the socket). 
1122 */ 1123 *sock_upcallsp = &so_upcalls; 1124 1125 mutex_enter(&so->so_acceptq_lock); 1126 if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) { 1127 mutex_exit(&so->so_acceptq_lock); 1128 ASSERT(nso->so_count == 1); 1129 nso->so_count--; 1130 nso->so_listener = NULL; 1131 /* drop proto ref */ 1132 VN_RELE(SOTOV(nso)); 1133 socket_destroy(nso); 1134 return (NULL); 1135 } else { 1136 so->so_acceptq_len++; 1137 if (nso->so_state & SS_FIL_DEFER) { 1138 list_insert_tail(&so->so_acceptq_defer, nso); 1139 mutex_exit(&so->so_acceptq_lock); 1140 } else { 1141 list_insert_tail(&so->so_acceptq_list, nso); 1142 cv_signal(&so->so_acceptq_cv); 1143 mutex_exit(&so->so_acceptq_lock); 1144 mutex_enter(&so->so_lock); 1145 so_notify_newconn(so); 1146 } 1147 1148 return ((sock_upper_handle_t)nso); 1149 } 1150 } 1151 1152 void 1153 so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp) 1154 { 1155 struct sonode *so; 1156 1157 so = (struct sonode *)sock_handle; 1158 1159 mutex_enter(&so->so_lock); 1160 1161 if (soppp->sopp_flags & SOCKOPT_MAXBLK) 1162 so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk; 1163 if (soppp->sopp_flags & SOCKOPT_WROFF) 1164 so->so_proto_props.sopp_wroff = soppp->sopp_wroff; 1165 if (soppp->sopp_flags & SOCKOPT_TAIL) 1166 so->so_proto_props.sopp_tail = soppp->sopp_tail; 1167 if (soppp->sopp_flags & SOCKOPT_RCVHIWAT) 1168 so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat; 1169 if (soppp->sopp_flags & SOCKOPT_RCVLOWAT) 1170 so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat; 1171 if (soppp->sopp_flags & SOCKOPT_MAXPSZ) 1172 so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz; 1173 if (soppp->sopp_flags & SOCKOPT_MINPSZ) 1174 so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz; 1175 if (soppp->sopp_flags & SOCKOPT_ZCOPY) { 1176 if (soppp->sopp_zcopyflag & ZCVMSAFE) { 1177 so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE; 1178 so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE; 1179 } else if (soppp->sopp_zcopyflag 
& ZCVMUNSAFE) { 1180 so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE; 1181 so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE; 1182 } 1183 1184 if (soppp->sopp_zcopyflag & COPYCACHED) { 1185 so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED; 1186 } 1187 } 1188 if (soppp->sopp_flags & SOCKOPT_OOBINLINE) 1189 so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline; 1190 if (soppp->sopp_flags & SOCKOPT_RCVTIMER) 1191 so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer; 1192 if (soppp->sopp_flags & SOCKOPT_RCVTHRESH) 1193 so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh; 1194 if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN) 1195 so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen; 1196 if (soppp->sopp_flags & SOCKOPT_LOOPBACK) 1197 so->so_proto_props.sopp_loopback = soppp->sopp_loopback; 1198 1199 mutex_exit(&so->so_lock); 1200 1201 if (so->so_filter_active > 0) { 1202 sof_instance_t *inst; 1203 ssize_t maxblk; 1204 ushort_t wroff, tail; 1205 maxblk = so->so_proto_props.sopp_maxblk; 1206 wroff = so->so_proto_props.sopp_wroff; 1207 tail = so->so_proto_props.sopp_tail; 1208 for (inst = so->so_filter_bottom; inst != NULL; 1209 inst = inst->sofi_prev) { 1210 if (SOF_INTERESTED(inst, mblk_prop)) { 1211 (*inst->sofi_ops->sofop_mblk_prop)( 1212 (sof_handle_t)inst, inst->sofi_cookie, 1213 &maxblk, &wroff, &tail); 1214 } 1215 } 1216 mutex_enter(&so->so_lock); 1217 so->so_proto_props.sopp_maxblk = maxblk; 1218 so->so_proto_props.sopp_wroff = wroff; 1219 so->so_proto_props.sopp_tail = tail; 1220 mutex_exit(&so->so_lock); 1221 } 1222 #ifdef DEBUG 1223 soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL | 1224 SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ | 1225 SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER | 1226 SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ | 1227 SOCKOPT_LOOPBACK); 1228 ASSERT(soppp->sopp_flags == 0); 1229 #endif 1230 } 1231 1232 /* ARGSUSED */ 1233 ssize_t 1234 so_queue_msg_impl(struct sonode *so, 
mblk_t *mp, 1235 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp, 1236 sof_instance_t *filter) 1237 { 1238 boolean_t force_push = B_TRUE; 1239 int space_left; 1240 sodirect_t *sodp = so->so_direct; 1241 1242 ASSERT(errorp != NULL); 1243 *errorp = 0; 1244 if (mp == NULL) { 1245 if (so->so_downcalls->sd_recv_uio != NULL) { 1246 mutex_enter(&so->so_lock); 1247 /* the notify functions will drop the lock */ 1248 if (flags & MSG_OOB) 1249 so_notify_oobdata(so, IS_SO_OOB_INLINE(so)); 1250 else 1251 so_notify_data(so, msg_size); 1252 return (0); 1253 } 1254 ASSERT(msg_size == 0); 1255 mutex_enter(&so->so_lock); 1256 goto space_check; 1257 } 1258 1259 ASSERT(mp->b_next == NULL); 1260 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO); 1261 ASSERT(msg_size == msgdsize(mp)); 1262 1263 if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1264 /* The read pointer is not aligned correctly for TPI */ 1265 zcmn_err(getzoneid(), CE_WARN, 1266 "sockfs: Unaligned TPI message received. 
rptr = %p\n", 1267 (void *)mp->b_rptr); 1268 freemsg(mp); 1269 mutex_enter(&so->so_lock); 1270 if (sodp != NULL) 1271 SOD_UIOAFINI(sodp); 1272 goto space_check; 1273 } 1274 1275 if (so->so_filter_active > 0) { 1276 for (; filter != NULL; filter = filter->sofi_prev) { 1277 if (!SOF_INTERESTED(filter, data_in)) 1278 continue; 1279 mp = (*filter->sofi_ops->sofop_data_in)( 1280 (sof_handle_t)filter, filter->sofi_cookie, mp, 1281 flags, &msg_size); 1282 ASSERT(msgdsize(mp) == msg_size); 1283 DTRACE_PROBE2(filter__data, (sof_instance_t), filter, 1284 (mblk_t *), mp); 1285 /* Data was consumed/dropped, just do space check */ 1286 if (msg_size == 0) { 1287 mutex_enter(&so->so_lock); 1288 goto space_check; 1289 } 1290 } 1291 } 1292 1293 if (flags & MSG_OOB) { 1294 so_queue_oob(so, mp, msg_size); 1295 mutex_enter(&so->so_lock); 1296 goto space_check; 1297 } 1298 1299 if (force_pushp != NULL) 1300 force_push = *force_pushp; 1301 1302 mutex_enter(&so->so_lock); 1303 if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) { 1304 if (sodp != NULL) 1305 SOD_DISABLE(sodp); 1306 mutex_exit(&so->so_lock); 1307 *errorp = EOPNOTSUPP; 1308 return (-1); 1309 } 1310 if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) { 1311 freemsg(mp); 1312 if (sodp != NULL) 1313 SOD_DISABLE(sodp); 1314 mutex_exit(&so->so_lock); 1315 return (0); 1316 } 1317 1318 /* process the mblk via I/OAT if capable */ 1319 if (sodp != NULL && sodp->sod_enabled) { 1320 if (DB_TYPE(mp) == M_DATA) { 1321 sod_uioa_mblk_init(sodp, mp, msg_size); 1322 } else { 1323 SOD_UIOAFINI(sodp); 1324 } 1325 } 1326 1327 if (mp->b_next == NULL) { 1328 so_enqueue_msg(so, mp, msg_size); 1329 } else { 1330 do { 1331 mblk_t *nmp; 1332 1333 if ((nmp = mp->b_next) != NULL) { 1334 mp->b_next = NULL; 1335 } 1336 so_enqueue_msg(so, mp, msgdsize(mp)); 1337 mp = nmp; 1338 } while (mp != NULL); 1339 } 1340 1341 space_left = so->so_rcvbuf - so->so_rcv_queued; 1342 if (space_left <= 0) { 1343 so->so_flowctrld = B_TRUE; 1344 *errorp = ENOSPC; 1345 
space_left = -1; 1346 } 1347 1348 if (force_push || so->so_rcv_queued >= so->so_rcv_thresh || 1349 so->so_rcv_queued >= so->so_rcv_wanted) { 1350 SOCKET_TIMER_CANCEL(so); 1351 /* 1352 * so_notify_data will release the lock 1353 */ 1354 so_notify_data(so, so->so_rcv_queued); 1355 1356 if (force_pushp != NULL) 1357 *force_pushp = B_TRUE; 1358 goto done; 1359 } else if (so->so_rcv_timer_tid == 0) { 1360 /* Make sure the recv push timer is running */ 1361 SOCKET_TIMER_START(so); 1362 } 1363 1364 done_unlock: 1365 mutex_exit(&so->so_lock); 1366 done: 1367 return (space_left); 1368 1369 space_check: 1370 space_left = so->so_rcvbuf - so->so_rcv_queued; 1371 if (space_left <= 0) { 1372 so->so_flowctrld = B_TRUE; 1373 *errorp = ENOSPC; 1374 space_left = -1; 1375 } 1376 goto done_unlock; 1377 } 1378 1379 #pragma inline(so_queue_msg_impl) 1380 1381 ssize_t 1382 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp, 1383 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp) 1384 { 1385 struct sonode *so = (struct sonode *)sock_handle; 1386 1387 return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp, 1388 so->so_filter_bottom)); 1389 } 1390 1391 /* 1392 * Set the offset of where the oob data is relative to the bytes in 1393 * queued. Also generate SIGURG 1394 */ 1395 void 1396 so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset) 1397 { 1398 struct sonode *so; 1399 1400 ASSERT(offset >= 0); 1401 so = (struct sonode *)sock_handle; 1402 mutex_enter(&so->so_lock); 1403 if (so->so_direct != NULL) 1404 SOD_UIOAFINI(so->so_direct); 1405 1406 /* 1407 * New urgent data on the way so forget about any old 1408 * urgent data. 1409 */ 1410 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA); 1411 1412 /* 1413 * Record that urgent data is pending. 
1414 */ 1415 so->so_state |= SS_OOBPEND; 1416 1417 if (so->so_oobmsg != NULL) { 1418 dprintso(so, 1, ("sock: discarding old oob\n")); 1419 freemsg(so->so_oobmsg); 1420 so->so_oobmsg = NULL; 1421 } 1422 1423 /* 1424 * set the offset where the urgent byte is 1425 */ 1426 so->so_oobmark = so->so_rcv_queued + offset; 1427 if (so->so_oobmark == 0) 1428 so->so_state |= SS_RCVATMARK; 1429 else 1430 so->so_state &= ~SS_RCVATMARK; 1431 1432 so_notify_oobsig(so); 1433 } 1434 1435 /* 1436 * Queue the OOB byte 1437 */ 1438 static void 1439 so_queue_oob(struct sonode *so, mblk_t *mp, size_t len) 1440 { 1441 mutex_enter(&so->so_lock); 1442 if (so->so_direct != NULL) 1443 SOD_UIOAFINI(so->so_direct); 1444 1445 ASSERT(mp != NULL); 1446 if (!IS_SO_OOB_INLINE(so)) { 1447 so->so_oobmsg = mp; 1448 so->so_state |= SS_HAVEOOBDATA; 1449 } else { 1450 so_enqueue_msg(so, mp, len); 1451 } 1452 1453 so_notify_oobdata(so, IS_SO_OOB_INLINE(so)); 1454 } 1455 1456 int 1457 so_close(struct sonode *so, int flag, struct cred *cr) 1458 { 1459 int error; 1460 1461 /* 1462 * No new data will be enqueued once the CLOSING flag is set. 1463 */ 1464 mutex_enter(&so->so_lock); 1465 so->so_state |= SS_CLOSING; 1466 ASSERT(so_verify_oobstate(so)); 1467 so_rcv_flush(so); 1468 mutex_exit(&so->so_lock); 1469 1470 if (so->so_state & SS_ACCEPTCONN) { 1471 /* 1472 * We grab and release the accept lock to ensure that any 1473 * thread about to insert a socket in so_newconn completes 1474 * before we flush the queue. Any thread calling so_newconn 1475 * after we drop the lock will observe the SS_CLOSING flag, 1476 * which will stop it from inserting the socket in the queue. 
1477 */ 1478 mutex_enter(&so->so_acceptq_lock); 1479 mutex_exit(&so->so_acceptq_lock); 1480 1481 so_acceptq_flush(so, B_TRUE); 1482 } 1483 1484 if (so->so_filter_active > 0) 1485 sof_sonode_closing(so); 1486 1487 error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr); 1488 switch (error) { 1489 default: 1490 /* Protocol made a synchronous close; remove proto ref */ 1491 VN_RELE(SOTOV(so)); 1492 break; 1493 case EINPROGRESS: 1494 /* 1495 * Protocol is in the process of closing, it will make a 1496 * 'closed' upcall to remove the reference. 1497 */ 1498 error = 0; 1499 break; 1500 } 1501 1502 return (error); 1503 } 1504 1505 /* 1506 * Upcall made by the protocol when it's doing an asynchronous close. It 1507 * will drop the protocol's reference on the socket. 1508 */ 1509 void 1510 so_closed(sock_upper_handle_t sock_handle) 1511 { 1512 struct sonode *so = (struct sonode *)sock_handle; 1513 1514 VN_RELE(SOTOV(so)); 1515 } 1516 1517 void 1518 so_zcopy_notify(sock_upper_handle_t sock_handle) 1519 { 1520 struct sonode *so = (struct sonode *)sock_handle; 1521 1522 mutex_enter(&so->so_lock); 1523 so->so_copyflag |= STZCNOTIFY; 1524 cv_broadcast(&so->so_copy_cv); 1525 mutex_exit(&so->so_lock); 1526 } 1527 1528 void 1529 so_set_error(sock_upper_handle_t sock_handle, int error) 1530 { 1531 struct sonode *so = (struct sonode *)sock_handle; 1532 1533 mutex_enter(&so->so_lock); 1534 1535 soseterror(so, error); 1536 1537 so_notify_error(so); 1538 } 1539 1540 /* 1541 * so_recvmsg - read data from the socket 1542 * 1543 * There are two ways of obtaining data; either we ask the protocol to 1544 * copy directly into the supplied buffer, or we copy data from the 1545 * sonode's receive queue. The decision which one to use depends on 1546 * whether the protocol has a sd_recv_uio down call. 
1547 */ 1548 int 1549 so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 1550 struct cred *cr) 1551 { 1552 rval_t rval; 1553 int flags = 0; 1554 t_uscalar_t controllen, namelen; 1555 int error = 0; 1556 int ret; 1557 mblk_t *mctlp = NULL; 1558 union T_primitives *tpr; 1559 void *control; 1560 ssize_t saved_resid; 1561 struct uio *suiop; 1562 1563 SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr)); 1564 1565 if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 1566 (so->so_mode & SM_CONNREQUIRED)) { 1567 SO_UNBLOCK_FALLBACK(so); 1568 return (ENOTCONN); 1569 } 1570 1571 if (msg->msg_flags & MSG_PEEK) 1572 msg->msg_flags &= ~MSG_WAITALL; 1573 1574 if (so->so_mode & SM_ATOMIC) 1575 msg->msg_flags |= MSG_TRUNC; 1576 1577 if (msg->msg_flags & MSG_OOB) { 1578 if ((so->so_mode & SM_EXDATA) == 0) { 1579 error = EOPNOTSUPP; 1580 } else if (so->so_downcalls->sd_recv_uio != NULL) { 1581 error = (*so->so_downcalls->sd_recv_uio) 1582 (so->so_proto_handle, uiop, msg, cr); 1583 } else { 1584 error = sorecvoob(so, msg, uiop, msg->msg_flags, 1585 IS_SO_OOB_INLINE(so)); 1586 } 1587 SO_UNBLOCK_FALLBACK(so); 1588 return (error); 1589 } 1590 1591 /* 1592 * If the protocol has the recv down call, then pass the request 1593 * down. 1594 */ 1595 if (so->so_downcalls->sd_recv_uio != NULL) { 1596 error = (*so->so_downcalls->sd_recv_uio) 1597 (so->so_proto_handle, uiop, msg, cr); 1598 SO_UNBLOCK_FALLBACK(so); 1599 return (error); 1600 } 1601 1602 /* 1603 * Reading data from the socket buffer 1604 */ 1605 flags = msg->msg_flags; 1606 msg->msg_flags = 0; 1607 1608 /* 1609 * Set msg_controllen and msg_namelen to zero here to make it 1610 * simpler in the cases that no control or name is returned. 
1611 */ 1612 controllen = msg->msg_controllen; 1613 namelen = msg->msg_namelen; 1614 msg->msg_controllen = 0; 1615 msg->msg_namelen = 0; 1616 1617 mutex_enter(&so->so_lock); 1618 /* Set SOREADLOCKED */ 1619 error = so_lock_read_intr(so, 1620 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 1621 mutex_exit(&so->so_lock); 1622 if (error) { 1623 SO_UNBLOCK_FALLBACK(so); 1624 return (error); 1625 } 1626 1627 suiop = sod_rcv_init(so, flags, &uiop); 1628 retry: 1629 saved_resid = uiop->uio_resid; 1630 error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags); 1631 if (error != 0) { 1632 goto out; 1633 } 1634 /* 1635 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 1636 * For non-datagrams MOREDATA is used to set MSG_EOR. 1637 */ 1638 ASSERT(!(rval.r_val1 & MORECTL)); 1639 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 1640 msg->msg_flags |= MSG_TRUNC; 1641 if (mctlp == NULL) { 1642 dprintso(so, 1, ("so_recvmsg: got M_DATA\n")); 1643 1644 mutex_enter(&so->so_lock); 1645 /* Set MSG_EOR based on MOREDATA */ 1646 if (!(rval.r_val1 & MOREDATA)) { 1647 if (so->so_state & SS_SAVEDEOR) { 1648 msg->msg_flags |= MSG_EOR; 1649 so->so_state &= ~SS_SAVEDEOR; 1650 } 1651 } 1652 /* 1653 * If some data was received (i.e. not EOF) and the 1654 * read/recv* has not been satisfied wait for some more. 1655 */ 1656 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1657 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1658 mutex_exit(&so->so_lock); 1659 flags |= MSG_NOMARK; 1660 goto retry; 1661 } 1662 1663 goto out_locked; 1664 } 1665 /* so_queue_msg has already verified length and alignment */ 1666 tpr = (union T_primitives *)mctlp->b_rptr; 1667 dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type)); 1668 switch (tpr->type) { 1669 case T_DATA_IND: { 1670 /* 1671 * Set msg_flags to MSG_EOR based on 1672 * MORE_flag and MOREDATA. 
1673 */ 1674 mutex_enter(&so->so_lock); 1675 so->so_state &= ~SS_SAVEDEOR; 1676 if (!(tpr->data_ind.MORE_flag & 1)) { 1677 if (!(rval.r_val1 & MOREDATA)) 1678 msg->msg_flags |= MSG_EOR; 1679 else 1680 so->so_state |= SS_SAVEDEOR; 1681 } 1682 freemsg(mctlp); 1683 /* 1684 * If some data was received (i.e. not EOF) and the 1685 * read/recv* has not been satisfied wait for some more. 1686 */ 1687 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1688 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1689 mutex_exit(&so->so_lock); 1690 flags |= MSG_NOMARK; 1691 goto retry; 1692 } 1693 goto out_locked; 1694 } 1695 case T_UNITDATA_IND: { 1696 void *addr; 1697 t_uscalar_t addrlen; 1698 void *abuf; 1699 t_uscalar_t optlen; 1700 void *opt; 1701 1702 if (namelen != 0) { 1703 /* Caller wants source address */ 1704 addrlen = tpr->unitdata_ind.SRC_length; 1705 addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset, 1706 addrlen, 1); 1707 if (addr == NULL) { 1708 freemsg(mctlp); 1709 error = EPROTO; 1710 eprintsoline(so, error); 1711 goto out; 1712 } 1713 ASSERT(so->so_family != AF_UNIX); 1714 } 1715 optlen = tpr->unitdata_ind.OPT_length; 1716 if (optlen != 0) { 1717 t_uscalar_t ncontrollen; 1718 1719 /* 1720 * Extract any source address option. 1721 * Determine how large cmsg buffer is needed. 1722 */ 1723 opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset, 1724 optlen, __TPI_ALIGN_SIZE); 1725 1726 if (opt == NULL) { 1727 freemsg(mctlp); 1728 error = EPROTO; 1729 eprintsoline(so, error); 1730 goto out; 1731 } 1732 if (so->so_family == AF_UNIX) 1733 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 1734 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1735 !(flags & MSG_XPG4_2)); 1736 if (controllen != 0) 1737 controllen = ncontrollen; 1738 else if (ncontrollen != 0) 1739 msg->msg_flags |= MSG_CTRUNC; 1740 } else { 1741 controllen = 0; 1742 } 1743 1744 if (namelen != 0) { 1745 /* 1746 * Return address to caller. 
1747 * Caller handles truncation if length 1748 * exceeds msg_namelen. 1749 * NOTE: AF_UNIX NUL termination is ensured by 1750 * the sender's copyin_name(). 1751 */ 1752 abuf = kmem_alloc(addrlen, KM_SLEEP); 1753 1754 bcopy(addr, abuf, addrlen); 1755 msg->msg_name = abuf; 1756 msg->msg_namelen = addrlen; 1757 } 1758 1759 if (controllen != 0) { 1760 /* 1761 * Return control msg to caller. 1762 * Caller handles truncation if length 1763 * exceeds msg_controllen. 1764 */ 1765 control = kmem_zalloc(controllen, KM_SLEEP); 1766 1767 error = so_opt2cmsg(mctlp, opt, optlen, 1768 !(flags & MSG_XPG4_2), control, controllen); 1769 if (error) { 1770 freemsg(mctlp); 1771 if (msg->msg_namelen != 0) 1772 kmem_free(msg->msg_name, 1773 msg->msg_namelen); 1774 kmem_free(control, controllen); 1775 eprintsoline(so, error); 1776 goto out; 1777 } 1778 msg->msg_control = control; 1779 msg->msg_controllen = controllen; 1780 } 1781 1782 freemsg(mctlp); 1783 goto out; 1784 } 1785 case T_OPTDATA_IND: { 1786 struct T_optdata_req *tdr; 1787 void *opt; 1788 t_uscalar_t optlen; 1789 1790 tdr = (struct T_optdata_req *)mctlp->b_rptr; 1791 optlen = tdr->OPT_length; 1792 if (optlen != 0) { 1793 t_uscalar_t ncontrollen; 1794 /* 1795 * Determine how large cmsg buffer is needed. 1796 */ 1797 opt = sogetoff(mctlp, 1798 tpr->optdata_ind.OPT_offset, optlen, 1799 __TPI_ALIGN_SIZE); 1800 1801 if (opt == NULL) { 1802 freemsg(mctlp); 1803 error = EPROTO; 1804 eprintsoline(so, error); 1805 goto out; 1806 } 1807 1808 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1809 !(flags & MSG_XPG4_2)); 1810 if (controllen != 0) 1811 controllen = ncontrollen; 1812 else if (ncontrollen != 0) 1813 msg->msg_flags |= MSG_CTRUNC; 1814 } else { 1815 controllen = 0; 1816 } 1817 1818 if (controllen != 0) { 1819 /* 1820 * Return control msg to caller. 1821 * Caller handles truncation if length 1822 * exceeds msg_controllen. 
1823 */ 1824 control = kmem_zalloc(controllen, KM_SLEEP); 1825 1826 error = so_opt2cmsg(mctlp, opt, optlen, 1827 !(flags & MSG_XPG4_2), control, controllen); 1828 if (error) { 1829 freemsg(mctlp); 1830 kmem_free(control, controllen); 1831 eprintsoline(so, error); 1832 goto out; 1833 } 1834 msg->msg_control = control; 1835 msg->msg_controllen = controllen; 1836 } 1837 1838 /* 1839 * Set msg_flags to MSG_EOR based on 1840 * DATA_flag and MOREDATA. 1841 */ 1842 mutex_enter(&so->so_lock); 1843 so->so_state &= ~SS_SAVEDEOR; 1844 if (!(tpr->data_ind.MORE_flag & 1)) { 1845 if (!(rval.r_val1 & MOREDATA)) 1846 msg->msg_flags |= MSG_EOR; 1847 else 1848 so->so_state |= SS_SAVEDEOR; 1849 } 1850 freemsg(mctlp); 1851 /* 1852 * If some data was received (i.e. not EOF) and the 1853 * read/recv* has not been satisfied wait for some more. 1854 * Not possible to wait if control info was received. 1855 */ 1856 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1857 controllen == 0 && 1858 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1859 mutex_exit(&so->so_lock); 1860 flags |= MSG_NOMARK; 1861 goto retry; 1862 } 1863 goto out_locked; 1864 } 1865 default: 1866 cmn_err(CE_CONT, "so_recvmsg bad type %x \n", 1867 tpr->type); 1868 freemsg(mctlp); 1869 error = EPROTO; 1870 ASSERT(0); 1871 } 1872 out: 1873 mutex_enter(&so->so_lock); 1874 out_locked: 1875 ret = sod_rcv_done(so, suiop, uiop); 1876 if (ret != 0 && error == 0) 1877 error = ret; 1878 1879 so_unlock_read(so); /* Clear SOREADLOCKED */ 1880 mutex_exit(&so->so_lock); 1881 1882 SO_UNBLOCK_FALLBACK(so); 1883 1884 return (error); 1885 } 1886 1887 sonodeops_t so_sonodeops = { 1888 so_init, /* sop_init */ 1889 so_accept, /* sop_accept */ 1890 so_bind, /* sop_bind */ 1891 so_listen, /* sop_listen */ 1892 so_connect, /* sop_connect */ 1893 so_recvmsg, /* sop_recvmsg */ 1894 so_sendmsg, /* sop_sendmsg */ 1895 so_sendmblk, /* sop_sendmblk */ 1896 so_getpeername, /* sop_getpeername */ 1897 so_getsockname, /* 
sop_getsockname */ 1898 so_shutdown, /* sop_shutdown */ 1899 so_getsockopt, /* sop_getsockopt */ 1900 so_setsockopt, /* sop_setsockopt */ 1901 so_ioctl, /* sop_ioctl */ 1902 so_poll, /* sop_poll */ 1903 so_close, /* sop_close */ 1904 }; 1905 1906 sock_upcalls_t so_upcalls = { 1907 so_newconn, 1908 so_connected, 1909 so_disconnected, 1910 so_opctl, 1911 so_queue_msg, 1912 so_set_prop, 1913 so_txq_full, 1914 so_signal_oob, 1915 so_zcopy_notify, 1916 so_set_error, 1917 so_closed 1918 }; 1919