1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* This file contains all TCP kernel socket related functions. */ 27 28 #include <sys/types.h> 29 #include <sys/strlog.h> 30 #include <sys/policy.h> 31 #include <sys/sockio.h> 32 #include <sys/strsubr.h> 33 #include <sys/strsun.h> 34 #include <sys/squeue_impl.h> 35 #include <sys/squeue.h> 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/tpicommon.h> 39 #include <sys/socketvar.h> 40 41 #include <inet/common.h> 42 #include <inet/proto_set.h> 43 #include <inet/ip.h> 44 #include <inet/tcp.h> 45 #include <inet/tcp_impl.h> 46 47 static void tcp_activate(sock_lower_handle_t, sock_upper_handle_t, 48 sock_upcalls_t *, int, cred_t *); 49 static int tcp_accept(sock_lower_handle_t, sock_lower_handle_t, 50 sock_upper_handle_t, cred_t *); 51 static int tcp_bind(sock_lower_handle_t, struct sockaddr *, 52 socklen_t, cred_t *); 53 static int tcp_listen(sock_lower_handle_t, int, cred_t *); 54 static int tcp_connect(sock_lower_handle_t, const struct sockaddr *, 55 socklen_t, sock_connid_t *, cred_t *); 56 static int tcp_getsockopt(sock_lower_handle_t, int, int, void *, 57 socklen_t *, cred_t *); 58 static int tcp_setsockopt(sock_lower_handle_t, int, int, const void *, 59 socklen_t, cred_t *); 60 static int tcp_sendmsg(sock_lower_handle_t, mblk_t *, struct nmsghdr *, 61 cred_t *cr); 62 static int tcp_shutdown(sock_lower_handle_t, int, cred_t *); 63 static void tcp_clr_flowctrl(sock_lower_handle_t); 64 static int tcp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *, 65 cred_t *); 66 static int tcp_close(sock_lower_handle_t, int, cred_t *); 67 68 sock_downcalls_t sock_tcp_downcalls = { 69 tcp_activate, 70 tcp_accept, 71 tcp_bind, 72 tcp_listen, 73 tcp_connect, 74 tcp_getpeername, 75 tcp_getsockname, 76 tcp_getsockopt, 77 tcp_setsockopt, 78 tcp_sendmsg, 79 NULL, 80 NULL, 81 NULL, 82 tcp_shutdown, 83 tcp_clr_flowctrl, 84 tcp_ioctl, 85 tcp_close, 86 }; 87 88 /* ARGSUSED */ 89 static void 90 tcp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 91 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 92 { 93 conn_t *connp = (conn_t *)proto_handle; 94 struct sock_proto_props sopp; 95 extern struct module_info tcp_rinfo; 96 97 ASSERT(connp->conn_upper_handle == NULL); 98 99 /* All Solaris components should pass a cred for this operation. */ 100 ASSERT(cr != NULL); 101 102 sopp.sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 103 SOCKOPT_MAXPSZ | SOCKOPT_MAXBLK | SOCKOPT_RCVTIMER | 104 SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ; 105 106 sopp.sopp_rxhiwat = SOCKET_RECVHIWATER; 107 sopp.sopp_rxlowat = SOCKET_RECVLOWATER; 108 sopp.sopp_maxpsz = INFPSZ; 109 sopp.sopp_maxblk = INFPSZ; 110 sopp.sopp_rcvtimer = SOCKET_TIMER_INTERVAL; 111 sopp.sopp_rcvthresh = SOCKET_RECVHIWATER >> 3; 112 sopp.sopp_maxaddrlen = sizeof (sin6_t); 113 sopp.sopp_minpsz = (tcp_rinfo.mi_minpsz == 1) ? 0 : 114 tcp_rinfo.mi_minpsz; 115 116 connp->conn_upcalls = sock_upcalls; 117 connp->conn_upper_handle = sock_handle; 118 119 ASSERT(connp->conn_rcvbuf != 0 && 120 connp->conn_rcvbuf == connp->conn_tcp->tcp_rwnd); 121 (*sock_upcalls->su_set_proto_props)(sock_handle, &sopp); 122 } 123 124 static int 125 tcp_accept(sock_lower_handle_t lproto_handle, 126 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 127 cred_t *cr) 128 { 129 conn_t *lconnp, *econnp; 130 tcp_t *listener, *eager; 131 132 lconnp = (conn_t *)lproto_handle; 133 listener = lconnp->conn_tcp; 134 ASSERT(listener->tcp_state == TCPS_LISTEN); 135 econnp = (conn_t *)eproto_handle; 136 eager = econnp->conn_tcp; 137 ASSERT(eager->tcp_listener != NULL); 138 139 /* 140 * It is OK to manipulate these fields outside the eager's squeue 141 * because they will not start being used until tcp_accept_finish 142 * has been called. 143 */ 144 ASSERT(lconnp->conn_upper_handle != NULL); 145 ASSERT(econnp->conn_upper_handle == NULL); 146 econnp->conn_upper_handle = sock_handle; 147 econnp->conn_upcalls = lconnp->conn_upcalls; 148 ASSERT(IPCL_IS_NONSTR(econnp)); 149 return (tcp_accept_common(lconnp, econnp, cr)); 150 } 151 152 static int 153 tcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 154 socklen_t len, cred_t *cr) 155 { 156 int error; 157 conn_t *connp = (conn_t *)proto_handle; 158 159 /* All Solaris components should pass a cred for this operation. */ 160 ASSERT(cr != NULL); 161 ASSERT(connp->conn_upper_handle != NULL); 162 163 error = squeue_synch_enter(connp, NULL); 164 if (error != 0) { 165 /* failed to enter */ 166 return (ENOSR); 167 } 168 169 /* binding to a NULL address really means unbind */ 170 if (sa == NULL) { 171 if (connp->conn_tcp->tcp_state < TCPS_LISTEN) 172 error = tcp_do_unbind(connp); 173 else 174 error = EINVAL; 175 } else { 176 error = tcp_do_bind(connp, sa, len, cr, B_TRUE); 177 } 178 179 squeue_synch_exit(connp); 180 181 if (error < 0) { 182 if (error == -TOUTSTATE) 183 error = EINVAL; 184 else 185 error = proto_tlitosyserr(-error); 186 } 187 188 return (error); 189 } 190 191 /* 192 * SOP_LISTEN() calls into tcp_listen(). 193 */ 194 /* ARGSUSED */ 195 static int 196 tcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 197 { 198 conn_t *connp = (conn_t *)proto_handle; 199 int error; 200 201 ASSERT(connp->conn_upper_handle != NULL); 202 203 /* All Solaris components should pass a cred for this operation. */ 204 ASSERT(cr != NULL); 205 206 error = squeue_synch_enter(connp, NULL); 207 if (error != 0) { 208 /* failed to enter */ 209 return (ENOBUFS); 210 } 211 212 error = tcp_do_listen(connp, NULL, 0, backlog, cr, B_FALSE); 213 if (error == 0) { 214 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 215 SOCK_OPCTL_ENAB_ACCEPT, (uintptr_t)backlog); 216 } else if (error < 0) { 217 if (error == -TOUTSTATE) 218 error = EINVAL; 219 else 220 error = proto_tlitosyserr(-error); 221 } 222 squeue_synch_exit(connp); 223 return (error); 224 } 225 226 static int 227 tcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 228 socklen_t len, sock_connid_t *id, cred_t *cr) 229 { 230 conn_t *connp = (conn_t *)proto_handle; 231 int error; 232 233 ASSERT(connp->conn_upper_handle != NULL); 234 235 /* All Solaris components should pass a cred for this operation. */ 236 ASSERT(cr != NULL); 237 238 error = proto_verify_ip_addr(connp->conn_family, sa, len); 239 if (error != 0) { 240 return (error); 241 } 242 243 error = squeue_synch_enter(connp, NULL); 244 if (error != 0) { 245 /* failed to enter */ 246 return (ENOSR); 247 } 248 249 /* 250 * TCP supports quick connect, so no need to do an implicit bind 251 */ 252 error = tcp_do_connect(connp, sa, len, cr, curproc->p_pid); 253 if (error == 0) { 254 *id = connp->conn_tcp->tcp_connid; 255 } else if (error < 0) { 256 if (error == -TOUTSTATE) { 257 switch (connp->conn_tcp->tcp_state) { 258 case TCPS_SYN_SENT: 259 error = EALREADY; 260 break; 261 case TCPS_ESTABLISHED: 262 error = EISCONN; 263 break; 264 case TCPS_LISTEN: 265 error = EOPNOTSUPP; 266 break; 267 default: 268 error = EINVAL; 269 break; 270 } 271 } else { 272 error = proto_tlitosyserr(-error); 273 } 274 } 275 276 if (connp->conn_tcp->tcp_loopback) { 277 struct sock_proto_props sopp; 278 279 sopp.sopp_flags = SOCKOPT_LOOPBACK; 280 sopp.sopp_loopback = B_TRUE; 281 282 (*connp->conn_upcalls->su_set_proto_props)( 283 connp->conn_upper_handle, &sopp); 284 } 285 done: 286 squeue_synch_exit(connp); 287 288 return ((error == 0) ? EINPROGRESS : error); 289 } 290 291 /* ARGSUSED3 */ 292 int 293 tcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr, 294 socklen_t *addrlenp, cred_t *cr) 295 { 296 conn_t *connp = (conn_t *)proto_handle; 297 tcp_t *tcp = connp->conn_tcp; 298 299 ASSERT(connp->conn_upper_handle != NULL); 300 /* All Solaris components should pass a cred for this operation. */ 301 ASSERT(cr != NULL); 302 303 ASSERT(tcp != NULL); 304 if (tcp->tcp_state < TCPS_SYN_RCVD) 305 return (ENOTCONN); 306 307 return (conn_getpeername(connp, addr, addrlenp)); 308 } 309 310 /* ARGSUSED3 */ 311 int 312 tcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr, 313 socklen_t *addrlenp, cred_t *cr) 314 { 315 conn_t *connp = (conn_t *)proto_handle; 316 317 /* All Solaris components should pass a cred for this operation. */ 318 ASSERT(cr != NULL); 319 320 ASSERT(connp->conn_upper_handle != NULL); 321 return (conn_getsockname(connp, addr, addrlenp)); 322 } 323 324 /* returns UNIX error, the optlen is a value-result arg */ 325 static int 326 tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 327 void *optvalp, socklen_t *optlen, cred_t *cr) 328 { 329 conn_t *connp = (conn_t *)proto_handle; 330 int error; 331 t_uscalar_t max_optbuf_len; 332 void *optvalp_buf; 333 int len; 334 335 ASSERT(connp->conn_upper_handle != NULL); 336 337 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 338 tcp_opt_obj.odb_opt_des_arr, 339 tcp_opt_obj.odb_opt_arr_cnt, 340 B_FALSE, B_TRUE, cr); 341 if (error != 0) { 342 if (error < 0) { 343 error = proto_tlitosyserr(-error); 344 } 345 return (error); 346 } 347 348 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 349 350 error = squeue_synch_enter(connp, NULL); 351 if (error == ENOMEM) { 352 kmem_free(optvalp_buf, max_optbuf_len); 353 return (ENOMEM); 354 } 355 356 len = tcp_opt_get(connp, level, option_name, optvalp_buf); 357 squeue_synch_exit(connp); 358 359 if (len == -1) { 360 kmem_free(optvalp_buf, max_optbuf_len); 361 return (EINVAL); 362 } 363 364 /* 365 * update optlen and copy option value 366 */ 367 t_uscalar_t size = MIN(len, *optlen); 368 369 bcopy(optvalp_buf, optvalp, size); 370 bcopy(&size, optlen, sizeof (size)); 371 372 kmem_free(optvalp_buf, max_optbuf_len); 373 return (0); 374 } 375 376 static int 377 tcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 378 const void *optvalp, socklen_t optlen, cred_t *cr) 379 { 380 conn_t *connp = (conn_t *)proto_handle; 381 int error; 382 383 ASSERT(connp->conn_upper_handle != NULL); 384 /* 385 * Entering the squeue synchronously can result in a context switch, 386 * which can cause a rather sever performance degradation. So we try to 387 * handle whatever options we can without entering the squeue. 388 */ 389 if (level == IPPROTO_TCP) { 390 switch (option_name) { 391 case TCP_NODELAY: 392 if (optlen != sizeof (int32_t)) 393 return (EINVAL); 394 mutex_enter(&connp->conn_tcp->tcp_non_sq_lock); 395 connp->conn_tcp->tcp_naglim = *(int *)optvalp ? 1 : 396 connp->conn_tcp->tcp_mss; 397 mutex_exit(&connp->conn_tcp->tcp_non_sq_lock); 398 return (0); 399 default: 400 break; 401 } 402 } 403 404 error = squeue_synch_enter(connp, NULL); 405 if (error == ENOMEM) { 406 return (ENOMEM); 407 } 408 409 error = proto_opt_check(level, option_name, optlen, NULL, 410 tcp_opt_obj.odb_opt_des_arr, 411 tcp_opt_obj.odb_opt_arr_cnt, 412 B_TRUE, B_FALSE, cr); 413 414 if (error != 0) { 415 if (error < 0) { 416 error = proto_tlitosyserr(-error); 417 } 418 squeue_synch_exit(connp); 419 return (error); 420 } 421 422 error = tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 423 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 424 NULL, cr); 425 squeue_synch_exit(connp); 426 427 ASSERT(error >= 0); 428 429 return (error); 430 } 431 432 /* ARGSUSED */ 433 static int 434 tcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 435 cred_t *cr) 436 { 437 tcp_t *tcp; 438 uint32_t msize; 439 conn_t *connp = (conn_t *)proto_handle; 440 int32_t tcpstate; 441 442 /* All Solaris components should pass a cred for this operation. */ 443 ASSERT(cr != NULL); 444 445 ASSERT(connp->conn_ref >= 2); 446 ASSERT(connp->conn_upper_handle != NULL); 447 448 if (msg->msg_controllen != 0) { 449 freemsg(mp); 450 return (EOPNOTSUPP); 451 } 452 453 switch (DB_TYPE(mp)) { 454 case M_DATA: 455 tcp = connp->conn_tcp; 456 ASSERT(tcp != NULL); 457 458 tcpstate = tcp->tcp_state; 459 if (tcpstate < TCPS_ESTABLISHED) { 460 freemsg(mp); 461 /* 462 * We return ENOTCONN if the endpoint is trying to 463 * connect or has never been connected, and EPIPE if it 464 * has been disconnected. The connection id helps us 465 * distinguish between the last two cases. 466 */ 467 return ((tcpstate == TCPS_SYN_SENT) ? ENOTCONN : 468 ((tcp->tcp_connid > 0) ? EPIPE : ENOTCONN)); 469 } else if (tcpstate > TCPS_CLOSE_WAIT) { 470 freemsg(mp); 471 return (EPIPE); 472 } 473 474 msize = msgdsize(mp); 475 476 mutex_enter(&tcp->tcp_non_sq_lock); 477 tcp->tcp_squeue_bytes += msize; 478 /* 479 * Squeue Flow Control 480 */ 481 if (TCP_UNSENT_BYTES(tcp) > connp->conn_sndbuf) { 482 tcp_setqfull(tcp); 483 } 484 mutex_exit(&tcp->tcp_non_sq_lock); 485 486 /* 487 * The application may pass in an address in the msghdr, but 488 * we ignore the address on connection-oriented sockets. 489 * Just like BSD this code does not generate an error for 490 * TCP (a CONNREQUIRED socket) when sending to an address 491 * passed in with sendto/sendmsg. Instead the data is 492 * delivered on the connection as if no address had been 493 * supplied. 494 */ 495 CONN_INC_REF(connp); 496 497 if (msg->msg_flags & MSG_OOB) { 498 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output_urgent, 499 connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT); 500 } else { 501 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output, 502 connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT); 503 } 504 505 return (0); 506 507 default: 508 ASSERT(0); 509 } 510 511 freemsg(mp); 512 return (0); 513 } 514 515 /* ARGSUSED */ 516 static int 517 tcp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 518 { 519 conn_t *connp = (conn_t *)proto_handle; 520 tcp_t *tcp = connp->conn_tcp; 521 522 ASSERT(connp->conn_upper_handle != NULL); 523 524 /* All Solaris components should pass a cred for this operation. */ 525 ASSERT(cr != NULL); 526 527 /* 528 * X/Open requires that we check the connected state. 529 */ 530 if (tcp->tcp_state < TCPS_SYN_SENT) 531 return (ENOTCONN); 532 533 /* shutdown the send side */ 534 if (how != SHUT_RD) { 535 mblk_t *bp; 536 537 bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL); 538 CONN_INC_REF(connp); 539 SQUEUE_ENTER_ONE(connp->conn_sqp, bp, tcp_shutdown_output, 540 connp, NULL, SQ_NODRAIN, SQTAG_TCP_SHUTDOWN_OUTPUT); 541 542 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 543 SOCK_OPCTL_SHUT_SEND, 0); 544 } 545 546 /* shutdown the recv side */ 547 if (how != SHUT_WR) 548 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 549 SOCK_OPCTL_SHUT_RECV, 0); 550 551 return (0); 552 } 553 554 static void 555 tcp_clr_flowctrl(sock_lower_handle_t proto_handle) 556 { 557 conn_t *connp = (conn_t *)proto_handle; 558 tcp_t *tcp = connp->conn_tcp; 559 mblk_t *mp; 560 int error; 561 562 ASSERT(connp->conn_upper_handle != NULL); 563 564 /* 565 * If tcp->tcp_rsrv_mp == NULL, it means that tcp_clr_flowctrl() 566 * is currently running. 567 */ 568 mutex_enter(&tcp->tcp_rsrv_mp_lock); 569 if ((mp = tcp->tcp_rsrv_mp) == NULL) { 570 mutex_exit(&tcp->tcp_rsrv_mp_lock); 571 return; 572 } 573 tcp->tcp_rsrv_mp = NULL; 574 mutex_exit(&tcp->tcp_rsrv_mp_lock); 575 576 error = squeue_synch_enter(connp, mp); 577 ASSERT(error == 0); 578 579 mutex_enter(&tcp->tcp_rsrv_mp_lock); 580 tcp->tcp_rsrv_mp = mp; 581 mutex_exit(&tcp->tcp_rsrv_mp_lock); 582 583 if (tcp->tcp_fused) { 584 tcp_fuse_backenable(tcp); 585 } else { 586 tcp->tcp_rwnd = connp->conn_rcvbuf; 587 /* 588 * Send back a window update immediately if TCP is above 589 * ESTABLISHED state and the increase of the rcv window 590 * that the other side knows is at least 1 MSS after flow 591 * control is lifted. 592 */ 593 if (tcp->tcp_state >= TCPS_ESTABLISHED && 594 tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) { 595 tcp_xmit_ctl(NULL, tcp, 596 (tcp->tcp_swnd == 0) ? tcp->tcp_suna : 597 tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK); 598 } 599 } 600 601 squeue_synch_exit(connp); 602 } 603 604 /* ARGSUSED */ 605 static int 606 tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 607 int mode, int32_t *rvalp, cred_t *cr) 608 { 609 conn_t *connp = (conn_t *)proto_handle; 610 int error; 611 612 ASSERT(connp->conn_upper_handle != NULL); 613 614 /* All Solaris components should pass a cred for this operation. */ 615 ASSERT(cr != NULL); 616 617 /* 618 * If we don't have a helper stream then create one. 619 * ip_create_helper_stream takes care of locking the conn_t, 620 * so this check for NULL is just a performance optimization. 621 */ 622 if (connp->conn_helper_info == NULL) { 623 tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps; 624 625 /* 626 * Create a helper stream for non-STREAMS socket. 627 */ 628 error = ip_create_helper_stream(connp, tcps->tcps_ldi_ident); 629 if (error != 0) { 630 ip0dbg(("tcp_ioctl: create of IP helper stream " 631 "failed %d\n", error)); 632 return (error); 633 } 634 } 635 636 switch (cmd) { 637 case ND_SET: 638 case ND_GET: 639 case _SIOCSOCKFALLBACK: 640 case TCP_IOC_ABORT_CONN: 641 case TI_GETPEERNAME: 642 case TI_GETMYNAME: 643 ip1dbg(("tcp_ioctl: cmd 0x%x on non streams socket", 644 cmd)); 645 error = EINVAL; 646 break; 647 default: 648 /* 649 * If the conn is not closing, pass on to IP using 650 * helper stream. Bump the ioctlref to prevent tcp_close 651 * from closing the rq/wq out from underneath the ioctl 652 * if it ends up queued or aborted/interrupted. 653 */ 654 mutex_enter(&connp->conn_lock); 655 if (connp->conn_state_flags & (CONN_CLOSING)) { 656 mutex_exit(&connp->conn_lock); 657 error = EINVAL; 658 break; 659 } 660 CONN_INC_IOCTLREF_LOCKED(connp); 661 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 662 cmd, arg, mode, cr, rvalp); 663 CONN_DEC_IOCTLREF(connp); 664 break; 665 } 666 return (error); 667 } 668 669 /* ARGSUSED */ 670 static int 671 tcp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 672 { 673 conn_t *connp = (conn_t *)proto_handle; 674 675 ASSERT(connp->conn_upper_handle != NULL); 676 677 /* All Solaris components should pass a cred for this operation. */ 678 ASSERT(cr != NULL); 679 680 tcp_close_common(connp, flags); 681 682 ip_free_helper_stream(connp); 683 684 /* 685 * Drop IP's reference on the conn. This is the last reference 686 * on the connp if the state was less than established. If the 687 * connection has gone into timewait state, then we will have 688 * one ref for the TCP and one more ref (total of two) for the 689 * classifier connected hash list (a timewait connections stays 690 * in connected hash till closed). 691 * 692 * We can't assert the references because there might be other 693 * transient reference places because of some walkers or queued 694 * packets in squeue for the timewait state. 695 */ 696 CONN_DEC_REF(connp); 697 return (0); 698 } 699 700 /* ARGSUSED */ 701 sock_lower_handle_t 702 tcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 703 uint_t *smodep, int *errorp, int flags, cred_t *credp) 704 { 705 conn_t *connp; 706 boolean_t isv6 = family == AF_INET6; 707 if (type != SOCK_STREAM || (family != AF_INET && family != AF_INET6) || 708 (proto != 0 && proto != IPPROTO_TCP)) { 709 *errorp = EPROTONOSUPPORT; 710 return (NULL); 711 } 712 713 connp = tcp_create_common(credp, isv6, B_TRUE, errorp); 714 if (connp == NULL) { 715 return (NULL); 716 } 717 718 /* 719 * Put the ref for TCP. Ref for IP was already put 720 * by ipcl_conn_create. Also Make the conn_t globally 721 * visible to walkers 722 */ 723 mutex_enter(&connp->conn_lock); 724 CONN_INC_REF_LOCKED(connp); 725 ASSERT(connp->conn_ref == 2); 726 connp->conn_state_flags &= ~CONN_INCIPIENT; 727 728 connp->conn_flags |= IPCL_NONSTR; 729 mutex_exit(&connp->conn_lock); 730 731 ASSERT(errorp != NULL); 732 *errorp = 0; 733 *sock_downcalls = &sock_tcp_downcalls; 734 *smodep = SM_CONNREQUIRED | SM_EXDATA | SM_ACCEPTSUPP | 735 SM_SENDFILESUPP; 736 737 return ((sock_lower_handle_t)connp); 738 } 739 740 int 741 tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 742 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 743 { 744 tcp_t *tcp; 745 conn_t *connp = (conn_t *)proto_handle; 746 int error; 747 mblk_t *stropt_mp; 748 mblk_t *ordrel_mp; 749 750 tcp = connp->conn_tcp; 751 752 stropt_mp = allocb_wait(sizeof (struct stroptions), BPRI_HI, STR_NOSIG, 753 NULL); 754 755 /* Pre-allocate the T_ordrel_ind mblk. */ 756 ASSERT(tcp->tcp_ordrel_mp == NULL); 757 ordrel_mp = allocb_wait(sizeof (struct T_ordrel_ind), BPRI_HI, 758 STR_NOSIG, NULL); 759 ordrel_mp->b_datap->db_type = M_PROTO; 760 ((struct T_ordrel_ind *)ordrel_mp->b_rptr)->PRIM_type = T_ORDREL_IND; 761 ordrel_mp->b_wptr += sizeof (struct T_ordrel_ind); 762 763 /* 764 * Enter the squeue so that no new packets can come in 765 */ 766 error = squeue_synch_enter(connp, NULL); 767 if (error != 0) { 768 /* failed to enter, free all the pre-allocated messages. */ 769 freeb(stropt_mp); 770 freeb(ordrel_mp); 771 /* 772 * We cannot process the eager, so at least send out a 773 * RST so the peer can reconnect. 774 */ 775 if (tcp->tcp_listener != NULL) { 776 (void) tcp_eager_blowoff(tcp->tcp_listener, 777 tcp->tcp_conn_req_seqnum); 778 } 779 return (ENOMEM); 780 } 781 782 /* 783 * Both endpoints must be of the same type (either STREAMS or 784 * non-STREAMS) for fusion to be enabled. So if we are fused, 785 * we have to unfuse. 786 */ 787 if (tcp->tcp_fused) 788 tcp_unfuse(tcp); 789 790 /* 791 * No longer a direct socket 792 */ 793 connp->conn_flags &= ~IPCL_NONSTR; 794 tcp->tcp_ordrel_mp = ordrel_mp; 795 796 if (tcp->tcp_listener != NULL) { 797 /* The eager will deal with opts when accept() is called */ 798 freeb(stropt_mp); 799 tcp_fallback_eager(tcp, direct_sockfs); 800 } else { 801 tcp_fallback_noneager(tcp, stropt_mp, q, direct_sockfs, 802 quiesced_cb); 803 } 804 805 /* 806 * There should be atleast two ref's (IP + TCP) 807 */ 808 ASSERT(connp->conn_ref >= 2); 809 squeue_synch_exit(connp); 810 811 return (0); 812 } 813