1721fffe3SKacheong Poon /* 2721fffe3SKacheong Poon * CDDL HEADER START 3721fffe3SKacheong Poon * 4721fffe3SKacheong Poon * The contents of this file are subject to the terms of the 5721fffe3SKacheong Poon * Common Development and Distribution License (the "License"). 6721fffe3SKacheong Poon * You may not use this file except in compliance with the License. 7721fffe3SKacheong Poon * 8721fffe3SKacheong Poon * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9721fffe3SKacheong Poon * or http://www.opensolaris.org/os/licensing. 10721fffe3SKacheong Poon * See the License for the specific language governing permissions 11721fffe3SKacheong Poon * and limitations under the License. 12721fffe3SKacheong Poon * 13721fffe3SKacheong Poon * When distributing Covered Code, include this CDDL HEADER in each 14721fffe3SKacheong Poon * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15721fffe3SKacheong Poon * If applicable, add the following below this CDDL HEADER, with the 16721fffe3SKacheong Poon * fields enclosed by brackets "[]" replaced with your own identifying 17721fffe3SKacheong Poon * information: Portions Copyright [yyyy] [name of copyright owner] 18721fffe3SKacheong Poon * 19721fffe3SKacheong Poon * CDDL HEADER END 20721fffe3SKacheong Poon */ 21721fffe3SKacheong Poon 22721fffe3SKacheong Poon /* 239ee3959aSAnders Persson * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24721fffe3SKacheong Poon */ 25721fffe3SKacheong Poon 26721fffe3SKacheong Poon /* This file contains all TCP kernel socket related functions. */ 27721fffe3SKacheong Poon 28721fffe3SKacheong Poon #include <sys/types.h> 29721fffe3SKacheong Poon #include <sys/strlog.h> 30721fffe3SKacheong Poon #include <sys/policy.h> 31721fffe3SKacheong Poon #include <sys/sockio.h> 32721fffe3SKacheong Poon #include <sys/strsubr.h> 33721fffe3SKacheong Poon #include <sys/strsun.h> 34721fffe3SKacheong Poon #include <sys/squeue_impl.h> 35721fffe3SKacheong Poon #include <sys/squeue.h> 363e95bd4aSAnders Persson #define _SUN_TPI_VERSION 2 37721fffe3SKacheong Poon #include <sys/tihdr.h> 38721fffe3SKacheong Poon #include <sys/timod.h> 39721fffe3SKacheong Poon #include <sys/tpicommon.h> 40721fffe3SKacheong Poon #include <sys/socketvar.h> 41721fffe3SKacheong Poon 42721fffe3SKacheong Poon #include <inet/common.h> 43721fffe3SKacheong Poon #include <inet/proto_set.h> 44721fffe3SKacheong Poon #include <inet/ip.h> 45721fffe3SKacheong Poon #include <inet/tcp.h> 46721fffe3SKacheong Poon #include <inet/tcp_impl.h> 47721fffe3SKacheong Poon 48721fffe3SKacheong Poon static void tcp_activate(sock_lower_handle_t, sock_upper_handle_t, 49721fffe3SKacheong Poon sock_upcalls_t *, int, cred_t *); 50721fffe3SKacheong Poon static int tcp_accept(sock_lower_handle_t, sock_lower_handle_t, 51721fffe3SKacheong Poon sock_upper_handle_t, cred_t *); 52721fffe3SKacheong Poon static int tcp_bind(sock_lower_handle_t, struct sockaddr *, 53721fffe3SKacheong Poon socklen_t, cred_t *); 54721fffe3SKacheong Poon static int tcp_listen(sock_lower_handle_t, int, cred_t *); 55721fffe3SKacheong Poon static int tcp_connect(sock_lower_handle_t, const struct sockaddr *, 56721fffe3SKacheong Poon socklen_t, sock_connid_t *, cred_t *); 57*ca3c8f41SDavid Höppner static int tcp_getpeername(sock_lower_handle_t, struct sockaddr *, 58*ca3c8f41SDavid Höppner socklen_t *, cred_t *); 59*ca3c8f41SDavid Höppner static int tcp_getsockname(sock_lower_handle_t, struct sockaddr *, 60*ca3c8f41SDavid Höppner socklen_t *, cred_t *); 61721fffe3SKacheong Poon static int tcp_getsockopt(sock_lower_handle_t, int, int, void *, 62721fffe3SKacheong Poon socklen_t *, cred_t *); 63721fffe3SKacheong Poon static int tcp_setsockopt(sock_lower_handle_t, int, int, const void *, 64721fffe3SKacheong Poon socklen_t, cred_t *); 65721fffe3SKacheong Poon static int tcp_sendmsg(sock_lower_handle_t, mblk_t *, struct nmsghdr *, 66*ca3c8f41SDavid Höppner cred_t *); 67721fffe3SKacheong Poon static int tcp_shutdown(sock_lower_handle_t, int, cred_t *); 68721fffe3SKacheong Poon static void tcp_clr_flowctrl(sock_lower_handle_t); 69721fffe3SKacheong Poon static int tcp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *, 70721fffe3SKacheong Poon cred_t *); 71721fffe3SKacheong Poon static int tcp_close(sock_lower_handle_t, int, cred_t *); 72721fffe3SKacheong Poon 73721fffe3SKacheong Poon sock_downcalls_t sock_tcp_downcalls = { 74721fffe3SKacheong Poon tcp_activate, 75721fffe3SKacheong Poon tcp_accept, 76721fffe3SKacheong Poon tcp_bind, 77721fffe3SKacheong Poon tcp_listen, 78721fffe3SKacheong Poon tcp_connect, 79721fffe3SKacheong Poon tcp_getpeername, 80721fffe3SKacheong Poon tcp_getsockname, 81721fffe3SKacheong Poon tcp_getsockopt, 82721fffe3SKacheong Poon tcp_setsockopt, 83721fffe3SKacheong Poon tcp_sendmsg, 84721fffe3SKacheong Poon NULL, 85721fffe3SKacheong Poon NULL, 86721fffe3SKacheong Poon NULL, 87721fffe3SKacheong Poon tcp_shutdown, 88721fffe3SKacheong Poon tcp_clr_flowctrl, 89721fffe3SKacheong Poon tcp_ioctl, 90721fffe3SKacheong Poon tcp_close, 91721fffe3SKacheong Poon }; 92721fffe3SKacheong Poon 93721fffe3SKacheong Poon /* ARGSUSED */ 94721fffe3SKacheong Poon static void 95721fffe3SKacheong Poon tcp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 96721fffe3SKacheong Poon sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 97721fffe3SKacheong Poon { 98721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 99721fffe3SKacheong Poon struct sock_proto_props sopp; 100721fffe3SKacheong Poon extern struct module_info tcp_rinfo; 101721fffe3SKacheong Poon 102721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle == NULL); 103721fffe3SKacheong Poon 104721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 105721fffe3SKacheong Poon ASSERT(cr != NULL); 106721fffe3SKacheong Poon 107721fffe3SKacheong Poon sopp.sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 108721fffe3SKacheong Poon SOCKOPT_MAXPSZ | SOCKOPT_MAXBLK | SOCKOPT_RCVTIMER | 109721fffe3SKacheong Poon SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ; 110721fffe3SKacheong Poon 111721fffe3SKacheong Poon sopp.sopp_rxhiwat = SOCKET_RECVHIWATER; 112721fffe3SKacheong Poon sopp.sopp_rxlowat = SOCKET_RECVLOWATER; 113721fffe3SKacheong Poon sopp.sopp_maxpsz = INFPSZ; 114721fffe3SKacheong Poon sopp.sopp_maxblk = INFPSZ; 115721fffe3SKacheong Poon sopp.sopp_rcvtimer = SOCKET_TIMER_INTERVAL; 116721fffe3SKacheong Poon sopp.sopp_rcvthresh = SOCKET_RECVHIWATER >> 3; 117721fffe3SKacheong Poon sopp.sopp_maxaddrlen = sizeof (sin6_t); 118721fffe3SKacheong Poon sopp.sopp_minpsz = (tcp_rinfo.mi_minpsz == 1) ? 0 : 119721fffe3SKacheong Poon tcp_rinfo.mi_minpsz; 120721fffe3SKacheong Poon 121721fffe3SKacheong Poon connp->conn_upcalls = sock_upcalls; 122721fffe3SKacheong Poon connp->conn_upper_handle = sock_handle; 123721fffe3SKacheong Poon 124721fffe3SKacheong Poon ASSERT(connp->conn_rcvbuf != 0 && 125721fffe3SKacheong Poon connp->conn_rcvbuf == connp->conn_tcp->tcp_rwnd); 126721fffe3SKacheong Poon (*sock_upcalls->su_set_proto_props)(sock_handle, &sopp); 127721fffe3SKacheong Poon } 128721fffe3SKacheong Poon 1293e95bd4aSAnders Persson /*ARGSUSED*/ 130721fffe3SKacheong Poon static int 131721fffe3SKacheong Poon tcp_accept(sock_lower_handle_t lproto_handle, 132721fffe3SKacheong Poon sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 133721fffe3SKacheong Poon cred_t *cr) 134721fffe3SKacheong Poon { 135721fffe3SKacheong Poon conn_t *lconnp, *econnp; 136721fffe3SKacheong Poon tcp_t *listener, *eager; 137721fffe3SKacheong Poon 138dd49f125SAnders Persson /* 139dd49f125SAnders Persson * KSSL can move a socket from one listener to another, in which 140dd49f125SAnders Persson * case `lproto_handle' points to the new listener. To ensure that 141dd49f125SAnders Persson * the original listener is used the information is obtained from 142dd49f125SAnders Persson * the eager. 143dd49f125SAnders Persson */ 144721fffe3SKacheong Poon econnp = (conn_t *)eproto_handle; 145721fffe3SKacheong Poon eager = econnp->conn_tcp; 1463e95bd4aSAnders Persson ASSERT(IPCL_IS_NONSTR(econnp)); 147dd49f125SAnders Persson ASSERT(eager->tcp_listener != NULL); 148dd49f125SAnders Persson listener = eager->tcp_listener; 149dd49f125SAnders Persson lconnp = (conn_t *)listener->tcp_connp; 150dd49f125SAnders Persson ASSERT(listener->tcp_state == TCPS_LISTEN); 1513e95bd4aSAnders Persson ASSERT(lconnp->conn_upper_handle != NULL); 152721fffe3SKacheong Poon 153721fffe3SKacheong Poon /* 1543e95bd4aSAnders Persson * It is possible for the accept thread to race with the thread that 1553e95bd4aSAnders Persson * made the su_newconn upcall in tcp_newconn_notify. Both 1563e95bd4aSAnders Persson * tcp_newconn_notify and tcp_accept require that conn_upper_handle 1573e95bd4aSAnders Persson * and conn_upcalls be set before returning, so they both write to 1583e95bd4aSAnders Persson * them. However, we're guaranteed that the value written is the same 1593e95bd4aSAnders Persson * for both threads. 160721fffe3SKacheong Poon */ 1613e95bd4aSAnders Persson ASSERT(econnp->conn_upper_handle == NULL || 1623e95bd4aSAnders Persson econnp->conn_upper_handle == sock_handle); 1633e95bd4aSAnders Persson ASSERT(econnp->conn_upcalls == NULL || 1643e95bd4aSAnders Persson econnp->conn_upcalls == lconnp->conn_upcalls); 165721fffe3SKacheong Poon econnp->conn_upper_handle = sock_handle; 166721fffe3SKacheong Poon econnp->conn_upcalls = lconnp->conn_upcalls; 1673e95bd4aSAnders Persson 1683e95bd4aSAnders Persson ASSERT(econnp->conn_netstack == 1693e95bd4aSAnders Persson listener->tcp_connp->conn_netstack); 1703e95bd4aSAnders Persson ASSERT(eager->tcp_tcps == listener->tcp_tcps); 1713e95bd4aSAnders Persson 1723e95bd4aSAnders Persson /* 1733e95bd4aSAnders Persson * We should have a minimum of 2 references on the conn at this 1743e95bd4aSAnders Persson * point. One for TCP and one for the newconn notification 1753e95bd4aSAnders Persson * (which is now taken over by IP). In the normal case we would 1763e95bd4aSAnders Persson * also have another reference (making a total of 3) for the conn 1773e95bd4aSAnders Persson * being in the classifier hash list. However the eager could have 1783e95bd4aSAnders Persson * received an RST subsequently and tcp_closei_local could have 1793e95bd4aSAnders Persson * removed the eager from the classifier hash list, hence we can't 1803e95bd4aSAnders Persson * assert that reference. 1813e95bd4aSAnders Persson */ 1823e95bd4aSAnders Persson ASSERT(econnp->conn_ref >= 2); 1833e95bd4aSAnders Persson 1843e95bd4aSAnders Persson mutex_enter(&listener->tcp_eager_lock); 1853e95bd4aSAnders Persson /* 1863e95bd4aSAnders Persson * Non-STREAMS listeners never defer the notification of new 1873e95bd4aSAnders Persson * connections. 1883e95bd4aSAnders Persson */ 1893e95bd4aSAnders Persson ASSERT(!listener->tcp_eager_prev_q0->tcp_conn_def_q0); 1903e95bd4aSAnders Persson tcp_eager_unlink(eager); 1913e95bd4aSAnders Persson mutex_exit(&listener->tcp_eager_lock); 1923e95bd4aSAnders Persson CONN_DEC_REF(listener->tcp_connp); 1933e95bd4aSAnders Persson 194b1cd7879SAnders Persson return ((eager->tcp_state < TCPS_ESTABLISHED) ? ECONNABORTED : 0); 195721fffe3SKacheong Poon } 196721fffe3SKacheong Poon 197721fffe3SKacheong Poon static int 198721fffe3SKacheong Poon tcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 199721fffe3SKacheong Poon socklen_t len, cred_t *cr) 200721fffe3SKacheong Poon { 201721fffe3SKacheong Poon int error; 202721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 203721fffe3SKacheong Poon 204721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 205721fffe3SKacheong Poon ASSERT(cr != NULL); 206721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 207721fffe3SKacheong Poon 2089ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 209721fffe3SKacheong Poon if (error != 0) { 210721fffe3SKacheong Poon /* failed to enter */ 211721fffe3SKacheong Poon return (ENOSR); 212721fffe3SKacheong Poon } 213721fffe3SKacheong Poon 214721fffe3SKacheong Poon /* binding to a NULL address really means unbind */ 215721fffe3SKacheong Poon if (sa == NULL) { 216721fffe3SKacheong Poon if (connp->conn_tcp->tcp_state < TCPS_LISTEN) 217721fffe3SKacheong Poon error = tcp_do_unbind(connp); 218721fffe3SKacheong Poon else 219721fffe3SKacheong Poon error = EINVAL; 220721fffe3SKacheong Poon } else { 221721fffe3SKacheong Poon error = tcp_do_bind(connp, sa, len, cr, B_TRUE); 222721fffe3SKacheong Poon } 223721fffe3SKacheong Poon 2249ee3959aSAnders Persson squeue_synch_exit(connp); 225721fffe3SKacheong Poon 226721fffe3SKacheong Poon if (error < 0) { 227721fffe3SKacheong Poon if (error == -TOUTSTATE) 228721fffe3SKacheong Poon error = EINVAL; 229721fffe3SKacheong Poon else 230721fffe3SKacheong Poon error = proto_tlitosyserr(-error); 231721fffe3SKacheong Poon } 232721fffe3SKacheong Poon 233721fffe3SKacheong Poon return (error); 234721fffe3SKacheong Poon } 235721fffe3SKacheong Poon 236721fffe3SKacheong Poon /* ARGSUSED */ 237721fffe3SKacheong Poon static int 238721fffe3SKacheong Poon tcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 239721fffe3SKacheong Poon { 240721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 2413e95bd4aSAnders Persson tcp_t *tcp = connp->conn_tcp; 242721fffe3SKacheong Poon int error; 243721fffe3SKacheong Poon 244721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 245721fffe3SKacheong Poon 246721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 247721fffe3SKacheong Poon ASSERT(cr != NULL); 248721fffe3SKacheong Poon 2499ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 250721fffe3SKacheong Poon if (error != 0) { 251721fffe3SKacheong Poon /* failed to enter */ 252721fffe3SKacheong Poon return (ENOBUFS); 253721fffe3SKacheong Poon } 254721fffe3SKacheong Poon 255721fffe3SKacheong Poon error = tcp_do_listen(connp, NULL, 0, backlog, cr, B_FALSE); 256721fffe3SKacheong Poon if (error == 0) { 2573e95bd4aSAnders Persson /* 2583e95bd4aSAnders Persson * sockfs needs to know what's the maximum number of socket 2593e95bd4aSAnders Persson * that can be queued on the listener. 2603e95bd4aSAnders Persson */ 261721fffe3SKacheong Poon (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 2623e95bd4aSAnders Persson SOCK_OPCTL_ENAB_ACCEPT, 2633e95bd4aSAnders Persson (uintptr_t)(tcp->tcp_conn_req_max + 2643e95bd4aSAnders Persson tcp->tcp_tcps->tcps_conn_req_max_q0)); 265721fffe3SKacheong Poon } else if (error < 0) { 266721fffe3SKacheong Poon if (error == -TOUTSTATE) 267721fffe3SKacheong Poon error = EINVAL; 268721fffe3SKacheong Poon else 269721fffe3SKacheong Poon error = proto_tlitosyserr(-error); 270721fffe3SKacheong Poon } 2719ee3959aSAnders Persson squeue_synch_exit(connp); 272721fffe3SKacheong Poon return (error); 273721fffe3SKacheong Poon } 274721fffe3SKacheong Poon 275721fffe3SKacheong Poon static int 276721fffe3SKacheong Poon tcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 277721fffe3SKacheong Poon socklen_t len, sock_connid_t *id, cred_t *cr) 278721fffe3SKacheong Poon { 279721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 280721fffe3SKacheong Poon int error; 281721fffe3SKacheong Poon 282721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 283721fffe3SKacheong Poon 284721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 285721fffe3SKacheong Poon ASSERT(cr != NULL); 286721fffe3SKacheong Poon 287721fffe3SKacheong Poon error = proto_verify_ip_addr(connp->conn_family, sa, len); 288721fffe3SKacheong Poon if (error != 0) { 289721fffe3SKacheong Poon return (error); 290721fffe3SKacheong Poon } 291721fffe3SKacheong Poon 2929ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 293721fffe3SKacheong Poon if (error != 0) { 294721fffe3SKacheong Poon /* failed to enter */ 295721fffe3SKacheong Poon return (ENOSR); 296721fffe3SKacheong Poon } 297721fffe3SKacheong Poon 298721fffe3SKacheong Poon /* 299721fffe3SKacheong Poon * TCP supports quick connect, so no need to do an implicit bind 300721fffe3SKacheong Poon */ 301721fffe3SKacheong Poon error = tcp_do_connect(connp, sa, len, cr, curproc->p_pid); 302721fffe3SKacheong Poon if (error == 0) { 303721fffe3SKacheong Poon *id = connp->conn_tcp->tcp_connid; 304721fffe3SKacheong Poon } else if (error < 0) { 305721fffe3SKacheong Poon if (error == -TOUTSTATE) { 306721fffe3SKacheong Poon switch (connp->conn_tcp->tcp_state) { 307721fffe3SKacheong Poon case TCPS_SYN_SENT: 308721fffe3SKacheong Poon error = EALREADY; 309721fffe3SKacheong Poon break; 310721fffe3SKacheong Poon case TCPS_ESTABLISHED: 311721fffe3SKacheong Poon error = EISCONN; 312721fffe3SKacheong Poon break; 313721fffe3SKacheong Poon case TCPS_LISTEN: 314721fffe3SKacheong Poon error = EOPNOTSUPP; 315721fffe3SKacheong Poon break; 316721fffe3SKacheong Poon default: 317721fffe3SKacheong Poon error = EINVAL; 318721fffe3SKacheong Poon break; 319721fffe3SKacheong Poon } 320721fffe3SKacheong Poon } else { 321721fffe3SKacheong Poon error = proto_tlitosyserr(-error); 322721fffe3SKacheong Poon } 323721fffe3SKacheong Poon } 324721fffe3SKacheong Poon 325721fffe3SKacheong Poon if (connp->conn_tcp->tcp_loopback) { 326721fffe3SKacheong Poon struct sock_proto_props sopp; 327721fffe3SKacheong Poon 328721fffe3SKacheong Poon sopp.sopp_flags = SOCKOPT_LOOPBACK; 329721fffe3SKacheong Poon sopp.sopp_loopback = B_TRUE; 330721fffe3SKacheong Poon 331721fffe3SKacheong Poon (*connp->conn_upcalls->su_set_proto_props)( 332721fffe3SKacheong Poon connp->conn_upper_handle, &sopp); 333721fffe3SKacheong Poon } 334721fffe3SKacheong Poon done: 3359ee3959aSAnders Persson squeue_synch_exit(connp); 336721fffe3SKacheong Poon 337721fffe3SKacheong Poon return ((error == 0) ? EINPROGRESS : error); 338721fffe3SKacheong Poon } 339721fffe3SKacheong Poon 340721fffe3SKacheong Poon /* ARGSUSED3 */ 341*ca3c8f41SDavid Höppner static int 342721fffe3SKacheong Poon tcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr, 343721fffe3SKacheong Poon socklen_t *addrlenp, cred_t *cr) 344721fffe3SKacheong Poon { 345721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 346721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 347721fffe3SKacheong Poon 348721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 349721fffe3SKacheong Poon ASSERT(cr != NULL); 350721fffe3SKacheong Poon 351721fffe3SKacheong Poon ASSERT(tcp != NULL); 352721fffe3SKacheong Poon if (tcp->tcp_state < TCPS_SYN_RCVD) 353721fffe3SKacheong Poon return (ENOTCONN); 354721fffe3SKacheong Poon 355721fffe3SKacheong Poon return (conn_getpeername(connp, addr, addrlenp)); 356721fffe3SKacheong Poon } 357721fffe3SKacheong Poon 358721fffe3SKacheong Poon /* ARGSUSED3 */ 359*ca3c8f41SDavid Höppner static int 360721fffe3SKacheong Poon tcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr, 361721fffe3SKacheong Poon socklen_t *addrlenp, cred_t *cr) 362721fffe3SKacheong Poon { 363721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 364721fffe3SKacheong Poon 365721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 366721fffe3SKacheong Poon ASSERT(cr != NULL); 367721fffe3SKacheong Poon 368721fffe3SKacheong Poon return (conn_getsockname(connp, addr, addrlenp)); 369721fffe3SKacheong Poon } 370721fffe3SKacheong Poon 371721fffe3SKacheong Poon /* returns UNIX error, the optlen is a value-result arg */ 372721fffe3SKacheong Poon static int 373721fffe3SKacheong Poon tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 374721fffe3SKacheong Poon void *optvalp, socklen_t *optlen, cred_t *cr) 375721fffe3SKacheong Poon { 376721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 377721fffe3SKacheong Poon int error; 378721fffe3SKacheong Poon t_uscalar_t max_optbuf_len; 379721fffe3SKacheong Poon void *optvalp_buf; 380721fffe3SKacheong Poon int len; 381721fffe3SKacheong Poon 382721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 383721fffe3SKacheong Poon 384721fffe3SKacheong Poon error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 385721fffe3SKacheong Poon tcp_opt_obj.odb_opt_des_arr, 386721fffe3SKacheong Poon tcp_opt_obj.odb_opt_arr_cnt, 387721fffe3SKacheong Poon B_FALSE, B_TRUE, cr); 388721fffe3SKacheong Poon if (error != 0) { 389721fffe3SKacheong Poon if (error < 0) { 390721fffe3SKacheong Poon error = proto_tlitosyserr(-error); 391721fffe3SKacheong Poon } 392721fffe3SKacheong Poon return (error); 393721fffe3SKacheong Poon } 394721fffe3SKacheong Poon 395721fffe3SKacheong Poon optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 396721fffe3SKacheong Poon 3979ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 398721fffe3SKacheong Poon if (error == ENOMEM) { 399721fffe3SKacheong Poon kmem_free(optvalp_buf, max_optbuf_len); 400721fffe3SKacheong Poon return (ENOMEM); 401721fffe3SKacheong Poon } 402721fffe3SKacheong Poon 403721fffe3SKacheong Poon len = tcp_opt_get(connp, level, option_name, optvalp_buf); 4049ee3959aSAnders Persson squeue_synch_exit(connp); 405721fffe3SKacheong Poon 406721fffe3SKacheong Poon if (len == -1) { 407721fffe3SKacheong Poon kmem_free(optvalp_buf, max_optbuf_len); 408721fffe3SKacheong Poon return (EINVAL); 409721fffe3SKacheong Poon } 410721fffe3SKacheong Poon 411721fffe3SKacheong Poon /* 412721fffe3SKacheong Poon * update optlen and copy option value 413721fffe3SKacheong Poon */ 414721fffe3SKacheong Poon t_uscalar_t size = MIN(len, *optlen); 415721fffe3SKacheong Poon 416721fffe3SKacheong Poon bcopy(optvalp_buf, optvalp, size); 417721fffe3SKacheong Poon bcopy(&size, optlen, sizeof (size)); 418721fffe3SKacheong Poon 419721fffe3SKacheong Poon kmem_free(optvalp_buf, max_optbuf_len); 420721fffe3SKacheong Poon return (0); 421721fffe3SKacheong Poon } 422721fffe3SKacheong Poon 423721fffe3SKacheong Poon static int 424721fffe3SKacheong Poon tcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 425721fffe3SKacheong Poon const void *optvalp, socklen_t optlen, cred_t *cr) 426721fffe3SKacheong Poon { 427721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 428721fffe3SKacheong Poon int error; 429721fffe3SKacheong Poon 430721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 431721fffe3SKacheong Poon /* 432721fffe3SKacheong Poon * Entering the squeue synchronously can result in a context switch, 433721fffe3SKacheong Poon * which can cause a rather sever performance degradation. So we try to 434721fffe3SKacheong Poon * handle whatever options we can without entering the squeue. 435721fffe3SKacheong Poon */ 436721fffe3SKacheong Poon if (level == IPPROTO_TCP) { 437721fffe3SKacheong Poon switch (option_name) { 438721fffe3SKacheong Poon case TCP_NODELAY: 439721fffe3SKacheong Poon if (optlen != sizeof (int32_t)) 440721fffe3SKacheong Poon return (EINVAL); 441721fffe3SKacheong Poon mutex_enter(&connp->conn_tcp->tcp_non_sq_lock); 442721fffe3SKacheong Poon connp->conn_tcp->tcp_naglim = *(int *)optvalp ? 1 : 443721fffe3SKacheong Poon connp->conn_tcp->tcp_mss; 444721fffe3SKacheong Poon mutex_exit(&connp->conn_tcp->tcp_non_sq_lock); 445721fffe3SKacheong Poon return (0); 446721fffe3SKacheong Poon default: 447721fffe3SKacheong Poon break; 448721fffe3SKacheong Poon } 449721fffe3SKacheong Poon } 450721fffe3SKacheong Poon 4519ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 452721fffe3SKacheong Poon if (error == ENOMEM) { 453721fffe3SKacheong Poon return (ENOMEM); 454721fffe3SKacheong Poon } 455721fffe3SKacheong Poon 456721fffe3SKacheong Poon error = proto_opt_check(level, option_name, optlen, NULL, 457721fffe3SKacheong Poon tcp_opt_obj.odb_opt_des_arr, 458721fffe3SKacheong Poon tcp_opt_obj.odb_opt_arr_cnt, 459721fffe3SKacheong Poon B_TRUE, B_FALSE, cr); 460721fffe3SKacheong Poon 461721fffe3SKacheong Poon if (error != 0) { 462721fffe3SKacheong Poon if (error < 0) { 463721fffe3SKacheong Poon error = proto_tlitosyserr(-error); 464721fffe3SKacheong Poon } 4659ee3959aSAnders Persson squeue_synch_exit(connp); 466721fffe3SKacheong Poon return (error); 467721fffe3SKacheong Poon } 468721fffe3SKacheong Poon 469721fffe3SKacheong Poon error = tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 470721fffe3SKacheong Poon optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 471721fffe3SKacheong Poon NULL, cr); 4729ee3959aSAnders Persson squeue_synch_exit(connp); 473721fffe3SKacheong Poon 474721fffe3SKacheong Poon ASSERT(error >= 0); 475721fffe3SKacheong Poon 476721fffe3SKacheong Poon return (error); 477721fffe3SKacheong Poon } 478721fffe3SKacheong Poon 479721fffe3SKacheong Poon /* ARGSUSED */ 480721fffe3SKacheong Poon static int 481721fffe3SKacheong Poon tcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 482721fffe3SKacheong Poon cred_t *cr) 483721fffe3SKacheong Poon { 484721fffe3SKacheong Poon tcp_t *tcp; 485721fffe3SKacheong Poon uint32_t msize; 486721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 487721fffe3SKacheong Poon int32_t tcpstate; 488721fffe3SKacheong Poon 489721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 490721fffe3SKacheong Poon ASSERT(cr != NULL); 491721fffe3SKacheong Poon 492721fffe3SKacheong Poon ASSERT(connp->conn_ref >= 2); 493721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 494721fffe3SKacheong Poon 495721fffe3SKacheong Poon if (msg->msg_controllen != 0) { 496721fffe3SKacheong Poon freemsg(mp); 497721fffe3SKacheong Poon return (EOPNOTSUPP); 498721fffe3SKacheong Poon } 499721fffe3SKacheong Poon 500721fffe3SKacheong Poon switch (DB_TYPE(mp)) { 501721fffe3SKacheong Poon case M_DATA: 502721fffe3SKacheong Poon tcp = connp->conn_tcp; 503721fffe3SKacheong Poon ASSERT(tcp != NULL); 504721fffe3SKacheong Poon 505721fffe3SKacheong Poon tcpstate = tcp->tcp_state; 506721fffe3SKacheong Poon if (tcpstate < TCPS_ESTABLISHED) { 507721fffe3SKacheong Poon freemsg(mp); 508721fffe3SKacheong Poon /* 509721fffe3SKacheong Poon * We return ENOTCONN if the endpoint is trying to 510721fffe3SKacheong Poon * connect or has never been connected, and EPIPE if it 511721fffe3SKacheong Poon * has been disconnected. The connection id helps us 512721fffe3SKacheong Poon * distinguish between the last two cases. 513721fffe3SKacheong Poon */ 514721fffe3SKacheong Poon return ((tcpstate == TCPS_SYN_SENT) ? ENOTCONN : 515721fffe3SKacheong Poon ((tcp->tcp_connid > 0) ? EPIPE : ENOTCONN)); 516721fffe3SKacheong Poon } else if (tcpstate > TCPS_CLOSE_WAIT) { 517721fffe3SKacheong Poon freemsg(mp); 518721fffe3SKacheong Poon return (EPIPE); 519721fffe3SKacheong Poon } 520721fffe3SKacheong Poon 521721fffe3SKacheong Poon msize = msgdsize(mp); 522721fffe3SKacheong Poon 523721fffe3SKacheong Poon mutex_enter(&tcp->tcp_non_sq_lock); 524721fffe3SKacheong Poon tcp->tcp_squeue_bytes += msize; 525721fffe3SKacheong Poon /* 526721fffe3SKacheong Poon * Squeue Flow Control 527721fffe3SKacheong Poon */ 528721fffe3SKacheong Poon if (TCP_UNSENT_BYTES(tcp) > connp->conn_sndbuf) { 529721fffe3SKacheong Poon tcp_setqfull(tcp); 530721fffe3SKacheong Poon } 531721fffe3SKacheong Poon mutex_exit(&tcp->tcp_non_sq_lock); 532721fffe3SKacheong Poon 533721fffe3SKacheong Poon /* 534721fffe3SKacheong Poon * The application may pass in an address in the msghdr, but 535721fffe3SKacheong Poon * we ignore the address on connection-oriented sockets. 536721fffe3SKacheong Poon * Just like BSD this code does not generate an error for 537721fffe3SKacheong Poon * TCP (a CONNREQUIRED socket) when sending to an address 538721fffe3SKacheong Poon * passed in with sendto/sendmsg. Instead the data is 539721fffe3SKacheong Poon * delivered on the connection as if no address had been 540721fffe3SKacheong Poon * supplied. 541721fffe3SKacheong Poon */ 542721fffe3SKacheong Poon CONN_INC_REF(connp); 543721fffe3SKacheong Poon 544721fffe3SKacheong Poon if (msg->msg_flags & MSG_OOB) { 545721fffe3SKacheong Poon SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output_urgent, 546721fffe3SKacheong Poon connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT); 547721fffe3SKacheong Poon } else { 548721fffe3SKacheong Poon SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output, 549721fffe3SKacheong Poon connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT); 550721fffe3SKacheong Poon } 551721fffe3SKacheong Poon 552721fffe3SKacheong Poon return (0); 553721fffe3SKacheong Poon 554721fffe3SKacheong Poon default: 555721fffe3SKacheong Poon ASSERT(0); 556721fffe3SKacheong Poon } 557721fffe3SKacheong Poon 558721fffe3SKacheong Poon freemsg(mp); 559721fffe3SKacheong Poon return (0); 560721fffe3SKacheong Poon } 561721fffe3SKacheong Poon 562721fffe3SKacheong Poon /* ARGSUSED */ 563721fffe3SKacheong Poon static int 564721fffe3SKacheong Poon tcp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 565721fffe3SKacheong Poon { 566721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 567721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 568721fffe3SKacheong Poon 569721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 570721fffe3SKacheong Poon 571721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 572721fffe3SKacheong Poon ASSERT(cr != NULL); 573721fffe3SKacheong Poon 574721fffe3SKacheong Poon /* 575721fffe3SKacheong Poon * X/Open requires that we check the connected state. 576721fffe3SKacheong Poon */ 577721fffe3SKacheong Poon if (tcp->tcp_state < TCPS_SYN_SENT) 578721fffe3SKacheong Poon return (ENOTCONN); 579721fffe3SKacheong Poon 580721fffe3SKacheong Poon /* shutdown the send side */ 581721fffe3SKacheong Poon if (how != SHUT_RD) { 582721fffe3SKacheong Poon mblk_t *bp; 583721fffe3SKacheong Poon 584721fffe3SKacheong Poon bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL); 585721fffe3SKacheong Poon CONN_INC_REF(connp); 586721fffe3SKacheong Poon SQUEUE_ENTER_ONE(connp->conn_sqp, bp, tcp_shutdown_output, 587721fffe3SKacheong Poon connp, NULL, SQ_NODRAIN, SQTAG_TCP_SHUTDOWN_OUTPUT); 588721fffe3SKacheong Poon 589721fffe3SKacheong Poon (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 590721fffe3SKacheong Poon SOCK_OPCTL_SHUT_SEND, 0); 591721fffe3SKacheong Poon } 592721fffe3SKacheong Poon 593721fffe3SKacheong Poon /* shutdown the recv side */ 594721fffe3SKacheong Poon if (how != SHUT_WR) 595721fffe3SKacheong Poon (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 596721fffe3SKacheong Poon SOCK_OPCTL_SHUT_RECV, 0); 597721fffe3SKacheong Poon 598721fffe3SKacheong Poon return (0); 599721fffe3SKacheong Poon } 600721fffe3SKacheong Poon 601721fffe3SKacheong Poon static void 602721fffe3SKacheong Poon tcp_clr_flowctrl(sock_lower_handle_t proto_handle) 603721fffe3SKacheong Poon { 604721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 605721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 606721fffe3SKacheong Poon mblk_t *mp; 607721fffe3SKacheong Poon int error; 608721fffe3SKacheong Poon 609721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 610721fffe3SKacheong Poon 611721fffe3SKacheong Poon /* 612721fffe3SKacheong Poon * If tcp->tcp_rsrv_mp == NULL, it means that tcp_clr_flowctrl() 613721fffe3SKacheong Poon * is currently running. 614721fffe3SKacheong Poon */ 615721fffe3SKacheong Poon mutex_enter(&tcp->tcp_rsrv_mp_lock); 616721fffe3SKacheong Poon if ((mp = tcp->tcp_rsrv_mp) == NULL) { 617721fffe3SKacheong Poon mutex_exit(&tcp->tcp_rsrv_mp_lock); 618721fffe3SKacheong Poon return; 619721fffe3SKacheong Poon } 620721fffe3SKacheong Poon tcp->tcp_rsrv_mp = NULL; 621721fffe3SKacheong Poon mutex_exit(&tcp->tcp_rsrv_mp_lock); 622721fffe3SKacheong Poon 6239ee3959aSAnders Persson error = squeue_synch_enter(connp, mp); 624721fffe3SKacheong Poon ASSERT(error == 0); 625721fffe3SKacheong Poon 626721fffe3SKacheong Poon mutex_enter(&tcp->tcp_rsrv_mp_lock); 627721fffe3SKacheong Poon tcp->tcp_rsrv_mp = mp; 628721fffe3SKacheong Poon mutex_exit(&tcp->tcp_rsrv_mp_lock); 629721fffe3SKacheong Poon 630721fffe3SKacheong Poon if (tcp->tcp_fused) { 631721fffe3SKacheong Poon tcp_fuse_backenable(tcp); 632721fffe3SKacheong Poon } else { 633721fffe3SKacheong Poon tcp->tcp_rwnd = connp->conn_rcvbuf; 634721fffe3SKacheong Poon /* 635721fffe3SKacheong Poon * Send back a window update immediately if TCP is above 636721fffe3SKacheong Poon * ESTABLISHED state and the increase of the rcv window 637721fffe3SKacheong Poon * that the other side knows is at least 1 MSS after flow 638721fffe3SKacheong Poon * control is lifted. 639721fffe3SKacheong Poon */ 640721fffe3SKacheong Poon if (tcp->tcp_state >= TCPS_ESTABLISHED && 641721fffe3SKacheong Poon tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) { 642721fffe3SKacheong Poon tcp_xmit_ctl(NULL, tcp, 643721fffe3SKacheong Poon (tcp->tcp_swnd == 0) ? tcp->tcp_suna : 644721fffe3SKacheong Poon tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK); 645721fffe3SKacheong Poon } 646721fffe3SKacheong Poon } 647721fffe3SKacheong Poon 6489ee3959aSAnders Persson squeue_synch_exit(connp); 649721fffe3SKacheong Poon } 650721fffe3SKacheong Poon 651721fffe3SKacheong Poon /* ARGSUSED */ 652721fffe3SKacheong Poon static int 653721fffe3SKacheong Poon tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 654721fffe3SKacheong Poon int mode, int32_t *rvalp, cred_t *cr) 655721fffe3SKacheong Poon { 656721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 657721fffe3SKacheong Poon int error; 658721fffe3SKacheong Poon 659721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 660721fffe3SKacheong Poon 661721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 662721fffe3SKacheong Poon ASSERT(cr != NULL); 663721fffe3SKacheong Poon 664721fffe3SKacheong Poon /* 665721fffe3SKacheong Poon * If we don't have a helper stream then create one. 666721fffe3SKacheong Poon * ip_create_helper_stream takes care of locking the conn_t, 667721fffe3SKacheong Poon * so this check for NULL is just a performance optimization. 668721fffe3SKacheong Poon */ 669721fffe3SKacheong Poon if (connp->conn_helper_info == NULL) { 670721fffe3SKacheong Poon tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps; 671721fffe3SKacheong Poon 672721fffe3SKacheong Poon /* 673721fffe3SKacheong Poon * Create a helper stream for non-STREAMS socket. 674721fffe3SKacheong Poon */ 675721fffe3SKacheong Poon error = ip_create_helper_stream(connp, tcps->tcps_ldi_ident); 676721fffe3SKacheong Poon if (error != 0) { 677721fffe3SKacheong Poon ip0dbg(("tcp_ioctl: create of IP helper stream " 678721fffe3SKacheong Poon "failed %d\n", error)); 679721fffe3SKacheong Poon return (error); 680721fffe3SKacheong Poon } 681721fffe3SKacheong Poon } 682721fffe3SKacheong Poon 683721fffe3SKacheong Poon switch (cmd) { 684721fffe3SKacheong Poon case ND_SET: 685721fffe3SKacheong Poon case ND_GET: 686721fffe3SKacheong Poon case _SIOCSOCKFALLBACK: 687721fffe3SKacheong Poon case TCP_IOC_ABORT_CONN: 688721fffe3SKacheong Poon case TI_GETPEERNAME: 689721fffe3SKacheong Poon case TI_GETMYNAME: 690721fffe3SKacheong Poon ip1dbg(("tcp_ioctl: cmd 0x%x on non streams socket", 691721fffe3SKacheong Poon cmd)); 692721fffe3SKacheong Poon error = EINVAL; 693721fffe3SKacheong Poon break; 694721fffe3SKacheong Poon default: 695721fffe3SKacheong Poon /* 696721fffe3SKacheong Poon * If the conn is not closing, pass on to IP using 697721fffe3SKacheong Poon * helper stream. Bump the ioctlref to prevent tcp_close 698721fffe3SKacheong Poon * from closing the rq/wq out from underneath the ioctl 699721fffe3SKacheong Poon * if it ends up queued or aborted/interrupted. 700721fffe3SKacheong Poon */ 701721fffe3SKacheong Poon mutex_enter(&connp->conn_lock); 702721fffe3SKacheong Poon if (connp->conn_state_flags & (CONN_CLOSING)) { 703721fffe3SKacheong Poon mutex_exit(&connp->conn_lock); 704721fffe3SKacheong Poon error = EINVAL; 705721fffe3SKacheong Poon break; 706721fffe3SKacheong Poon } 707721fffe3SKacheong Poon CONN_INC_IOCTLREF_LOCKED(connp); 708721fffe3SKacheong Poon error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 709721fffe3SKacheong Poon cmd, arg, mode, cr, rvalp); 710721fffe3SKacheong Poon CONN_DEC_IOCTLREF(connp); 711721fffe3SKacheong Poon break; 712721fffe3SKacheong Poon } 713721fffe3SKacheong Poon return (error); 714721fffe3SKacheong Poon } 715721fffe3SKacheong Poon 716721fffe3SKacheong Poon /* ARGSUSED */ 717721fffe3SKacheong Poon static int 718721fffe3SKacheong Poon tcp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 719721fffe3SKacheong Poon { 720721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 721721fffe3SKacheong Poon 722721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 723721fffe3SKacheong Poon 724721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 725721fffe3SKacheong Poon ASSERT(cr != NULL); 726721fffe3SKacheong Poon 727721fffe3SKacheong Poon tcp_close_common(connp, flags); 728721fffe3SKacheong Poon 729721fffe3SKacheong Poon ip_free_helper_stream(connp); 730721fffe3SKacheong Poon 731721fffe3SKacheong Poon /* 732721fffe3SKacheong Poon * Drop IP's reference on the conn. This is the last reference 733721fffe3SKacheong Poon * on the connp if the state was less than established. If the 734721fffe3SKacheong Poon * connection has gone into timewait state, then we will have 735721fffe3SKacheong Poon * one ref for the TCP and one more ref (total of two) for the 736721fffe3SKacheong Poon * classifier connected hash list (a timewait connections stays 737721fffe3SKacheong Poon * in connected hash till closed). 738721fffe3SKacheong Poon * 739721fffe3SKacheong Poon * We can't assert the references because there might be other 740721fffe3SKacheong Poon * transient reference places because of some walkers or queued 741721fffe3SKacheong Poon * packets in squeue for the timewait state. 742721fffe3SKacheong Poon */ 743721fffe3SKacheong Poon CONN_DEC_REF(connp); 7443e95bd4aSAnders Persson 7453e95bd4aSAnders Persson /* 7463e95bd4aSAnders Persson * EINPROGRESS tells sockfs to wait for a 'closed' upcall before 7473e95bd4aSAnders Persson * freeing the socket. 7483e95bd4aSAnders Persson */ 7493e95bd4aSAnders Persson return (EINPROGRESS); 750721fffe3SKacheong Poon } 751721fffe3SKacheong Poon 752721fffe3SKacheong Poon /* ARGSUSED */ 753721fffe3SKacheong Poon sock_lower_handle_t 754721fffe3SKacheong Poon tcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 755721fffe3SKacheong Poon uint_t *smodep, int *errorp, int flags, cred_t *credp) 756721fffe3SKacheong Poon { 757721fffe3SKacheong Poon conn_t *connp; 758721fffe3SKacheong Poon boolean_t isv6 = family == AF_INET6; 759*ca3c8f41SDavid Höppner 760721fffe3SKacheong Poon if (type != SOCK_STREAM || (family != AF_INET && family != AF_INET6) || 761721fffe3SKacheong Poon (proto != 0 && proto != IPPROTO_TCP)) { 762721fffe3SKacheong Poon *errorp = EPROTONOSUPPORT; 763721fffe3SKacheong Poon return (NULL); 764721fffe3SKacheong Poon } 765721fffe3SKacheong Poon 766721fffe3SKacheong Poon connp = tcp_create_common(credp, isv6, B_TRUE, errorp); 767721fffe3SKacheong Poon if (connp == NULL) { 768721fffe3SKacheong Poon return (NULL); 769721fffe3SKacheong Poon } 770721fffe3SKacheong Poon 771721fffe3SKacheong Poon /* 772721fffe3SKacheong Poon * Put the ref for TCP. Ref for IP was already put 773*ca3c8f41SDavid Höppner * by ipcl_conn_create. Also make the conn_t globally 774721fffe3SKacheong Poon * visible to walkers 775721fffe3SKacheong Poon */ 776721fffe3SKacheong Poon mutex_enter(&connp->conn_lock); 777721fffe3SKacheong Poon CONN_INC_REF_LOCKED(connp); 778721fffe3SKacheong Poon ASSERT(connp->conn_ref == 2); 779721fffe3SKacheong Poon connp->conn_state_flags &= ~CONN_INCIPIENT; 780721fffe3SKacheong Poon 781721fffe3SKacheong Poon connp->conn_flags |= IPCL_NONSTR; 782721fffe3SKacheong Poon mutex_exit(&connp->conn_lock); 783721fffe3SKacheong Poon 784721fffe3SKacheong Poon ASSERT(errorp != NULL); 785721fffe3SKacheong Poon *errorp = 0; 786721fffe3SKacheong Poon *sock_downcalls = &sock_tcp_downcalls; 787721fffe3SKacheong Poon *smodep = SM_CONNREQUIRED | SM_EXDATA | SM_ACCEPTSUPP | 788721fffe3SKacheong Poon SM_SENDFILESUPP; 789721fffe3SKacheong Poon 790721fffe3SKacheong Poon return ((sock_lower_handle_t)connp); 791721fffe3SKacheong Poon } 792721fffe3SKacheong Poon 7933e95bd4aSAnders Persson /* 7943e95bd4aSAnders Persson * tcp_fallback 7953e95bd4aSAnders Persson * 7963e95bd4aSAnders Persson * A direct socket is falling back to using STREAMS. The queue 7973e95bd4aSAnders Persson * that is being passed down was created using tcp_open() with 7983e95bd4aSAnders Persson * the SO_FALLBACK flag set. As a result, the queue is not 7993e95bd4aSAnders Persson * associated with a conn, and the q_ptrs instead contain the 8003e95bd4aSAnders Persson * dev and minor area that should be used. 8013e95bd4aSAnders Persson * 8023e95bd4aSAnders Persson * The 'issocket' flag indicates whether the FireEngine 8033e95bd4aSAnders Persson * optimizations should be used. The common case would be that 8043e95bd4aSAnders Persson * optimizations are enabled, and they might be subsequently 8053e95bd4aSAnders Persson * disabled using the _SIOCSOCKFALLBACK ioctl. 8063e95bd4aSAnders Persson */ 8073e95bd4aSAnders Persson 8083e95bd4aSAnders Persson /* 8093e95bd4aSAnders Persson * An active connection is falling back to TPI. Gather all the information 8103e95bd4aSAnders Persson * required by the STREAM head and TPI sonode and send it up. 8113e95bd4aSAnders Persson */ 8123e95bd4aSAnders Persson static void 8133e95bd4aSAnders Persson tcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q, 8143e95bd4aSAnders Persson boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb, 8153e95bd4aSAnders Persson sock_quiesce_arg_t *arg) 8163e95bd4aSAnders Persson { 8173e95bd4aSAnders Persson conn_t *connp = tcp->tcp_connp; 8183e95bd4aSAnders Persson struct stroptions *stropt; 8193e95bd4aSAnders Persson struct T_capability_ack tca; 8203e95bd4aSAnders Persson struct sockaddr_in6 laddr, faddr; 8213e95bd4aSAnders Persson socklen_t laddrlen, faddrlen; 8223e95bd4aSAnders Persson short opts; 8233e95bd4aSAnders Persson int error; 8243e95bd4aSAnders Persson mblk_t *mp, *mpnext; 8253e95bd4aSAnders Persson 8263e95bd4aSAnders Persson connp->conn_dev = (dev_t)RD(q)->q_ptr; 8273e95bd4aSAnders Persson connp->conn_minor_arena = WR(q)->q_ptr; 8283e95bd4aSAnders Persson 8293e95bd4aSAnders Persson RD(q)->q_ptr = WR(q)->q_ptr = connp; 8303e95bd4aSAnders Persson 8313e95bd4aSAnders Persson connp->conn_rq = RD(q); 8323e95bd4aSAnders Persson connp->conn_wq = WR(q); 8333e95bd4aSAnders Persson 8343e95bd4aSAnders Persson WR(q)->q_qinfo = &tcp_sock_winit; 8353e95bd4aSAnders Persson 8363e95bd4aSAnders Persson if (!issocket) 8373e95bd4aSAnders Persson tcp_use_pure_tpi(tcp); 8383e95bd4aSAnders Persson 8393e95bd4aSAnders Persson /* 8403e95bd4aSAnders Persson * free the helper stream 8413e95bd4aSAnders Persson */ 8423e95bd4aSAnders Persson ip_free_helper_stream(connp); 8433e95bd4aSAnders Persson 8443e95bd4aSAnders Persson /* 8453e95bd4aSAnders Persson * Notify the STREAM head about options 8463e95bd4aSAnders Persson */ 8473e95bd4aSAnders Persson DB_TYPE(stropt_mp) = M_SETOPTS; 8483e95bd4aSAnders Persson stropt = (struct stroptions *)stropt_mp->b_rptr; 8493e95bd4aSAnders Persson stropt_mp->b_wptr += sizeof (struct stroptions); 8503e95bd4aSAnders Persson stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK; 8513e95bd4aSAnders Persson 8523e95bd4aSAnders Persson stropt->so_wroff = connp->conn_ht_iphc_len + (tcp->tcp_loopback ? 0 : 8533e95bd4aSAnders Persson tcp->tcp_tcps->tcps_wroff_xtra); 8543e95bd4aSAnders Persson if (tcp->tcp_snd_sack_ok) 8553e95bd4aSAnders Persson stropt->so_wroff += TCPOPT_MAX_SACK_LEN; 8563e95bd4aSAnders Persson stropt->so_hiwat = connp->conn_rcvbuf; 8573e95bd4aSAnders Persson stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE); 8583e95bd4aSAnders Persson 8593e95bd4aSAnders Persson putnext(RD(q), stropt_mp); 8603e95bd4aSAnders Persson 8613e95bd4aSAnders Persson /* 8623e95bd4aSAnders Persson * Collect the information needed to sync with the sonode 8633e95bd4aSAnders Persson */ 8643e95bd4aSAnders Persson tcp_do_capability_ack(tcp, &tca, TC1_INFO|TC1_ACCEPTOR_ID); 8653e95bd4aSAnders Persson 8663e95bd4aSAnders Persson laddrlen = faddrlen = sizeof (sin6_t); 8673e95bd4aSAnders Persson (void) tcp_getsockname((sock_lower_handle_t)connp, 8683e95bd4aSAnders Persson (struct sockaddr *)&laddr, &laddrlen, CRED()); 8693e95bd4aSAnders Persson error = tcp_getpeername((sock_lower_handle_t)connp, 8703e95bd4aSAnders Persson (struct sockaddr *)&faddr, &faddrlen, CRED()); 8713e95bd4aSAnders Persson if (error != 0) 8723e95bd4aSAnders Persson faddrlen = 0; 8733e95bd4aSAnders Persson 8743e95bd4aSAnders Persson opts = 0; 8753e95bd4aSAnders Persson if (connp->conn_oobinline) 8763e95bd4aSAnders Persson opts |= SO_OOBINLINE; 8773e95bd4aSAnders Persson if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 8783e95bd4aSAnders Persson opts |= SO_DONTROUTE; 8793e95bd4aSAnders Persson 8803e95bd4aSAnders Persson /* 8813e95bd4aSAnders Persson * Notify the socket that the protocol is now quiescent, 8823e95bd4aSAnders Persson * and it's therefore safe move data from the socket 8833e95bd4aSAnders Persson * to the stream head. 8843e95bd4aSAnders Persson */ 8853e95bd4aSAnders Persson mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca, 8863e95bd4aSAnders Persson (struct sockaddr *)&laddr, laddrlen, 8873e95bd4aSAnders Persson (struct sockaddr *)&faddr, faddrlen, opts); 8883e95bd4aSAnders Persson 8893e95bd4aSAnders Persson while (mp != NULL) { 8903e95bd4aSAnders Persson mpnext = mp->b_next; 8913e95bd4aSAnders Persson tcp->tcp_rcv_list = mp->b_next; 8923e95bd4aSAnders Persson mp->b_next = NULL; 8933e95bd4aSAnders Persson putnext(q, mp); 8943e95bd4aSAnders Persson mp = mpnext; 8953e95bd4aSAnders Persson } 8963e95bd4aSAnders Persson ASSERT(tcp->tcp_rcv_last_head == NULL); 8973e95bd4aSAnders Persson ASSERT(tcp->tcp_rcv_last_tail == NULL); 8983e95bd4aSAnders Persson ASSERT(tcp->tcp_rcv_cnt == 0); 8993e95bd4aSAnders Persson 9003e95bd4aSAnders Persson /* 9013e95bd4aSAnders Persson * All eagers in q0 are marked as being non-STREAM, so they will 9023e95bd4aSAnders Persson * make su_newconn upcalls when the handshake completes, which 9033e95bd4aSAnders Persson * will fail (resulting in the conn being closed). So we just blow 9043e95bd4aSAnders Persson * off everything in q0 instead of waiting for the inevitable. 9053e95bd4aSAnders Persson */ 9063e95bd4aSAnders Persson if (tcp->tcp_conn_req_cnt_q0 != 0) 9073e95bd4aSAnders Persson tcp_eager_cleanup(tcp, B_TRUE); 9083e95bd4aSAnders Persson } 9093e95bd4aSAnders Persson 9103e95bd4aSAnders Persson /* 9113e95bd4aSAnders Persson * An eager is falling back to TPI. All we have to do is send 9123e95bd4aSAnders Persson * up a T_CONN_IND. 9133e95bd4aSAnders Persson */ 9143e95bd4aSAnders Persson static void 9153e95bd4aSAnders Persson tcp_fallback_eager(tcp_t *eager, boolean_t issocket, 9163e95bd4aSAnders Persson so_proto_quiesced_cb_t quiesced_cb, sock_quiesce_arg_t *arg) 9173e95bd4aSAnders Persson { 9183e95bd4aSAnders Persson conn_t *connp = eager->tcp_connp; 9193e95bd4aSAnders Persson tcp_t *listener = eager->tcp_listener; 9203e95bd4aSAnders Persson mblk_t *mp; 9213e95bd4aSAnders Persson 9223e95bd4aSAnders Persson ASSERT(listener != NULL); 9233e95bd4aSAnders Persson 9243e95bd4aSAnders Persson /* 9253e95bd4aSAnders Persson * Notify the socket that the protocol is now quiescent, 9263e95bd4aSAnders Persson * and it's therefore safe move data from the socket 9273e95bd4aSAnders Persson * to tcp's rcv queue. 9283e95bd4aSAnders Persson */ 9293e95bd4aSAnders Persson mp = (*quiesced_cb)(connp->conn_upper_handle, arg, NULL, NULL, 0, 9303e95bd4aSAnders Persson NULL, 0, 0); 9313e95bd4aSAnders Persson 9323e95bd4aSAnders Persson if (mp != NULL) { 9333e95bd4aSAnders Persson ASSERT(eager->tcp_rcv_cnt == 0); 9343e95bd4aSAnders Persson 9353e95bd4aSAnders Persson eager->tcp_rcv_list = mp; 9363e95bd4aSAnders Persson eager->tcp_rcv_cnt = msgdsize(mp); 9373e95bd4aSAnders Persson while (mp->b_next != NULL) { 9383e95bd4aSAnders Persson mp = mp->b_next; 9393e95bd4aSAnders Persson eager->tcp_rcv_cnt += msgdsize(mp); 9403e95bd4aSAnders Persson } 9413e95bd4aSAnders Persson eager->tcp_rcv_last_head = mp; 9423e95bd4aSAnders Persson while (mp->b_cont) 9433e95bd4aSAnders Persson mp = mp->b_cont; 9443e95bd4aSAnders Persson eager->tcp_rcv_last_tail = mp; 9453e95bd4aSAnders Persson if (eager->tcp_rcv_cnt > eager->tcp_rwnd) 9463e95bd4aSAnders Persson eager->tcp_rwnd = 0; 9473e95bd4aSAnders Persson else 9483e95bd4aSAnders Persson eager->tcp_rwnd -= eager->tcp_rcv_cnt; 9493e95bd4aSAnders Persson } 9503e95bd4aSAnders Persson 9513e95bd4aSAnders Persson if (!issocket) 9523e95bd4aSAnders Persson eager->tcp_issocket = B_FALSE; 9533e95bd4aSAnders Persson /* 9543e95bd4aSAnders Persson * The stream for this eager does not yet exist, so mark it as 9553e95bd4aSAnders Persson * being detached. 9563e95bd4aSAnders Persson */ 9573e95bd4aSAnders Persson eager->tcp_detached = B_TRUE; 9583e95bd4aSAnders Persson eager->tcp_hard_binding = B_TRUE; 9593e95bd4aSAnders Persson connp->conn_rq = listener->tcp_connp->conn_rq; 9603e95bd4aSAnders Persson connp->conn_wq = listener->tcp_connp->conn_wq; 9613e95bd4aSAnders Persson 9623e95bd4aSAnders Persson /* Send up the connection indication */ 9633e95bd4aSAnders Persson mp = eager->tcp_conn.tcp_eager_conn_ind; 9643e95bd4aSAnders Persson ASSERT(mp != NULL); 9653e95bd4aSAnders Persson eager->tcp_conn.tcp_eager_conn_ind = NULL; 9663e95bd4aSAnders Persson 9673e95bd4aSAnders Persson /* 9683e95bd4aSAnders Persson * TLI/XTI applications will get confused by 9693e95bd4aSAnders Persson * sending eager as an option since it violates 9703e95bd4aSAnders Persson * the option semantics. So remove the eager as 9713e95bd4aSAnders Persson * option since TLI/XTI app doesn't need it anyway. 9723e95bd4aSAnders Persson */ 9733e95bd4aSAnders Persson if (!issocket) { 9743e95bd4aSAnders Persson struct T_conn_ind *conn_ind; 9753e95bd4aSAnders Persson 9763e95bd4aSAnders Persson conn_ind = (struct T_conn_ind *)mp->b_rptr; 9773e95bd4aSAnders Persson conn_ind->OPT_length = 0; 9783e95bd4aSAnders Persson conn_ind->OPT_offset = 0; 9793e95bd4aSAnders Persson } 9803e95bd4aSAnders Persson 9813e95bd4aSAnders Persson /* 9823e95bd4aSAnders Persson * Sockfs guarantees that the listener will not be closed 9833e95bd4aSAnders Persson * during fallback. So we can safely use the listener's queue. 9843e95bd4aSAnders Persson */ 9853e95bd4aSAnders Persson putnext(listener->tcp_connp->conn_rq, mp); 9863e95bd4aSAnders Persson } 9873e95bd4aSAnders Persson 9883e95bd4aSAnders Persson 989721fffe3SKacheong Poon int 990721fffe3SKacheong Poon tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 9913e95bd4aSAnders Persson boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb, 9923e95bd4aSAnders Persson sock_quiesce_arg_t *arg) 993721fffe3SKacheong Poon { 994721fffe3SKacheong Poon tcp_t *tcp; 995721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 996721fffe3SKacheong Poon int error; 997721fffe3SKacheong Poon mblk_t *stropt_mp; 998721fffe3SKacheong Poon mblk_t *ordrel_mp; 999721fffe3SKacheong Poon 1000721fffe3SKacheong Poon tcp = connp->conn_tcp; 1001721fffe3SKacheong Poon 1002721fffe3SKacheong Poon stropt_mp = allocb_wait(sizeof (struct stroptions), BPRI_HI, STR_NOSIG, 1003721fffe3SKacheong Poon NULL); 1004721fffe3SKacheong Poon 1005721fffe3SKacheong Poon /* Pre-allocate the T_ordrel_ind mblk. */ 1006721fffe3SKacheong Poon ASSERT(tcp->tcp_ordrel_mp == NULL); 1007721fffe3SKacheong Poon ordrel_mp = allocb_wait(sizeof (struct T_ordrel_ind), BPRI_HI, 1008721fffe3SKacheong Poon STR_NOSIG, NULL); 1009721fffe3SKacheong Poon ordrel_mp->b_datap->db_type = M_PROTO; 1010721fffe3SKacheong Poon ((struct T_ordrel_ind *)ordrel_mp->b_rptr)->PRIM_type = T_ORDREL_IND; 1011721fffe3SKacheong Poon ordrel_mp->b_wptr += sizeof (struct T_ordrel_ind); 1012721fffe3SKacheong Poon 1013721fffe3SKacheong Poon /* 1014721fffe3SKacheong Poon * Enter the squeue so that no new packets can come in 1015721fffe3SKacheong Poon */ 10169ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 1017721fffe3SKacheong Poon if (error != 0) { 1018721fffe3SKacheong Poon /* failed to enter, free all the pre-allocated messages. */ 1019721fffe3SKacheong Poon freeb(stropt_mp); 1020721fffe3SKacheong Poon freeb(ordrel_mp); 1021721fffe3SKacheong Poon return (ENOMEM); 1022721fffe3SKacheong Poon } 1023721fffe3SKacheong Poon 1024721fffe3SKacheong Poon /* 1025721fffe3SKacheong Poon * Both endpoints must be of the same type (either STREAMS or 1026721fffe3SKacheong Poon * non-STREAMS) for fusion to be enabled. So if we are fused, 1027721fffe3SKacheong Poon * we have to unfuse. 1028721fffe3SKacheong Poon */ 1029721fffe3SKacheong Poon if (tcp->tcp_fused) 1030721fffe3SKacheong Poon tcp_unfuse(tcp); 1031721fffe3SKacheong Poon 1032721fffe3SKacheong Poon if (tcp->tcp_listener != NULL) { 1033721fffe3SKacheong Poon /* The eager will deal with opts when accept() is called */ 1034721fffe3SKacheong Poon freeb(stropt_mp); 10353e95bd4aSAnders Persson tcp_fallback_eager(tcp, direct_sockfs, quiesced_cb, arg); 1036721fffe3SKacheong Poon } else { 1037721fffe3SKacheong Poon tcp_fallback_noneager(tcp, stropt_mp, q, direct_sockfs, 10383e95bd4aSAnders Persson quiesced_cb, arg); 1039721fffe3SKacheong Poon } 1040721fffe3SKacheong Poon 1041721fffe3SKacheong Poon /* 10423e95bd4aSAnders Persson * No longer a direct socket 10433e95bd4aSAnders Persson * 10443e95bd4aSAnders Persson * Note that we intentionally leave the upper_handle and upcalls 10453e95bd4aSAnders Persson * intact, since eagers may still be using them. 10463e95bd4aSAnders Persson */ 10473e95bd4aSAnders Persson connp->conn_flags &= ~IPCL_NONSTR; 10483e95bd4aSAnders Persson tcp->tcp_ordrel_mp = ordrel_mp; 10493e95bd4aSAnders Persson 10503e95bd4aSAnders Persson /* 1051721fffe3SKacheong Poon * There should be atleast two ref's (IP + TCP) 1052721fffe3SKacheong Poon */ 1053721fffe3SKacheong Poon ASSERT(connp->conn_ref >= 2); 10549ee3959aSAnders Persson squeue_synch_exit(connp); 1055721fffe3SKacheong Poon 1056721fffe3SKacheong Poon return (0); 1057721fffe3SKacheong Poon } 10583e95bd4aSAnders Persson 10593e95bd4aSAnders Persson /* 10603e95bd4aSAnders Persson * Notifies a non-STREAMS based listener about a new connection. This 10613e95bd4aSAnders Persson * function is executed on the *eager*'s squeue once the 3 way handshake 10623e95bd4aSAnders Persson * has completed. Note that the behavior differs from STREAMS, where the 1063e82bc0baSAnders Persson * T_CONN_IND is sent up by tcp_send_conn_ind() while on the *listener*'s 10643e95bd4aSAnders Persson * squeue. 10653e95bd4aSAnders Persson * 1066e82bc0baSAnders Persson * Returns B_TRUE if the notification succeeded and an upper handle was 1067e82bc0baSAnders Persson * obtained. `tcp' should be closed on failure. 10683e95bd4aSAnders Persson */ 10693e95bd4aSAnders Persson boolean_t 10703e95bd4aSAnders Persson tcp_newconn_notify(tcp_t *tcp, ip_recv_attr_t *ira) 10713e95bd4aSAnders Persson { 10723e95bd4aSAnders Persson tcp_t *listener = tcp->tcp_listener; 10733e95bd4aSAnders Persson conn_t *lconnp = listener->tcp_connp; 10743e95bd4aSAnders Persson conn_t *econnp = tcp->tcp_connp; 10753e95bd4aSAnders Persson tcp_t *tail; 10763e95bd4aSAnders Persson ipaddr_t *addr_cache; 10773e95bd4aSAnders Persson sock_upper_handle_t upper; 10783e95bd4aSAnders Persson struct sock_proto_props sopp; 10793e95bd4aSAnders Persson 10803e95bd4aSAnders Persson mutex_enter(&listener->tcp_eager_lock); 10813e95bd4aSAnders Persson /* 10823e95bd4aSAnders Persson * Take the eager out, if it is in the list of droppable eagers 10833e95bd4aSAnders Persson * as we are here because the 3W handshake is over. 10843e95bd4aSAnders Persson */ 10853e95bd4aSAnders Persson MAKE_UNDROPPABLE(tcp); 10863e95bd4aSAnders Persson /* 10873e95bd4aSAnders Persson * The eager already has an extra ref put in tcp_input_data 10883e95bd4aSAnders Persson * so that it stays till accept comes back even though it 10893e95bd4aSAnders Persson * might get into TCPS_CLOSED as a result of a TH_RST etc. 10903e95bd4aSAnders Persson */ 10913e95bd4aSAnders Persson ASSERT(listener->tcp_conn_req_cnt_q0 > 0); 10923e95bd4aSAnders Persson listener->tcp_conn_req_cnt_q0--; 10933e95bd4aSAnders Persson listener->tcp_conn_req_cnt_q++; 10943e95bd4aSAnders Persson 10953e95bd4aSAnders Persson /* Move from SYN_RCVD to ESTABLISHED list */ 10963e95bd4aSAnders Persson tcp->tcp_eager_next_q0->tcp_eager_prev_q0 = tcp->tcp_eager_prev_q0; 10973e95bd4aSAnders Persson tcp->tcp_eager_prev_q0->tcp_eager_next_q0 = tcp->tcp_eager_next_q0; 10983e95bd4aSAnders Persson tcp->tcp_eager_prev_q0 = NULL; 10993e95bd4aSAnders Persson tcp->tcp_eager_next_q0 = NULL; 11003e95bd4aSAnders Persson 11013e95bd4aSAnders Persson /* 11023e95bd4aSAnders Persson * Insert at end of the queue because connections are accepted 11033e95bd4aSAnders Persson * in chronological order. Leaving the older connections at front 11043e95bd4aSAnders Persson * of the queue helps reducing search time. 11053e95bd4aSAnders Persson */ 11063e95bd4aSAnders Persson tail = listener->tcp_eager_last_q; 11073e95bd4aSAnders Persson if (tail != NULL) 11083e95bd4aSAnders Persson tail->tcp_eager_next_q = tcp; 11093e95bd4aSAnders Persson else 11103e95bd4aSAnders Persson listener->tcp_eager_next_q = tcp; 11113e95bd4aSAnders Persson listener->tcp_eager_last_q = tcp; 11123e95bd4aSAnders Persson tcp->tcp_eager_next_q = NULL; 11133e95bd4aSAnders Persson 11143e95bd4aSAnders Persson /* we have timed out before */ 11153e95bd4aSAnders Persson if (tcp->tcp_syn_rcvd_timeout != 0) { 11163e95bd4aSAnders Persson tcp->tcp_syn_rcvd_timeout = 0; 11173e95bd4aSAnders Persson listener->tcp_syn_rcvd_timeout--; 11183e95bd4aSAnders Persson if (listener->tcp_syn_defense && 11193e95bd4aSAnders Persson listener->tcp_syn_rcvd_timeout <= 11203e95bd4aSAnders Persson (listener->tcp_tcps->tcps_conn_req_max_q0 >> 5) && 11213e95bd4aSAnders Persson 10*MINUTES < TICK_TO_MSEC(ddi_get_lbolt64() - 11223e95bd4aSAnders Persson listener->tcp_last_rcv_lbolt)) { 11233e95bd4aSAnders Persson /* 11243e95bd4aSAnders Persson * Turn off the defense mode if we 11253e95bd4aSAnders Persson * believe the SYN attack is over. 11263e95bd4aSAnders Persson */ 11273e95bd4aSAnders Persson listener->tcp_syn_defense = B_FALSE; 11283e95bd4aSAnders Persson if (listener->tcp_ip_addr_cache) { 11293e95bd4aSAnders Persson kmem_free((void *)listener->tcp_ip_addr_cache, 11303e95bd4aSAnders Persson IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t)); 11313e95bd4aSAnders Persson listener->tcp_ip_addr_cache = NULL; 11323e95bd4aSAnders Persson } 11333e95bd4aSAnders Persson } 11343e95bd4aSAnders Persson } 11353e95bd4aSAnders Persson addr_cache = (ipaddr_t *)(listener->tcp_ip_addr_cache); 11363e95bd4aSAnders Persson if (addr_cache != NULL) { 11373e95bd4aSAnders Persson /* 11383e95bd4aSAnders Persson * We have finished a 3-way handshake with this 11393e95bd4aSAnders Persson * remote host. This proves the IP addr is good. 11403e95bd4aSAnders Persson * Cache it! 11413e95bd4aSAnders Persson */ 11423e95bd4aSAnders Persson addr_cache[IP_ADDR_CACHE_HASH(tcp->tcp_connp->conn_faddr_v4)] = 11433e95bd4aSAnders Persson tcp->tcp_connp->conn_faddr_v4; 11443e95bd4aSAnders Persson } 11453e95bd4aSAnders Persson mutex_exit(&listener->tcp_eager_lock); 11463e95bd4aSAnders Persson 11473e95bd4aSAnders Persson /* 11483e95bd4aSAnders Persson * Notify the ULP about the newconn. It is guaranteed that no 11493e95bd4aSAnders Persson * tcp_accept() call will be made for the eager if the 11503e95bd4aSAnders Persson * notification fails. 11513e95bd4aSAnders Persson */ 11523e95bd4aSAnders Persson if ((upper = (*lconnp->conn_upcalls->su_newconn) 11533e95bd4aSAnders Persson (lconnp->conn_upper_handle, (sock_lower_handle_t)econnp, 11543e95bd4aSAnders Persson &sock_tcp_downcalls, ira->ira_cred, ira->ira_cpid, 11553e95bd4aSAnders Persson &econnp->conn_upcalls)) == NULL) { 11563e95bd4aSAnders Persson return (B_FALSE); 11573e95bd4aSAnders Persson } 11583e95bd4aSAnders Persson econnp->conn_upper_handle = upper; 11593e95bd4aSAnders Persson 11603e95bd4aSAnders Persson tcp->tcp_detached = B_FALSE; 11613e95bd4aSAnders Persson tcp->tcp_hard_binding = B_FALSE; 11623e95bd4aSAnders Persson tcp->tcp_tconnind_started = B_TRUE; 11633e95bd4aSAnders Persson 11643e95bd4aSAnders Persson if (econnp->conn_keepalive) { 11653e95bd4aSAnders Persson tcp->tcp_ka_last_intrvl = 0; 11663e95bd4aSAnders Persson tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer, 11673e95bd4aSAnders Persson tcp->tcp_ka_interval); 11683e95bd4aSAnders Persson } 11693e95bd4aSAnders Persson 11703e95bd4aSAnders Persson /* Update the necessary parameters */ 11713e95bd4aSAnders Persson tcp_get_proto_props(tcp, &sopp); 11723e95bd4aSAnders Persson 11733e95bd4aSAnders Persson (*econnp->conn_upcalls->su_set_proto_props) 11743e95bd4aSAnders Persson (econnp->conn_upper_handle, &sopp); 11753e95bd4aSAnders Persson 11763e95bd4aSAnders Persson return (B_TRUE); 11773e95bd4aSAnders Persson } 1178