1439b3deaSKacheong Poon /* 2439b3deaSKacheong Poon * CDDL HEADER START 3439b3deaSKacheong Poon * 4439b3deaSKacheong Poon * The contents of this file are subject to the terms of the 5439b3deaSKacheong Poon * Common Development and Distribution License (the "License"). 6439b3deaSKacheong Poon * You may not use this file except in compliance with the License. 7439b3deaSKacheong Poon * 8439b3deaSKacheong Poon * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9439b3deaSKacheong Poon * or http://www.opensolaris.org/os/licensing. 10439b3deaSKacheong Poon * See the License for the specific language governing permissions 11439b3deaSKacheong Poon * and limitations under the License. 12439b3deaSKacheong Poon * 13439b3deaSKacheong Poon * When distributing Covered Code, include this CDDL HEADER in each 14439b3deaSKacheong Poon * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15439b3deaSKacheong Poon * If applicable, add the following below this CDDL HEADER, with the 16439b3deaSKacheong Poon * fields enclosed by brackets "[]" replaced with your own identifying 17439b3deaSKacheong Poon * information: Portions Copyright [yyyy] [name of copyright owner] 18439b3deaSKacheong Poon * 19439b3deaSKacheong Poon * CDDL HEADER END 20439b3deaSKacheong Poon */ 21439b3deaSKacheong Poon 22439b3deaSKacheong Poon /* 233e95bd4aSAnders Persson * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24439b3deaSKacheong Poon */ 25439b3deaSKacheong Poon 26439b3deaSKacheong Poon /* This files contains all TCP TLI/TPI related functions */ 27439b3deaSKacheong Poon 28439b3deaSKacheong Poon #include <sys/types.h> 29439b3deaSKacheong Poon #include <sys/stream.h> 30439b3deaSKacheong Poon #include <sys/strsun.h> 31439b3deaSKacheong Poon #include <sys/strsubr.h> 32439b3deaSKacheong Poon #include <sys/stropts.h> 33439b3deaSKacheong Poon #include <sys/strlog.h> 34439b3deaSKacheong Poon #define _SUN_TPI_VERSION 2 35439b3deaSKacheong Poon #include <sys/tihdr.h> 36439b3deaSKacheong Poon #include <sys/suntpi.h> 37439b3deaSKacheong Poon #include <sys/xti_inet.h> 38439b3deaSKacheong Poon #include <sys/squeue_impl.h> 39439b3deaSKacheong Poon #include <sys/squeue.h> 40439b3deaSKacheong Poon 41439b3deaSKacheong Poon #include <inet/common.h> 42439b3deaSKacheong Poon #include <inet/ip.h> 43439b3deaSKacheong Poon #include <inet/tcp.h> 44439b3deaSKacheong Poon #include <inet/tcp_impl.h> 45439b3deaSKacheong Poon #include <inet/proto_set.h> 46439b3deaSKacheong Poon 47439b3deaSKacheong Poon static void tcp_accept_swap(tcp_t *, tcp_t *, tcp_t *); 48439b3deaSKacheong Poon static int tcp_conprim_opt_process(tcp_t *, mblk_t *, int *, int *, int *); 49439b3deaSKacheong Poon 50439b3deaSKacheong Poon void 51439b3deaSKacheong Poon tcp_use_pure_tpi(tcp_t *tcp) 52439b3deaSKacheong Poon { 53439b3deaSKacheong Poon conn_t *connp = tcp->tcp_connp; 54439b3deaSKacheong Poon 55439b3deaSKacheong Poon #ifdef _ILP32 56439b3deaSKacheong Poon tcp->tcp_acceptor_id = (t_uscalar_t)connp->conn_rq; 57439b3deaSKacheong Poon #else 58439b3deaSKacheong Poon tcp->tcp_acceptor_id = connp->conn_dev; 59439b3deaSKacheong Poon #endif 60439b3deaSKacheong Poon /* 61439b3deaSKacheong Poon * Insert this socket into the acceptor hash. 62439b3deaSKacheong Poon * We might need it for T_CONN_RES message 63439b3deaSKacheong Poon */ 64439b3deaSKacheong Poon tcp_acceptor_hash_insert(tcp->tcp_acceptor_id, tcp); 65439b3deaSKacheong Poon 66439b3deaSKacheong Poon tcp->tcp_issocket = B_FALSE; 67439b3deaSKacheong Poon TCP_STAT(tcp->tcp_tcps, tcp_sock_fallback); 68439b3deaSKacheong Poon } 69439b3deaSKacheong Poon 70439b3deaSKacheong Poon /* Shorthand to generate and send TPI error acks to our client */ 71439b3deaSKacheong Poon void 72439b3deaSKacheong Poon tcp_err_ack(tcp_t *tcp, mblk_t *mp, int t_error, int sys_error) 73439b3deaSKacheong Poon { 74439b3deaSKacheong Poon if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 75439b3deaSKacheong Poon putnext(tcp->tcp_connp->conn_rq, mp); 76439b3deaSKacheong Poon } 77439b3deaSKacheong Poon 78439b3deaSKacheong Poon /* Shorthand to generate and send TPI error acks to our client */ 79439b3deaSKacheong Poon void 80439b3deaSKacheong Poon tcp_err_ack_prim(tcp_t *tcp, mblk_t *mp, int primitive, 81439b3deaSKacheong Poon int t_error, int sys_error) 82439b3deaSKacheong Poon { 83439b3deaSKacheong Poon struct T_error_ack *teackp; 84439b3deaSKacheong Poon 85439b3deaSKacheong Poon if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 86439b3deaSKacheong Poon M_PCPROTO, T_ERROR_ACK)) != NULL) { 87439b3deaSKacheong Poon teackp = (struct T_error_ack *)mp->b_rptr; 88439b3deaSKacheong Poon teackp->ERROR_prim = primitive; 89439b3deaSKacheong Poon teackp->TLI_error = t_error; 90439b3deaSKacheong Poon teackp->UNIX_error = sys_error; 91439b3deaSKacheong Poon putnext(tcp->tcp_connp->conn_rq, mp); 92439b3deaSKacheong Poon } 93439b3deaSKacheong Poon } 94439b3deaSKacheong Poon 95439b3deaSKacheong Poon /* 96439b3deaSKacheong Poon * TCP routine to get the values of options. 97439b3deaSKacheong Poon */ 98439b3deaSKacheong Poon int 99439b3deaSKacheong Poon tcp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 100439b3deaSKacheong Poon { 101439b3deaSKacheong Poon return (tcp_opt_get(Q_TO_CONN(q), level, name, ptr)); 102439b3deaSKacheong Poon } 103439b3deaSKacheong Poon 104439b3deaSKacheong Poon /* ARGSUSED */ 105439b3deaSKacheong Poon int 106439b3deaSKacheong Poon tcp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 107439b3deaSKacheong Poon uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 108439b3deaSKacheong Poon void *thisdg_attrs, cred_t *cr) 109439b3deaSKacheong Poon { 110439b3deaSKacheong Poon conn_t *connp = Q_TO_CONN(q); 111439b3deaSKacheong Poon 112439b3deaSKacheong Poon return (tcp_opt_set(connp, optset_context, level, name, inlen, invalp, 113439b3deaSKacheong Poon outlenp, outvalp, thisdg_attrs, cr)); 114439b3deaSKacheong Poon } 115439b3deaSKacheong Poon 116439b3deaSKacheong Poon static int 117439b3deaSKacheong Poon tcp_conprim_opt_process(tcp_t *tcp, mblk_t *mp, int *do_disconnectp, 118439b3deaSKacheong Poon int *t_errorp, int *sys_errorp) 119439b3deaSKacheong Poon { 120439b3deaSKacheong Poon int error; 121439b3deaSKacheong Poon int is_absreq_failure; 122439b3deaSKacheong Poon t_scalar_t *opt_lenp; 123439b3deaSKacheong Poon t_scalar_t opt_offset; 124439b3deaSKacheong Poon int prim_type; 125439b3deaSKacheong Poon struct T_conn_req *tcreqp; 126439b3deaSKacheong Poon struct T_conn_res *tcresp; 127439b3deaSKacheong Poon cred_t *cr; 128439b3deaSKacheong Poon 129439b3deaSKacheong Poon /* 130439b3deaSKacheong Poon * All Solaris components should pass a db_credp 131439b3deaSKacheong Poon * for this TPI message, hence we ASSERT. 132439b3deaSKacheong Poon * But in case there is some other M_PROTO that looks 133439b3deaSKacheong Poon * like a TPI message sent by some other kernel 134439b3deaSKacheong Poon * component, we check and return an error. 135439b3deaSKacheong Poon */ 136439b3deaSKacheong Poon cr = msg_getcred(mp, NULL); 137439b3deaSKacheong Poon ASSERT(cr != NULL); 138439b3deaSKacheong Poon if (cr == NULL) 139439b3deaSKacheong Poon return (-1); 140439b3deaSKacheong Poon 141439b3deaSKacheong Poon prim_type = ((union T_primitives *)mp->b_rptr)->type; 142439b3deaSKacheong Poon ASSERT(prim_type == T_CONN_REQ || prim_type == O_T_CONN_RES || 143439b3deaSKacheong Poon prim_type == T_CONN_RES); 144439b3deaSKacheong Poon 145439b3deaSKacheong Poon switch (prim_type) { 146439b3deaSKacheong Poon case T_CONN_REQ: 147439b3deaSKacheong Poon tcreqp = (struct T_conn_req *)mp->b_rptr; 148439b3deaSKacheong Poon opt_offset = tcreqp->OPT_offset; 149439b3deaSKacheong Poon opt_lenp = (t_scalar_t *)&tcreqp->OPT_length; 150439b3deaSKacheong Poon break; 151439b3deaSKacheong Poon case O_T_CONN_RES: 152439b3deaSKacheong Poon case T_CONN_RES: 153439b3deaSKacheong Poon tcresp = (struct T_conn_res *)mp->b_rptr; 154439b3deaSKacheong Poon opt_offset = tcresp->OPT_offset; 155439b3deaSKacheong Poon opt_lenp = (t_scalar_t *)&tcresp->OPT_length; 156439b3deaSKacheong Poon break; 157439b3deaSKacheong Poon } 158439b3deaSKacheong Poon 159439b3deaSKacheong Poon *t_errorp = 0; 160439b3deaSKacheong Poon *sys_errorp = 0; 161439b3deaSKacheong Poon *do_disconnectp = 0; 162439b3deaSKacheong Poon 163439b3deaSKacheong Poon error = tpi_optcom_buf(tcp->tcp_connp->conn_wq, mp, opt_lenp, 164439b3deaSKacheong Poon opt_offset, cr, &tcp_opt_obj, 165439b3deaSKacheong Poon NULL, &is_absreq_failure); 166439b3deaSKacheong Poon 167439b3deaSKacheong Poon switch (error) { 168439b3deaSKacheong Poon case 0: /* no error */ 169439b3deaSKacheong Poon ASSERT(is_absreq_failure == 0); 170439b3deaSKacheong Poon return (0); 171439b3deaSKacheong Poon case ENOPROTOOPT: 172439b3deaSKacheong Poon *t_errorp = TBADOPT; 173439b3deaSKacheong Poon break; 174439b3deaSKacheong Poon case EACCES: 175439b3deaSKacheong Poon *t_errorp = TACCES; 176439b3deaSKacheong Poon break; 177439b3deaSKacheong Poon default: 178439b3deaSKacheong Poon *t_errorp = TSYSERR; *sys_errorp = error; 179439b3deaSKacheong Poon break; 180439b3deaSKacheong Poon } 181439b3deaSKacheong Poon if (is_absreq_failure != 0) { 182439b3deaSKacheong Poon /* 183439b3deaSKacheong Poon * The connection request should get the local ack 184439b3deaSKacheong Poon * T_OK_ACK and then a T_DISCON_IND. 185439b3deaSKacheong Poon */ 186439b3deaSKacheong Poon *do_disconnectp = 1; 187439b3deaSKacheong Poon } 188439b3deaSKacheong Poon return (-1); 189439b3deaSKacheong Poon } 190439b3deaSKacheong Poon 191439b3deaSKacheong Poon void 192439b3deaSKacheong Poon tcp_tpi_bind(tcp_t *tcp, mblk_t *mp) 193439b3deaSKacheong Poon { 194439b3deaSKacheong Poon int error; 195439b3deaSKacheong Poon conn_t *connp = tcp->tcp_connp; 196439b3deaSKacheong Poon struct sockaddr *sa; 197439b3deaSKacheong Poon mblk_t *mp1; 198439b3deaSKacheong Poon struct T_bind_req *tbr; 199439b3deaSKacheong Poon int backlog; 200439b3deaSKacheong Poon socklen_t len; 201439b3deaSKacheong Poon sin_t *sin; 202439b3deaSKacheong Poon sin6_t *sin6; 203439b3deaSKacheong Poon cred_t *cr; 204439b3deaSKacheong Poon 205439b3deaSKacheong Poon /* 206439b3deaSKacheong Poon * All Solaris components should pass a db_credp 207439b3deaSKacheong Poon * for this TPI message, hence we ASSERT. 208439b3deaSKacheong Poon * But in case there is some other M_PROTO that looks 209439b3deaSKacheong Poon * like a TPI message sent by some other kernel 210439b3deaSKacheong Poon * component, we check and return an error. 211439b3deaSKacheong Poon */ 212439b3deaSKacheong Poon cr = msg_getcred(mp, NULL); 213439b3deaSKacheong Poon ASSERT(cr != NULL); 214439b3deaSKacheong Poon if (cr == NULL) { 215439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TSYSERR, EINVAL); 216439b3deaSKacheong Poon return; 217439b3deaSKacheong Poon } 218439b3deaSKacheong Poon 219439b3deaSKacheong Poon ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX); 220439b3deaSKacheong Poon if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 221439b3deaSKacheong Poon if (connp->conn_debug) { 222439b3deaSKacheong Poon (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 223439b3deaSKacheong Poon "tcp_tpi_bind: bad req, len %u", 224439b3deaSKacheong Poon (uint_t)(mp->b_wptr - mp->b_rptr)); 225439b3deaSKacheong Poon } 226439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TPROTO, 0); 227439b3deaSKacheong Poon return; 228439b3deaSKacheong Poon } 229439b3deaSKacheong Poon /* Make sure the largest address fits */ 230439b3deaSKacheong Poon mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 231439b3deaSKacheong Poon if (mp1 == NULL) { 232439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TSYSERR, ENOMEM); 233439b3deaSKacheong Poon return; 234439b3deaSKacheong Poon } 235439b3deaSKacheong Poon mp = mp1; 236439b3deaSKacheong Poon tbr = (struct T_bind_req *)mp->b_rptr; 237439b3deaSKacheong Poon 238439b3deaSKacheong Poon backlog = tbr->CONIND_number; 239439b3deaSKacheong Poon len = tbr->ADDR_length; 240439b3deaSKacheong Poon 241439b3deaSKacheong Poon switch (len) { 242439b3deaSKacheong Poon case 0: /* request for a generic port */ 243439b3deaSKacheong Poon tbr->ADDR_offset = sizeof (struct T_bind_req); 244439b3deaSKacheong Poon if (connp->conn_family == AF_INET) { 245439b3deaSKacheong Poon tbr->ADDR_length = sizeof (sin_t); 246439b3deaSKacheong Poon sin = (sin_t *)&tbr[1]; 247439b3deaSKacheong Poon *sin = sin_null; 248439b3deaSKacheong Poon sin->sin_family = AF_INET; 249439b3deaSKacheong Poon sa = (struct sockaddr *)sin; 250439b3deaSKacheong Poon len = sizeof (sin_t); 251439b3deaSKacheong Poon mp->b_wptr = (uchar_t *)&sin[1]; 252439b3deaSKacheong Poon } else { 253439b3deaSKacheong Poon ASSERT(connp->conn_family == AF_INET6); 254439b3deaSKacheong Poon tbr->ADDR_length = sizeof (sin6_t); 255439b3deaSKacheong Poon sin6 = (sin6_t *)&tbr[1]; 256439b3deaSKacheong Poon *sin6 = sin6_null; 257439b3deaSKacheong Poon sin6->sin6_family = AF_INET6; 258439b3deaSKacheong Poon sa = (struct sockaddr *)sin6; 259439b3deaSKacheong Poon len = sizeof (sin6_t); 260439b3deaSKacheong Poon mp->b_wptr = (uchar_t *)&sin6[1]; 261439b3deaSKacheong Poon } 262439b3deaSKacheong Poon break; 263439b3deaSKacheong Poon 264439b3deaSKacheong Poon case sizeof (sin_t): /* Complete IPv4 address */ 265439b3deaSKacheong Poon sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 266439b3deaSKacheong Poon sizeof (sin_t)); 267439b3deaSKacheong Poon break; 268439b3deaSKacheong Poon 269439b3deaSKacheong Poon case sizeof (sin6_t): /* Complete IPv6 address */ 270439b3deaSKacheong Poon sa = (struct sockaddr *)mi_offset_param(mp, 271439b3deaSKacheong Poon tbr->ADDR_offset, sizeof (sin6_t)); 272439b3deaSKacheong Poon break; 273439b3deaSKacheong Poon 274439b3deaSKacheong Poon default: 275439b3deaSKacheong Poon if (connp->conn_debug) { 276439b3deaSKacheong Poon (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 277439b3deaSKacheong Poon "tcp_tpi_bind: bad address length, %d", 278439b3deaSKacheong Poon tbr->ADDR_length); 279439b3deaSKacheong Poon } 280439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TBADADDR, 0); 281439b3deaSKacheong Poon return; 282439b3deaSKacheong Poon } 283439b3deaSKacheong Poon 284439b3deaSKacheong Poon if (backlog > 0) { 285439b3deaSKacheong Poon error = tcp_do_listen(connp, sa, len, backlog, DB_CRED(mp), 286439b3deaSKacheong Poon tbr->PRIM_type != O_T_BIND_REQ); 287439b3deaSKacheong Poon } else { 288439b3deaSKacheong Poon error = tcp_do_bind(connp, sa, len, DB_CRED(mp), 289439b3deaSKacheong Poon tbr->PRIM_type != O_T_BIND_REQ); 290439b3deaSKacheong Poon } 291439b3deaSKacheong Poon done: 292439b3deaSKacheong Poon if (error > 0) { 293439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TSYSERR, error); 294439b3deaSKacheong Poon } else if (error < 0) { 295439b3deaSKacheong Poon tcp_err_ack(tcp, mp, -error, 0); 296439b3deaSKacheong Poon } else { 297439b3deaSKacheong Poon /* 298439b3deaSKacheong Poon * Update port information as sockfs/tpi needs it for checking 299439b3deaSKacheong Poon */ 300439b3deaSKacheong Poon if (connp->conn_family == AF_INET) { 301439b3deaSKacheong Poon sin = (sin_t *)sa; 302439b3deaSKacheong Poon sin->sin_port = connp->conn_lport; 303439b3deaSKacheong Poon } else { 304439b3deaSKacheong Poon sin6 = (sin6_t *)sa; 305439b3deaSKacheong Poon sin6->sin6_port = connp->conn_lport; 306439b3deaSKacheong Poon } 307439b3deaSKacheong Poon mp->b_datap->db_type = M_PCPROTO; 308439b3deaSKacheong Poon tbr->PRIM_type = T_BIND_ACK; 309439b3deaSKacheong Poon putnext(connp->conn_rq, mp); 310439b3deaSKacheong Poon } 311439b3deaSKacheong Poon } 312439b3deaSKacheong Poon 313439b3deaSKacheong Poon /* tcp_unbind is called by tcp_wput_proto to handle T_UNBIND_REQ messages. */ 314439b3deaSKacheong Poon void 315439b3deaSKacheong Poon tcp_tpi_unbind(tcp_t *tcp, mblk_t *mp) 316439b3deaSKacheong Poon { 317439b3deaSKacheong Poon conn_t *connp = tcp->tcp_connp; 318439b3deaSKacheong Poon int error; 319439b3deaSKacheong Poon 320439b3deaSKacheong Poon error = tcp_do_unbind(connp); 321439b3deaSKacheong Poon if (error > 0) { 322439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TSYSERR, error); 323439b3deaSKacheong Poon } else if (error < 0) { 324439b3deaSKacheong Poon tcp_err_ack(tcp, mp, -error, 0); 325439b3deaSKacheong Poon } else { 326439b3deaSKacheong Poon /* Send M_FLUSH according to TPI */ 327439b3deaSKacheong Poon (void) putnextctl1(connp->conn_rq, M_FLUSH, FLUSHRW); 328439b3deaSKacheong Poon 329439b3deaSKacheong Poon mp = mi_tpi_ok_ack_alloc(mp); 330439b3deaSKacheong Poon if (mp != NULL) 331439b3deaSKacheong Poon putnext(connp->conn_rq, mp); 332439b3deaSKacheong Poon } 333439b3deaSKacheong Poon } 334439b3deaSKacheong Poon 335439b3deaSKacheong Poon int 336439b3deaSKacheong Poon tcp_tpi_close(queue_t *q, int flags) 337439b3deaSKacheong Poon { 338439b3deaSKacheong Poon conn_t *connp; 339439b3deaSKacheong Poon 340439b3deaSKacheong Poon ASSERT(WR(q)->q_next == NULL); 341439b3deaSKacheong Poon 342439b3deaSKacheong Poon if (flags & SO_FALLBACK) { 343439b3deaSKacheong Poon /* 344439b3deaSKacheong Poon * stream is being closed while in fallback 345439b3deaSKacheong Poon * simply free the resources that were allocated 346439b3deaSKacheong Poon */ 347439b3deaSKacheong Poon inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 348439b3deaSKacheong Poon qprocsoff(q); 349439b3deaSKacheong Poon goto done; 350439b3deaSKacheong Poon } 351439b3deaSKacheong Poon 352439b3deaSKacheong Poon connp = Q_TO_CONN(q); 353439b3deaSKacheong Poon /* 354439b3deaSKacheong Poon * We are being closed as /dev/tcp or /dev/tcp6. 355439b3deaSKacheong Poon */ 356439b3deaSKacheong Poon tcp_close_common(connp, flags); 357439b3deaSKacheong Poon 358439b3deaSKacheong Poon qprocsoff(q); 359439b3deaSKacheong Poon inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 360439b3deaSKacheong Poon 361439b3deaSKacheong Poon /* 362439b3deaSKacheong Poon * Drop IP's reference on the conn. This is the last reference 363439b3deaSKacheong Poon * on the connp if the state was less than established. If the 364439b3deaSKacheong Poon * connection has gone into timewait state, then we will have 365439b3deaSKacheong Poon * one ref for the TCP and one more ref (total of two) for the 366439b3deaSKacheong Poon * classifier connected hash list (a timewait connections stays 367439b3deaSKacheong Poon * in connected hash till closed). 368439b3deaSKacheong Poon * 369439b3deaSKacheong Poon * We can't assert the references because there might be other 370439b3deaSKacheong Poon * transient reference places because of some walkers or queued 371439b3deaSKacheong Poon * packets in squeue for the timewait state. 372439b3deaSKacheong Poon */ 373439b3deaSKacheong Poon CONN_DEC_REF(connp); 374439b3deaSKacheong Poon done: 375439b3deaSKacheong Poon q->q_ptr = WR(q)->q_ptr = NULL; 376439b3deaSKacheong Poon return (0); 377439b3deaSKacheong Poon } 378439b3deaSKacheong Poon 379439b3deaSKacheong Poon int 380439b3deaSKacheong Poon tcp_tpi_close_accept(queue_t *q) 381439b3deaSKacheong Poon { 382439b3deaSKacheong Poon vmem_t *minor_arena; 383439b3deaSKacheong Poon dev_t conn_dev; 384439b3deaSKacheong Poon extern struct qinit tcp_acceptor_winit; 385439b3deaSKacheong Poon 386439b3deaSKacheong Poon ASSERT(WR(q)->q_qinfo == &tcp_acceptor_winit); 387439b3deaSKacheong Poon 388439b3deaSKacheong Poon /* 389439b3deaSKacheong Poon * We had opened an acceptor STREAM for sockfs which is 390439b3deaSKacheong Poon * now being closed due to some error. 391439b3deaSKacheong Poon */ 392439b3deaSKacheong Poon qprocsoff(q); 393439b3deaSKacheong Poon 394439b3deaSKacheong Poon minor_arena = (vmem_t *)WR(q)->q_ptr; 395439b3deaSKacheong Poon conn_dev = (dev_t)RD(q)->q_ptr; 396439b3deaSKacheong Poon ASSERT(minor_arena != NULL); 397439b3deaSKacheong Poon ASSERT(conn_dev != 0); 398439b3deaSKacheong Poon inet_minor_free(minor_arena, conn_dev); 399439b3deaSKacheong Poon q->q_ptr = WR(q)->q_ptr = NULL; 400439b3deaSKacheong Poon return (0); 401439b3deaSKacheong Poon } 402439b3deaSKacheong Poon 403439b3deaSKacheong Poon /* 404439b3deaSKacheong Poon * Put a connection confirmation message upstream built from the 405439b3deaSKacheong Poon * address/flowid information with the conn and iph. Report our success or 406439b3deaSKacheong Poon * failure. 407439b3deaSKacheong Poon */ 408439b3deaSKacheong Poon boolean_t 409439b3deaSKacheong Poon tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, mblk_t *idmp, 410439b3deaSKacheong Poon mblk_t **defermp, ip_recv_attr_t *ira) 411439b3deaSKacheong Poon { 412439b3deaSKacheong Poon sin_t sin; 413439b3deaSKacheong Poon sin6_t sin6; 414439b3deaSKacheong Poon mblk_t *mp; 415439b3deaSKacheong Poon char *optp = NULL; 416439b3deaSKacheong Poon int optlen = 0; 417439b3deaSKacheong Poon conn_t *connp = tcp->tcp_connp; 418439b3deaSKacheong Poon 419439b3deaSKacheong Poon if (defermp != NULL) 420439b3deaSKacheong Poon *defermp = NULL; 421439b3deaSKacheong Poon 422439b3deaSKacheong Poon if (tcp->tcp_conn.tcp_opts_conn_req != NULL) { 423439b3deaSKacheong Poon /* 424439b3deaSKacheong Poon * Return in T_CONN_CON results of option negotiation through 425439b3deaSKacheong Poon * the T_CONN_REQ. Note: If there is an real end-to-end option 426439b3deaSKacheong Poon * negotiation, then what is received from remote end needs 427439b3deaSKacheong Poon * to be taken into account but there is no such thing (yet?) 428439b3deaSKacheong Poon * in our TCP/IP. 429439b3deaSKacheong Poon * Note: We do not use mi_offset_param() here as 430439b3deaSKacheong Poon * tcp_opts_conn_req contents do not directly come from 431439b3deaSKacheong Poon * an application and are either generated in kernel or 432439b3deaSKacheong Poon * from user input that was already verified. 433439b3deaSKacheong Poon */ 434439b3deaSKacheong Poon mp = tcp->tcp_conn.tcp_opts_conn_req; 435439b3deaSKacheong Poon optp = (char *)(mp->b_rptr + 436439b3deaSKacheong Poon ((struct T_conn_req *)mp->b_rptr)->OPT_offset); 437439b3deaSKacheong Poon optlen = (int) 438439b3deaSKacheong Poon ((struct T_conn_req *)mp->b_rptr)->OPT_length; 439439b3deaSKacheong Poon } 440439b3deaSKacheong Poon 441439b3deaSKacheong Poon if (IPH_HDR_VERSION(iphdr) == IPV4_VERSION) { 442439b3deaSKacheong Poon 443439b3deaSKacheong Poon /* packet is IPv4 */ 444439b3deaSKacheong Poon if (connp->conn_family == AF_INET) { 445439b3deaSKacheong Poon sin = sin_null; 446439b3deaSKacheong Poon sin.sin_addr.s_addr = connp->conn_faddr_v4; 447439b3deaSKacheong Poon sin.sin_port = connp->conn_fport; 448439b3deaSKacheong Poon sin.sin_family = AF_INET; 449439b3deaSKacheong Poon mp = mi_tpi_conn_con(NULL, (char *)&sin, 450439b3deaSKacheong Poon (int)sizeof (sin_t), optp, optlen); 451439b3deaSKacheong Poon } else { 452439b3deaSKacheong Poon sin6 = sin6_null; 453439b3deaSKacheong Poon sin6.sin6_addr = connp->conn_faddr_v6; 454439b3deaSKacheong Poon sin6.sin6_port = connp->conn_fport; 455439b3deaSKacheong Poon sin6.sin6_family = AF_INET6; 456439b3deaSKacheong Poon mp = mi_tpi_conn_con(NULL, (char *)&sin6, 457439b3deaSKacheong Poon (int)sizeof (sin6_t), optp, optlen); 458439b3deaSKacheong Poon 459439b3deaSKacheong Poon } 460439b3deaSKacheong Poon } else { 461439b3deaSKacheong Poon ip6_t *ip6h = (ip6_t *)iphdr; 462439b3deaSKacheong Poon 463439b3deaSKacheong Poon ASSERT(IPH_HDR_VERSION(iphdr) == IPV6_VERSION); 464439b3deaSKacheong Poon ASSERT(connp->conn_family == AF_INET6); 465439b3deaSKacheong Poon sin6 = sin6_null; 466439b3deaSKacheong Poon sin6.sin6_addr = connp->conn_faddr_v6; 467439b3deaSKacheong Poon sin6.sin6_port = connp->conn_fport; 468439b3deaSKacheong Poon sin6.sin6_family = AF_INET6; 469439b3deaSKacheong Poon sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 470439b3deaSKacheong Poon mp = mi_tpi_conn_con(NULL, (char *)&sin6, 471439b3deaSKacheong Poon (int)sizeof (sin6_t), optp, optlen); 472439b3deaSKacheong Poon } 473439b3deaSKacheong Poon 474439b3deaSKacheong Poon if (!mp) 475439b3deaSKacheong Poon return (B_FALSE); 476439b3deaSKacheong Poon 477439b3deaSKacheong Poon mblk_copycred(mp, idmp); 478439b3deaSKacheong Poon 479439b3deaSKacheong Poon if (defermp == NULL) { 480439b3deaSKacheong Poon conn_t *connp = tcp->tcp_connp; 481439b3deaSKacheong Poon if (IPCL_IS_NONSTR(connp)) { 482439b3deaSKacheong Poon (*connp->conn_upcalls->su_connected) 483439b3deaSKacheong Poon (connp->conn_upper_handle, tcp->tcp_connid, 484439b3deaSKacheong Poon ira->ira_cred, ira->ira_cpid); 485439b3deaSKacheong Poon freemsg(mp); 486439b3deaSKacheong Poon } else { 487439b3deaSKacheong Poon if (ira->ira_cred != NULL) { 488439b3deaSKacheong Poon /* So that getpeerucred works for TPI sockfs */ 489439b3deaSKacheong Poon mblk_setcred(mp, ira->ira_cred, ira->ira_cpid); 490439b3deaSKacheong Poon } 491439b3deaSKacheong Poon putnext(connp->conn_rq, mp); 492439b3deaSKacheong Poon } 493439b3deaSKacheong Poon } else { 494439b3deaSKacheong Poon *defermp = mp; 495439b3deaSKacheong Poon } 496439b3deaSKacheong Poon 497439b3deaSKacheong Poon if (tcp->tcp_conn.tcp_opts_conn_req != NULL) 498439b3deaSKacheong Poon tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req); 499439b3deaSKacheong Poon return (B_TRUE); 500439b3deaSKacheong Poon } 501439b3deaSKacheong Poon 502439b3deaSKacheong Poon /* 503439b3deaSKacheong Poon * Successful connect request processing begins when our client passes 504439b3deaSKacheong Poon * a T_CONN_REQ message into tcp_wput(), which performs function calls into 505439b3deaSKacheong Poon * IP and the passes a T_OK_ACK (or T_ERROR_ACK upstream). 506439b3deaSKacheong Poon * 507439b3deaSKacheong Poon * After various error checks are completed, tcp_tpi_connect() lays 508439b3deaSKacheong Poon * the target address and port into the composite header template. 509439b3deaSKacheong Poon * Then we ask IP for information, including a source address if we didn't 510439b3deaSKacheong Poon * already have one. Finally we prepare to send the SYN packet, and then 511439b3deaSKacheong Poon * send up the T_OK_ACK reply message. 512439b3deaSKacheong Poon */ 513439b3deaSKacheong Poon void 514439b3deaSKacheong Poon tcp_tpi_connect(tcp_t *tcp, mblk_t *mp) 515439b3deaSKacheong Poon { 516439b3deaSKacheong Poon sin_t *sin; 517439b3deaSKacheong Poon struct T_conn_req *tcr; 518439b3deaSKacheong Poon struct sockaddr *sa; 519439b3deaSKacheong Poon socklen_t len; 520439b3deaSKacheong Poon int error; 521439b3deaSKacheong Poon cred_t *cr; 522439b3deaSKacheong Poon pid_t cpid; 523439b3deaSKacheong Poon conn_t *connp = tcp->tcp_connp; 524439b3deaSKacheong Poon queue_t *q = connp->conn_wq; 525439b3deaSKacheong Poon 526439b3deaSKacheong Poon /* 527439b3deaSKacheong Poon * All Solaris components should pass a db_credp 528439b3deaSKacheong Poon * for this TPI message, hence we ASSERT. 529439b3deaSKacheong Poon * But in case there is some other M_PROTO that looks 530439b3deaSKacheong Poon * like a TPI message sent by some other kernel 531439b3deaSKacheong Poon * component, we check and return an error. 532439b3deaSKacheong Poon */ 533439b3deaSKacheong Poon cr = msg_getcred(mp, &cpid); 534439b3deaSKacheong Poon ASSERT(cr != NULL); 535439b3deaSKacheong Poon if (cr == NULL) { 536439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TSYSERR, EINVAL); 537439b3deaSKacheong Poon return; 538439b3deaSKacheong Poon } 539439b3deaSKacheong Poon 540439b3deaSKacheong Poon tcr = (struct T_conn_req *)mp->b_rptr; 541439b3deaSKacheong Poon 542439b3deaSKacheong Poon ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX); 543439b3deaSKacheong Poon if ((mp->b_wptr - mp->b_rptr) < sizeof (*tcr)) { 544439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TPROTO, 0); 545439b3deaSKacheong Poon return; 546439b3deaSKacheong Poon } 547439b3deaSKacheong Poon 548439b3deaSKacheong Poon /* 549439b3deaSKacheong Poon * Pre-allocate the T_ordrel_ind mblk so that at close time, we 550439b3deaSKacheong Poon * will always have that to send up. Otherwise, we need to do 551439b3deaSKacheong Poon * special handling in case the allocation fails at that time. 552439b3deaSKacheong Poon * If the end point is TPI, the tcp_t can be reused and the 553439b3deaSKacheong Poon * tcp_ordrel_mp may be allocated already. 554439b3deaSKacheong Poon */ 555439b3deaSKacheong Poon if (tcp->tcp_ordrel_mp == NULL) { 556439b3deaSKacheong Poon if ((tcp->tcp_ordrel_mp = mi_tpi_ordrel_ind()) == NULL) { 557439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TSYSERR, ENOMEM); 558439b3deaSKacheong Poon return; 559439b3deaSKacheong Poon } 560439b3deaSKacheong Poon } 561439b3deaSKacheong Poon 562439b3deaSKacheong Poon /* 563439b3deaSKacheong Poon * Determine packet type based on type of address passed in 564439b3deaSKacheong Poon * the request should contain an IPv4 or IPv6 address. 565439b3deaSKacheong Poon * Make sure that address family matches the type of 566439b3deaSKacheong Poon * family of the address passed down. 567439b3deaSKacheong Poon */ 568439b3deaSKacheong Poon switch (tcr->DEST_length) { 569439b3deaSKacheong Poon default: 570439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TBADADDR, 0); 571439b3deaSKacheong Poon return; 572439b3deaSKacheong Poon 573439b3deaSKacheong Poon case (sizeof (sin_t) - sizeof (sin->sin_zero)): { 574439b3deaSKacheong Poon /* 575439b3deaSKacheong Poon * XXX: The check for valid DEST_length was not there 576439b3deaSKacheong Poon * in earlier releases and some buggy 577439b3deaSKacheong Poon * TLI apps (e.g Sybase) got away with not feeding 578439b3deaSKacheong Poon * in sin_zero part of address. 579439b3deaSKacheong Poon * We allow that bug to keep those buggy apps humming. 580439b3deaSKacheong Poon * Test suites require the check on DEST_length. 581439b3deaSKacheong Poon * We construct a new mblk with valid DEST_length 582439b3deaSKacheong Poon * free the original so the rest of the code does 583439b3deaSKacheong Poon * not have to keep track of this special shorter 584439b3deaSKacheong Poon * length address case. 585439b3deaSKacheong Poon */ 586439b3deaSKacheong Poon mblk_t *nmp; 587439b3deaSKacheong Poon struct T_conn_req *ntcr; 588439b3deaSKacheong Poon sin_t *nsin; 589439b3deaSKacheong Poon 590439b3deaSKacheong Poon nmp = allocb(sizeof (struct T_conn_req) + sizeof (sin_t) + 591439b3deaSKacheong Poon tcr->OPT_length, BPRI_HI); 592439b3deaSKacheong Poon if (nmp == NULL) { 593439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TSYSERR, ENOMEM); 594439b3deaSKacheong Poon return; 595439b3deaSKacheong Poon } 596439b3deaSKacheong Poon ntcr = (struct T_conn_req *)nmp->b_rptr; 597439b3deaSKacheong Poon bzero(ntcr, sizeof (struct T_conn_req)); /* zero fill */ 598439b3deaSKacheong Poon ntcr->PRIM_type = T_CONN_REQ; 599439b3deaSKacheong Poon ntcr->DEST_length = sizeof (sin_t); 600439b3deaSKacheong Poon ntcr->DEST_offset = sizeof (struct T_conn_req); 601439b3deaSKacheong Poon 602439b3deaSKacheong Poon nsin = (sin_t *)((uchar_t *)ntcr + ntcr->DEST_offset); 603439b3deaSKacheong Poon *nsin = sin_null; 604439b3deaSKacheong Poon /* Get pointer to shorter address to copy from original mp */ 605439b3deaSKacheong Poon sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 606439b3deaSKacheong Poon tcr->DEST_length); /* extract DEST_length worth of sin_t */ 607439b3deaSKacheong Poon if (sin == NULL || !OK_32PTR((char *)sin)) { 608439b3deaSKacheong Poon freemsg(nmp); 609439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TSYSERR, EINVAL); 610439b3deaSKacheong Poon return; 611439b3deaSKacheong Poon } 612439b3deaSKacheong Poon nsin->sin_family = sin->sin_family; 613439b3deaSKacheong Poon nsin->sin_port = sin->sin_port; 614439b3deaSKacheong Poon nsin->sin_addr = sin->sin_addr; 615439b3deaSKacheong Poon /* Note:nsin->sin_zero zero-fill with sin_null assign above */ 616439b3deaSKacheong Poon nmp->b_wptr = (uchar_t *)&nsin[1]; 617439b3deaSKacheong Poon if (tcr->OPT_length != 0) { 618439b3deaSKacheong Poon ntcr->OPT_length = tcr->OPT_length; 619439b3deaSKacheong Poon ntcr->OPT_offset = nmp->b_wptr - nmp->b_rptr; 620439b3deaSKacheong Poon bcopy((uchar_t *)tcr + tcr->OPT_offset, 621439b3deaSKacheong Poon (uchar_t *)ntcr + ntcr->OPT_offset, 622439b3deaSKacheong Poon tcr->OPT_length); 623439b3deaSKacheong Poon nmp->b_wptr += tcr->OPT_length; 624439b3deaSKacheong Poon } 625439b3deaSKacheong Poon freemsg(mp); /* original mp freed */ 626439b3deaSKacheong Poon mp = nmp; /* re-initialize original variables */ 627439b3deaSKacheong Poon tcr = ntcr; 628439b3deaSKacheong Poon } 629439b3deaSKacheong Poon /* FALLTHRU */ 630439b3deaSKacheong Poon 631439b3deaSKacheong Poon case sizeof (sin_t): 632439b3deaSKacheong Poon sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 633439b3deaSKacheong Poon sizeof (sin_t)); 634439b3deaSKacheong Poon len = sizeof (sin_t); 635439b3deaSKacheong Poon break; 636439b3deaSKacheong Poon 637439b3deaSKacheong Poon case sizeof (sin6_t): 638439b3deaSKacheong Poon sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 639439b3deaSKacheong Poon sizeof (sin6_t)); 640439b3deaSKacheong Poon len = sizeof (sin6_t); 641439b3deaSKacheong Poon break; 642439b3deaSKacheong Poon } 643439b3deaSKacheong Poon 644439b3deaSKacheong Poon error = proto_verify_ip_addr(connp->conn_family, sa, len); 645439b3deaSKacheong Poon if (error != 0) { 646439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TSYSERR, error); 647439b3deaSKacheong Poon return; 648439b3deaSKacheong Poon } 649439b3deaSKacheong Poon 650439b3deaSKacheong Poon /* 651439b3deaSKacheong Poon * TODO: If someone in TCPS_TIME_WAIT has this dst/port we 652439b3deaSKacheong Poon * should key on their sequence number and cut them loose. 653439b3deaSKacheong Poon */ 654439b3deaSKacheong Poon 655439b3deaSKacheong Poon /* 656439b3deaSKacheong Poon * If options passed in, feed it for verification and handling 657439b3deaSKacheong Poon */ 658439b3deaSKacheong Poon if (tcr->OPT_length != 0) { 659439b3deaSKacheong Poon mblk_t *ok_mp; 660439b3deaSKacheong Poon mblk_t *discon_mp; 661439b3deaSKacheong Poon mblk_t *conn_opts_mp; 662439b3deaSKacheong Poon int t_error, sys_error, do_disconnect; 663439b3deaSKacheong Poon 664439b3deaSKacheong Poon conn_opts_mp = NULL; 665439b3deaSKacheong Poon 666439b3deaSKacheong Poon if (tcp_conprim_opt_process(tcp, mp, 667439b3deaSKacheong Poon &do_disconnect, &t_error, &sys_error) < 0) { 668439b3deaSKacheong Poon if (do_disconnect) { 669439b3deaSKacheong Poon ASSERT(t_error == 0 && sys_error == 0); 670439b3deaSKacheong Poon discon_mp = mi_tpi_discon_ind(NULL, 671439b3deaSKacheong Poon ECONNREFUSED, 0); 672439b3deaSKacheong Poon if (!discon_mp) { 673439b3deaSKacheong Poon tcp_err_ack_prim(tcp, mp, T_CONN_REQ, 674439b3deaSKacheong Poon TSYSERR, ENOMEM); 675439b3deaSKacheong Poon return; 676439b3deaSKacheong Poon } 677439b3deaSKacheong Poon ok_mp = mi_tpi_ok_ack_alloc(mp); 678439b3deaSKacheong Poon if (!ok_mp) { 679439b3deaSKacheong Poon tcp_err_ack_prim(tcp, NULL, T_CONN_REQ, 680439b3deaSKacheong Poon TSYSERR, ENOMEM); 681439b3deaSKacheong Poon return; 682439b3deaSKacheong Poon } 683439b3deaSKacheong Poon qreply(q, ok_mp); 684439b3deaSKacheong Poon qreply(q, discon_mp); /* no flush! */ 685439b3deaSKacheong Poon } else { 686439b3deaSKacheong Poon ASSERT(t_error != 0); 687439b3deaSKacheong Poon tcp_err_ack_prim(tcp, mp, T_CONN_REQ, t_error, 688439b3deaSKacheong Poon sys_error); 689439b3deaSKacheong Poon } 690439b3deaSKacheong Poon return; 691439b3deaSKacheong Poon } 692439b3deaSKacheong Poon /* 693439b3deaSKacheong Poon * Success in setting options, the mp option buffer represented 694439b3deaSKacheong Poon * by OPT_length/offset has been potentially modified and 695439b3deaSKacheong Poon * contains results of option processing. We copy it in 696439b3deaSKacheong Poon * another mp to save it for potentially influencing returning 697439b3deaSKacheong Poon * it in T_CONN_CONN. 698439b3deaSKacheong Poon */ 699439b3deaSKacheong Poon if (tcr->OPT_length != 0) { /* there are resulting options */ 700439b3deaSKacheong Poon conn_opts_mp = copyb(mp); 701439b3deaSKacheong Poon if (!conn_opts_mp) { 702439b3deaSKacheong Poon tcp_err_ack_prim(tcp, mp, T_CONN_REQ, 703439b3deaSKacheong Poon TSYSERR, ENOMEM); 704439b3deaSKacheong Poon return; 705439b3deaSKacheong Poon } 706439b3deaSKacheong Poon ASSERT(tcp->tcp_conn.tcp_opts_conn_req == NULL); 707439b3deaSKacheong Poon tcp->tcp_conn.tcp_opts_conn_req = conn_opts_mp; 708439b3deaSKacheong Poon /* 709439b3deaSKacheong Poon * Note: 710439b3deaSKacheong Poon * These resulting option negotiation can include any 711439b3deaSKacheong Poon * end-to-end negotiation options but there no such 712439b3deaSKacheong Poon * thing (yet?) in our TCP/IP. 713439b3deaSKacheong Poon */ 714439b3deaSKacheong Poon } 715439b3deaSKacheong Poon } 716439b3deaSKacheong Poon 717439b3deaSKacheong Poon /* call the non-TPI version */ 718439b3deaSKacheong Poon error = tcp_do_connect(tcp->tcp_connp, sa, len, cr, cpid); 719439b3deaSKacheong Poon if (error < 0) { 720439b3deaSKacheong Poon mp = mi_tpi_err_ack_alloc(mp, -error, 0); 721439b3deaSKacheong Poon } else if (error > 0) { 722439b3deaSKacheong Poon mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 723439b3deaSKacheong Poon } else { 724439b3deaSKacheong Poon mp = mi_tpi_ok_ack_alloc(mp); 725439b3deaSKacheong Poon } 726439b3deaSKacheong Poon 727439b3deaSKacheong Poon /* 728439b3deaSKacheong Poon * Note: Code below is the "failure" case 729439b3deaSKacheong Poon */ 730439b3deaSKacheong Poon /* return error ack and blow away saved option results if any */ 731439b3deaSKacheong Poon connect_failed: 732439b3deaSKacheong Poon if (mp != NULL) 733439b3deaSKacheong Poon putnext(connp->conn_rq, mp); 734439b3deaSKacheong Poon else { 735439b3deaSKacheong Poon tcp_err_ack_prim(tcp, NULL, T_CONN_REQ, 736439b3deaSKacheong Poon TSYSERR, ENOMEM); 737439b3deaSKacheong Poon } 738439b3deaSKacheong Poon } 739439b3deaSKacheong Poon 740439b3deaSKacheong Poon /* Return the TPI/TLI equivalent of our current tcp_state */ 741439b3deaSKacheong Poon static int 742439b3deaSKacheong Poon tcp_tpistate(tcp_t *tcp) 743439b3deaSKacheong Poon { 744439b3deaSKacheong Poon switch (tcp->tcp_state) { 745439b3deaSKacheong Poon case TCPS_IDLE: 746439b3deaSKacheong Poon return (TS_UNBND); 747439b3deaSKacheong Poon case TCPS_LISTEN: 748439b3deaSKacheong Poon /* 749439b3deaSKacheong Poon * Return whether there are outstanding T_CONN_IND waiting 750439b3deaSKacheong Poon * for the matching T_CONN_RES. Therefore don't count q0. 751439b3deaSKacheong Poon */ 752439b3deaSKacheong Poon if (tcp->tcp_conn_req_cnt_q > 0) 753439b3deaSKacheong Poon return (TS_WRES_CIND); 754439b3deaSKacheong Poon else 755439b3deaSKacheong Poon return (TS_IDLE); 756439b3deaSKacheong Poon case TCPS_BOUND: 757439b3deaSKacheong Poon return (TS_IDLE); 758439b3deaSKacheong Poon case TCPS_SYN_SENT: 759439b3deaSKacheong Poon return (TS_WCON_CREQ); 760439b3deaSKacheong Poon case TCPS_SYN_RCVD: 761439b3deaSKacheong Poon /* 762439b3deaSKacheong Poon * Note: assumption: this has to the active open SYN_RCVD. 763439b3deaSKacheong Poon * The passive instance is detached in SYN_RCVD stage of 764439b3deaSKacheong Poon * incoming connection processing so we cannot get request 765439b3deaSKacheong Poon * for T_info_ack on it. 766439b3deaSKacheong Poon */ 767439b3deaSKacheong Poon return (TS_WACK_CRES); 768439b3deaSKacheong Poon case TCPS_ESTABLISHED: 769439b3deaSKacheong Poon return (TS_DATA_XFER); 770439b3deaSKacheong Poon case TCPS_CLOSE_WAIT: 771439b3deaSKacheong Poon return (TS_WREQ_ORDREL); 772439b3deaSKacheong Poon case TCPS_FIN_WAIT_1: 773439b3deaSKacheong Poon return (TS_WIND_ORDREL); 774439b3deaSKacheong Poon case TCPS_FIN_WAIT_2: 775439b3deaSKacheong Poon return (TS_WIND_ORDREL); 776439b3deaSKacheong Poon 777439b3deaSKacheong Poon case TCPS_CLOSING: 778439b3deaSKacheong Poon case TCPS_LAST_ACK: 779439b3deaSKacheong Poon case TCPS_TIME_WAIT: 780439b3deaSKacheong Poon case TCPS_CLOSED: 781439b3deaSKacheong Poon /* 782439b3deaSKacheong Poon * Following TS_WACK_DREQ7 is a rendition of "not 783439b3deaSKacheong Poon * yet TS_IDLE" TPI state. There is no best match to any 784439b3deaSKacheong Poon * TPI state for TCPS_{CLOSING, LAST_ACK, TIME_WAIT} but we 785439b3deaSKacheong Poon * choose a value chosen that will map to TLI/XTI level 786439b3deaSKacheong Poon * state of TSTATECHNG (state is process of changing) which 787439b3deaSKacheong Poon * captures what this dummy state represents. 788439b3deaSKacheong Poon */ 789439b3deaSKacheong Poon return (TS_WACK_DREQ7); 790439b3deaSKacheong Poon default: 791439b3deaSKacheong Poon cmn_err(CE_WARN, "tcp_tpistate: strange state (%d) %s", 792439b3deaSKacheong Poon tcp->tcp_state, tcp_display(tcp, NULL, 793439b3deaSKacheong Poon DISP_PORT_ONLY)); 794439b3deaSKacheong Poon return (TS_UNBND); 795439b3deaSKacheong Poon } 796439b3deaSKacheong Poon } 797439b3deaSKacheong Poon 798439b3deaSKacheong Poon static void 799439b3deaSKacheong Poon tcp_copy_info(struct T_info_ack *tia, tcp_t *tcp) 800439b3deaSKacheong Poon { 801439b3deaSKacheong Poon tcp_stack_t *tcps = tcp->tcp_tcps; 802439b3deaSKacheong Poon conn_t *connp = tcp->tcp_connp; 803439b3deaSKacheong Poon extern struct T_info_ack tcp_g_t_info_ack; 804439b3deaSKacheong Poon extern struct T_info_ack tcp_g_t_info_ack_v6; 805439b3deaSKacheong Poon 806439b3deaSKacheong Poon if (connp->conn_family == AF_INET6) 807439b3deaSKacheong Poon *tia = tcp_g_t_info_ack_v6; 808439b3deaSKacheong Poon else 809439b3deaSKacheong Poon *tia = tcp_g_t_info_ack; 810439b3deaSKacheong Poon tia->CURRENT_state = tcp_tpistate(tcp); 811439b3deaSKacheong Poon tia->OPT_size = tcp_max_optsize; 812439b3deaSKacheong Poon if (tcp->tcp_mss == 0) { 813439b3deaSKacheong Poon /* Not yet set - tcp_open does not set mss */ 814439b3deaSKacheong Poon if (connp->conn_ipversion == IPV4_VERSION) 815439b3deaSKacheong Poon tia->TIDU_size = tcps->tcps_mss_def_ipv4; 816439b3deaSKacheong Poon else 817439b3deaSKacheong Poon tia->TIDU_size = tcps->tcps_mss_def_ipv6; 818439b3deaSKacheong Poon } else { 819439b3deaSKacheong Poon tia->TIDU_size = tcp->tcp_mss; 820439b3deaSKacheong Poon } 821439b3deaSKacheong Poon /* TODO: Default ETSDU is 1. Is that correct for tcp? */ 822439b3deaSKacheong Poon } 823439b3deaSKacheong Poon 8243e95bd4aSAnders Persson void 825439b3deaSKacheong Poon tcp_do_capability_ack(tcp_t *tcp, struct T_capability_ack *tcap, 826439b3deaSKacheong Poon t_uscalar_t cap_bits1) 827439b3deaSKacheong Poon { 828439b3deaSKacheong Poon tcap->CAP_bits1 = 0; 829439b3deaSKacheong Poon 830439b3deaSKacheong Poon if (cap_bits1 & TC1_INFO) { 831439b3deaSKacheong Poon tcp_copy_info(&tcap->INFO_ack, tcp); 832439b3deaSKacheong Poon tcap->CAP_bits1 |= TC1_INFO; 833439b3deaSKacheong Poon } 834439b3deaSKacheong Poon 835439b3deaSKacheong Poon if (cap_bits1 & TC1_ACCEPTOR_ID) { 836439b3deaSKacheong Poon tcap->ACCEPTOR_id = tcp->tcp_acceptor_id; 837439b3deaSKacheong Poon tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; 838439b3deaSKacheong Poon } 839439b3deaSKacheong Poon 840439b3deaSKacheong Poon } 841439b3deaSKacheong Poon 842439b3deaSKacheong Poon /* 843439b3deaSKacheong Poon * This routine responds to T_CAPABILITY_REQ messages. It is called by 844439b3deaSKacheong Poon * tcp_wput. Much of the T_CAPABILITY_ACK information is copied from 845439b3deaSKacheong Poon * tcp_g_t_info_ack. The current state of the stream is copied from 846439b3deaSKacheong Poon * tcp_state. 847439b3deaSKacheong Poon */ 848439b3deaSKacheong Poon void 849439b3deaSKacheong Poon tcp_capability_req(tcp_t *tcp, mblk_t *mp) 850439b3deaSKacheong Poon { 851439b3deaSKacheong Poon t_uscalar_t cap_bits1; 852439b3deaSKacheong Poon struct T_capability_ack *tcap; 853439b3deaSKacheong Poon 854439b3deaSKacheong Poon if (MBLKL(mp) < sizeof (struct T_capability_req)) { 855439b3deaSKacheong Poon freemsg(mp); 856439b3deaSKacheong Poon return; 857439b3deaSKacheong Poon } 858439b3deaSKacheong Poon 859439b3deaSKacheong Poon cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 860439b3deaSKacheong Poon 861439b3deaSKacheong Poon mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 862439b3deaSKacheong Poon mp->b_datap->db_type, T_CAPABILITY_ACK); 863439b3deaSKacheong Poon if (mp == NULL) 864439b3deaSKacheong Poon return; 865439b3deaSKacheong Poon 866439b3deaSKacheong Poon tcap = (struct T_capability_ack *)mp->b_rptr; 867439b3deaSKacheong Poon tcp_do_capability_ack(tcp, tcap, cap_bits1); 868439b3deaSKacheong Poon 869439b3deaSKacheong Poon putnext(tcp->tcp_connp->conn_rq, mp); 870439b3deaSKacheong Poon } 871439b3deaSKacheong Poon 872439b3deaSKacheong Poon /* 873439b3deaSKacheong Poon * This routine responds to T_INFO_REQ messages. It is called by tcp_wput. 874439b3deaSKacheong Poon * Most of the T_INFO_ACK information is copied from tcp_g_t_info_ack. 875439b3deaSKacheong Poon * The current state of the stream is copied from tcp_state. 876439b3deaSKacheong Poon */ 877439b3deaSKacheong Poon void 878439b3deaSKacheong Poon tcp_info_req(tcp_t *tcp, mblk_t *mp) 879439b3deaSKacheong Poon { 880439b3deaSKacheong Poon mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 881439b3deaSKacheong Poon T_INFO_ACK); 882439b3deaSKacheong Poon if (!mp) { 883439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TSYSERR, ENOMEM); 884439b3deaSKacheong Poon return; 885439b3deaSKacheong Poon } 886439b3deaSKacheong Poon tcp_copy_info((struct T_info_ack *)mp->b_rptr, tcp); 887439b3deaSKacheong Poon putnext(tcp->tcp_connp->conn_rq, mp); 888439b3deaSKacheong Poon } 889439b3deaSKacheong Poon 890439b3deaSKacheong Poon /* Respond to the TPI addr request */ 891439b3deaSKacheong Poon void 892439b3deaSKacheong Poon tcp_addr_req(tcp_t *tcp, mblk_t *mp) 893439b3deaSKacheong Poon { 894439b3deaSKacheong Poon struct sockaddr *sa; 895439b3deaSKacheong Poon mblk_t *ackmp; 896439b3deaSKacheong Poon struct T_addr_ack *taa; 897439b3deaSKacheong Poon conn_t *connp = tcp->tcp_connp; 898439b3deaSKacheong Poon uint_t addrlen; 899439b3deaSKacheong Poon 900439b3deaSKacheong Poon /* Make it large enough for worst case */ 901439b3deaSKacheong Poon ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 902439b3deaSKacheong Poon 2 * sizeof (sin6_t), 1); 903439b3deaSKacheong Poon if (ackmp == NULL) { 904439b3deaSKacheong Poon tcp_err_ack(tcp, mp, TSYSERR, ENOMEM); 905439b3deaSKacheong Poon return; 906439b3deaSKacheong Poon } 907439b3deaSKacheong Poon 908439b3deaSKacheong Poon taa = (struct T_addr_ack *)ackmp->b_rptr; 909439b3deaSKacheong Poon 910439b3deaSKacheong Poon bzero(taa, sizeof (struct T_addr_ack)); 911439b3deaSKacheong Poon ackmp->b_wptr = (uchar_t *)&taa[1]; 912439b3deaSKacheong Poon 913439b3deaSKacheong Poon taa->PRIM_type = T_ADDR_ACK; 914439b3deaSKacheong Poon ackmp->b_datap->db_type = M_PCPROTO; 915439b3deaSKacheong Poon 916439b3deaSKacheong Poon if (connp->conn_family == AF_INET) 917439b3deaSKacheong Poon addrlen = sizeof (sin_t); 918439b3deaSKacheong Poon else 919439b3deaSKacheong Poon addrlen = sizeof (sin6_t); 920439b3deaSKacheong Poon 921439b3deaSKacheong Poon /* 922439b3deaSKacheong Poon * Note: Following code assumes 32 bit alignment of basic 923439b3deaSKacheong Poon * data structures like sin_t and struct T_addr_ack. 924439b3deaSKacheong Poon */ 925439b3deaSKacheong Poon if (tcp->tcp_state >= TCPS_BOUND) { 926439b3deaSKacheong Poon /* 927439b3deaSKacheong Poon * Fill in local address first 928439b3deaSKacheong Poon */ 929439b3deaSKacheong Poon taa->LOCADDR_offset = sizeof (*taa); 930439b3deaSKacheong Poon taa->LOCADDR_length = addrlen; 931439b3deaSKacheong Poon sa = (struct sockaddr *)&taa[1]; 932439b3deaSKacheong Poon (void) conn_getsockname(connp, sa, &addrlen); 933439b3deaSKacheong Poon ackmp->b_wptr += addrlen; 934439b3deaSKacheong Poon } 935439b3deaSKacheong Poon if (tcp->tcp_state >= TCPS_SYN_RCVD) { 936439b3deaSKacheong Poon /* 937439b3deaSKacheong Poon * Fill in Remote address 938439b3deaSKacheong Poon */ 939439b3deaSKacheong Poon taa->REMADDR_length = addrlen; 940439b3deaSKacheong Poon /* assumed 32-bit alignment */ 941439b3deaSKacheong Poon taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 942439b3deaSKacheong Poon sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 943439b3deaSKacheong Poon (void) conn_getpeername(connp, sa, &addrlen); 944439b3deaSKacheong Poon ackmp->b_wptr += addrlen; 945439b3deaSKacheong Poon } 946439b3deaSKacheong Poon ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 947439b3deaSKacheong Poon putnext(tcp->tcp_connp->conn_rq, ackmp); 948439b3deaSKacheong Poon } 949439b3deaSKacheong Poon 950439b3deaSKacheong Poon /* 951439b3deaSKacheong Poon * Swap information between the eager and acceptor for a TLI/XTI client. 952439b3deaSKacheong Poon * The sockfs accept is done on the acceptor stream and control goes 953439b3deaSKacheong Poon * through tcp_tli_accept() and tcp_accept()/tcp_accept_swap() is not 954439b3deaSKacheong Poon * called. In either case, both the eager and listener are in their own 955439b3deaSKacheong Poon * perimeter (squeue) and the code has to deal with potential race. 956439b3deaSKacheong Poon * 957439b3deaSKacheong Poon * See the block comment on top of tcp_accept() and tcp_tli_accept(). 958439b3deaSKacheong Poon */ 959439b3deaSKacheong Poon static void 960439b3deaSKacheong Poon tcp_accept_swap(tcp_t *listener, tcp_t *acceptor, tcp_t *eager) 961439b3deaSKacheong Poon { 962439b3deaSKacheong Poon conn_t *econnp, *aconnp; 963439b3deaSKacheong Poon 964439b3deaSKacheong Poon ASSERT(eager->tcp_connp->conn_rq == listener->tcp_connp->conn_rq); 965439b3deaSKacheong Poon ASSERT(eager->tcp_detached && !acceptor->tcp_detached); 966439b3deaSKacheong Poon ASSERT(!TCP_IS_SOCKET(acceptor)); 967439b3deaSKacheong Poon ASSERT(!TCP_IS_SOCKET(eager)); 968439b3deaSKacheong Poon ASSERT(!TCP_IS_SOCKET(listener)); 969439b3deaSKacheong Poon 970439b3deaSKacheong Poon /* 971439b3deaSKacheong Poon * Trusted Extensions may need to use a security label that is 972439b3deaSKacheong Poon * different from the acceptor's label on MLP and MAC-Exempt 973439b3deaSKacheong Poon * sockets. If this is the case, the required security label 974439b3deaSKacheong Poon * already exists in econnp->conn_ixa->ixa_tsl. Since we make the 975439b3deaSKacheong Poon * acceptor stream refer to econnp we atomatically get that label. 976439b3deaSKacheong Poon */ 977439b3deaSKacheong Poon 978439b3deaSKacheong Poon acceptor->tcp_detached = B_TRUE; 979439b3deaSKacheong Poon /* 980439b3deaSKacheong Poon * To permit stream re-use by TLI/XTI, the eager needs a copy of 981439b3deaSKacheong Poon * the acceptor id. 982439b3deaSKacheong Poon */ 983439b3deaSKacheong Poon eager->tcp_acceptor_id = acceptor->tcp_acceptor_id; 984439b3deaSKacheong Poon 985439b3deaSKacheong Poon /* remove eager from listen list... */ 986439b3deaSKacheong Poon mutex_enter(&listener->tcp_eager_lock); 987439b3deaSKacheong Poon tcp_eager_unlink(eager); 988439b3deaSKacheong Poon ASSERT(eager->tcp_eager_next_q == NULL && 989439b3deaSKacheong Poon eager->tcp_eager_last_q == NULL); 990439b3deaSKacheong Poon ASSERT(eager->tcp_eager_next_q0 == NULL && 991439b3deaSKacheong Poon eager->tcp_eager_prev_q0 == NULL); 992439b3deaSKacheong Poon mutex_exit(&listener->tcp_eager_lock); 993439b3deaSKacheong Poon 994439b3deaSKacheong Poon econnp = eager->tcp_connp; 995439b3deaSKacheong Poon aconnp = acceptor->tcp_connp; 996439b3deaSKacheong Poon econnp->conn_rq = aconnp->conn_rq; 997439b3deaSKacheong Poon econnp->conn_wq = aconnp->conn_wq; 998439b3deaSKacheong Poon econnp->conn_rq->q_ptr = econnp; 999439b3deaSKacheong Poon econnp->conn_wq->q_ptr = econnp; 1000439b3deaSKacheong Poon 1001439b3deaSKacheong Poon /* 1002439b3deaSKacheong Poon * In the TLI/XTI loopback case, we are inside the listener's squeue, 1003439b3deaSKacheong Poon * which might be a different squeue from our peer TCP instance. 1004439b3deaSKacheong Poon * For TCP Fusion, the peer expects that whenever tcp_detached is 1005439b3deaSKacheong Poon * clear, our TCP queues point to the acceptor's queues. Thus, use 1006439b3deaSKacheong Poon * membar_producer() to ensure that the assignments of conn_rq/conn_wq 1007439b3deaSKacheong Poon * above reach global visibility prior to the clearing of tcp_detached. 1008439b3deaSKacheong Poon */ 1009439b3deaSKacheong Poon membar_producer(); 1010439b3deaSKacheong Poon eager->tcp_detached = B_FALSE; 1011439b3deaSKacheong Poon 1012439b3deaSKacheong Poon ASSERT(eager->tcp_ack_tid == 0); 1013439b3deaSKacheong Poon 1014439b3deaSKacheong Poon econnp->conn_dev = aconnp->conn_dev; 1015439b3deaSKacheong Poon econnp->conn_minor_arena = aconnp->conn_minor_arena; 1016439b3deaSKacheong Poon 1017439b3deaSKacheong Poon ASSERT(econnp->conn_minor_arena != NULL); 1018439b3deaSKacheong Poon if (econnp->conn_cred != NULL) 1019439b3deaSKacheong Poon crfree(econnp->conn_cred); 1020439b3deaSKacheong Poon econnp->conn_cred = aconnp->conn_cred; 1021be4c8f74SErik Nordmark ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 1022439b3deaSKacheong Poon econnp->conn_ixa->ixa_cred = econnp->conn_cred; 1023439b3deaSKacheong Poon aconnp->conn_cred = NULL; 1024439b3deaSKacheong Poon econnp->conn_cpid = aconnp->conn_cpid; 1025439b3deaSKacheong Poon ASSERT(econnp->conn_netstack == aconnp->conn_netstack); 1026439b3deaSKacheong Poon ASSERT(eager->tcp_tcps == acceptor->tcp_tcps); 1027439b3deaSKacheong Poon 1028439b3deaSKacheong Poon econnp->conn_zoneid = aconnp->conn_zoneid; 1029439b3deaSKacheong Poon econnp->conn_allzones = aconnp->conn_allzones; 1030439b3deaSKacheong Poon econnp->conn_ixa->ixa_zoneid = aconnp->conn_ixa->ixa_zoneid; 1031439b3deaSKacheong Poon 1032439b3deaSKacheong Poon econnp->conn_mac_mode = aconnp->conn_mac_mode; 1033439b3deaSKacheong Poon econnp->conn_zone_is_global = aconnp->conn_zone_is_global; 1034439b3deaSKacheong Poon aconnp->conn_mac_mode = CONN_MAC_DEFAULT; 1035439b3deaSKacheong Poon 1036439b3deaSKacheong Poon /* Do the IPC initialization */ 1037439b3deaSKacheong Poon CONN_INC_REF(econnp); 1038439b3deaSKacheong Poon 1039439b3deaSKacheong Poon /* Done with old IPC. Drop its ref on its connp */ 1040439b3deaSKacheong Poon CONN_DEC_REF(aconnp); 1041439b3deaSKacheong Poon } 1042439b3deaSKacheong Poon 1043439b3deaSKacheong Poon /* 10443e95bd4aSAnders Persson * This runs at the tail end of accept processing on the squeue of the 10453e95bd4aSAnders Persson * new connection. 10463e95bd4aSAnders Persson */ 10473e95bd4aSAnders Persson /* ARGSUSED */ 10483e95bd4aSAnders Persson static void 10493e95bd4aSAnders Persson tcp_accept_finish(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) 10503e95bd4aSAnders Persson { 10513e95bd4aSAnders Persson conn_t *connp = (conn_t *)arg; 10523e95bd4aSAnders Persson tcp_t *tcp = connp->conn_tcp; 10533e95bd4aSAnders Persson queue_t *q = connp->conn_rq; 10543e95bd4aSAnders Persson tcp_stack_t *tcps = tcp->tcp_tcps; 10553e95bd4aSAnders Persson struct stroptions *stropt; 10563e95bd4aSAnders Persson struct sock_proto_props sopp; 10573e95bd4aSAnders Persson 10583e95bd4aSAnders Persson /* Should never be called for non-STREAMS sockets */ 10593e95bd4aSAnders Persson ASSERT(!IPCL_IS_NONSTR(connp)); 10603e95bd4aSAnders Persson 10613e95bd4aSAnders Persson /* We should just receive a single mblk that fits a T_discon_ind */ 10623e95bd4aSAnders Persson ASSERT(mp->b_cont == NULL); 10633e95bd4aSAnders Persson 10643e95bd4aSAnders Persson /* 10653e95bd4aSAnders Persson * Drop the eager's ref on the listener, that was placed when 10663e95bd4aSAnders Persson * this eager began life in tcp_input_listener. 10673e95bd4aSAnders Persson */ 10683e95bd4aSAnders Persson CONN_DEC_REF(tcp->tcp_saved_listener->tcp_connp); 10693e95bd4aSAnders Persson 10703e95bd4aSAnders Persson tcp->tcp_detached = B_FALSE; 10713e95bd4aSAnders Persson 10723e95bd4aSAnders Persson if (tcp->tcp_state <= TCPS_BOUND || tcp->tcp_accept_error) { 10733e95bd4aSAnders Persson /* 10743e95bd4aSAnders Persson * Someone blewoff the eager before we could finish 10753e95bd4aSAnders Persson * the accept. 10763e95bd4aSAnders Persson * 10773e95bd4aSAnders Persson * The only reason eager exists it because we put in 10783e95bd4aSAnders Persson * a ref on it when conn ind went up. We need to send 10793e95bd4aSAnders Persson * a disconnect indication up while the last reference 10803e95bd4aSAnders Persson * on the eager will be dropped by the squeue when we 10813e95bd4aSAnders Persson * return. 10823e95bd4aSAnders Persson */ 10833e95bd4aSAnders Persson ASSERT(tcp->tcp_listener == NULL); 10843e95bd4aSAnders Persson if (tcp->tcp_issocket || tcp->tcp_send_discon_ind) { 10853e95bd4aSAnders Persson struct T_discon_ind *tdi; 10863e95bd4aSAnders Persson 10873e95bd4aSAnders Persson (void) putnextctl1(q, M_FLUSH, FLUSHRW); 10883e95bd4aSAnders Persson /* 10893e95bd4aSAnders Persson * Let us reuse the incoming mblk to avoid 10903e95bd4aSAnders Persson * memory allocation failure problems. We know 10913e95bd4aSAnders Persson * that the size of the incoming mblk i.e. 10923e95bd4aSAnders Persson * stroptions is greater than sizeof 10933e95bd4aSAnders Persson * T_discon_ind. 10943e95bd4aSAnders Persson */ 10953e95bd4aSAnders Persson ASSERT(DB_REF(mp) == 1); 10963e95bd4aSAnders Persson ASSERT(MBLKSIZE(mp) >= 10973e95bd4aSAnders Persson sizeof (struct T_discon_ind)); 10983e95bd4aSAnders Persson 10993e95bd4aSAnders Persson DB_TYPE(mp) = M_PROTO; 11003e95bd4aSAnders Persson ((union T_primitives *)mp->b_rptr)->type = 11013e95bd4aSAnders Persson T_DISCON_IND; 11023e95bd4aSAnders Persson tdi = (struct T_discon_ind *)mp->b_rptr; 11033e95bd4aSAnders Persson if (tcp->tcp_issocket) { 11043e95bd4aSAnders Persson tdi->DISCON_reason = ECONNREFUSED; 11053e95bd4aSAnders Persson tdi->SEQ_number = 0; 11063e95bd4aSAnders Persson } else { 11073e95bd4aSAnders Persson tdi->DISCON_reason = ENOPROTOOPT; 11083e95bd4aSAnders Persson tdi->SEQ_number = 11093e95bd4aSAnders Persson tcp->tcp_conn_req_seqnum; 11103e95bd4aSAnders Persson } 11113e95bd4aSAnders Persson mp->b_wptr = mp->b_rptr + 11123e95bd4aSAnders Persson sizeof (struct T_discon_ind); 11133e95bd4aSAnders Persson putnext(q, mp); 11143e95bd4aSAnders Persson } 11153e95bd4aSAnders Persson tcp->tcp_hard_binding = B_FALSE; 11163e95bd4aSAnders Persson return; 11173e95bd4aSAnders Persson } 11183e95bd4aSAnders Persson 11193e95bd4aSAnders Persson /* 11203e95bd4aSAnders Persson * This is the first time we run on the correct 11213e95bd4aSAnders Persson * queue after tcp_accept. So fix all the q parameters 11223e95bd4aSAnders Persson * here. 11233e95bd4aSAnders Persson * 11243e95bd4aSAnders Persson * Let us reuse the incoming mblk to avoid 11253e95bd4aSAnders Persson * memory allocation failure problems. We know 11263e95bd4aSAnders Persson * that the size of the incoming mblk is at least 11273e95bd4aSAnders Persson * stroptions 11283e95bd4aSAnders Persson */ 11293e95bd4aSAnders Persson tcp_get_proto_props(tcp, &sopp); 11303e95bd4aSAnders Persson 11313e95bd4aSAnders Persson ASSERT(DB_REF(mp) == 1); 11323e95bd4aSAnders Persson ASSERT(MBLKSIZE(mp) >= sizeof (struct stroptions)); 11333e95bd4aSAnders Persson 11343e95bd4aSAnders Persson DB_TYPE(mp) = M_SETOPTS; 11353e95bd4aSAnders Persson stropt = (struct stroptions *)mp->b_rptr; 11363e95bd4aSAnders Persson mp->b_wptr = mp->b_rptr + sizeof (struct stroptions); 11373e95bd4aSAnders Persson stropt = (struct stroptions *)mp->b_rptr; 11383e95bd4aSAnders Persson ASSERT(sopp.sopp_flags & (SO_HIWAT|SO_WROFF|SO_MAXBLK)); 11393e95bd4aSAnders Persson stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK; 11403e95bd4aSAnders Persson stropt->so_hiwat = sopp.sopp_rxhiwat; 11413e95bd4aSAnders Persson stropt->so_wroff = sopp.sopp_wroff; 11423e95bd4aSAnders Persson stropt->so_maxblk = sopp.sopp_maxblk; 11433e95bd4aSAnders Persson 11443e95bd4aSAnders Persson /* Send the options up */ 11453e95bd4aSAnders Persson putnext(q, mp); 11463e95bd4aSAnders Persson 11473e95bd4aSAnders Persson /* 11483e95bd4aSAnders Persson * Pass up any data and/or a fin that has been received. 11493e95bd4aSAnders Persson * 11503e95bd4aSAnders Persson * Adjust receive window in case it had decreased 11513e95bd4aSAnders Persson * (because there is data <=> tcp_rcv_list != NULL) 11523e95bd4aSAnders Persson * while the connection was detached. Note that 11533e95bd4aSAnders Persson * in case the eager was flow-controlled, w/o this 11543e95bd4aSAnders Persson * code, the rwnd may never open up again! 11553e95bd4aSAnders Persson */ 11563e95bd4aSAnders Persson if (tcp->tcp_rcv_list != NULL) { 11573e95bd4aSAnders Persson /* We drain directly in case of fused tcp loopback */ 11583e95bd4aSAnders Persson 11593e95bd4aSAnders Persson if (!tcp->tcp_fused && canputnext(q)) { 11603e95bd4aSAnders Persson tcp->tcp_rwnd = connp->conn_rcvbuf; 11613e95bd4aSAnders Persson if (tcp->tcp_state >= TCPS_ESTABLISHED && 11623e95bd4aSAnders Persson tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) { 11633e95bd4aSAnders Persson tcp_xmit_ctl(NULL, 11643e95bd4aSAnders Persson tcp, (tcp->tcp_swnd == 0) ? 11653e95bd4aSAnders Persson tcp->tcp_suna : tcp->tcp_snxt, 11663e95bd4aSAnders Persson tcp->tcp_rnxt, TH_ACK); 11673e95bd4aSAnders Persson } 11683e95bd4aSAnders Persson } 11693e95bd4aSAnders Persson 11703e95bd4aSAnders Persson (void) tcp_rcv_drain(tcp); 11713e95bd4aSAnders Persson 11723e95bd4aSAnders Persson /* 11733e95bd4aSAnders Persson * For fused tcp loopback, back-enable peer endpoint 11743e95bd4aSAnders Persson * if it's currently flow-controlled. 11753e95bd4aSAnders Persson */ 11763e95bd4aSAnders Persson if (tcp->tcp_fused) { 11773e95bd4aSAnders Persson tcp_t *peer_tcp = tcp->tcp_loopback_peer; 11783e95bd4aSAnders Persson 11793e95bd4aSAnders Persson ASSERT(peer_tcp != NULL); 11803e95bd4aSAnders Persson ASSERT(peer_tcp->tcp_fused); 11813e95bd4aSAnders Persson 11823e95bd4aSAnders Persson mutex_enter(&peer_tcp->tcp_non_sq_lock); 11833e95bd4aSAnders Persson if (peer_tcp->tcp_flow_stopped) { 11843e95bd4aSAnders Persson tcp_clrqfull(peer_tcp); 11853e95bd4aSAnders Persson TCP_STAT(tcps, tcp_fusion_backenabled); 11863e95bd4aSAnders Persson } 11873e95bd4aSAnders Persson mutex_exit(&peer_tcp->tcp_non_sq_lock); 11883e95bd4aSAnders Persson } 11893e95bd4aSAnders Persson } 11903e95bd4aSAnders Persson ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg); 11913e95bd4aSAnders Persson if (tcp->tcp_fin_rcvd && !tcp->tcp_ordrel_done) { 11923e95bd4aSAnders Persson tcp->tcp_ordrel_done = B_TRUE; 11933e95bd4aSAnders Persson mp = tcp->tcp_ordrel_mp; 11943e95bd4aSAnders Persson tcp->tcp_ordrel_mp = NULL; 11953e95bd4aSAnders Persson putnext(q, mp); 11963e95bd4aSAnders Persson } 11973e95bd4aSAnders Persson tcp->tcp_hard_binding = B_FALSE; 11983e95bd4aSAnders Persson 11993e95bd4aSAnders Persson if (connp->conn_keepalive) { 12003e95bd4aSAnders Persson tcp->tcp_ka_last_intrvl = 0; 12013e95bd4aSAnders Persson tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer, 12023e95bd4aSAnders Persson tcp->tcp_ka_interval); 12033e95bd4aSAnders Persson } 12043e95bd4aSAnders Persson 12053e95bd4aSAnders Persson /* 12063e95bd4aSAnders Persson * At this point, eager is fully established and will 12073e95bd4aSAnders Persson * have the following references - 12083e95bd4aSAnders Persson * 12093e95bd4aSAnders Persson * 2 references for connection to exist (1 for TCP and 1 for IP). 12103e95bd4aSAnders Persson * 1 reference for the squeue which will be dropped by the squeue as 12113e95bd4aSAnders Persson * soon as this function returns. 12123e95bd4aSAnders Persson * There will be 1 additonal reference for being in classifier 12133e95bd4aSAnders Persson * hash list provided something bad hasn't happened. 12143e95bd4aSAnders Persson */ 12153e95bd4aSAnders Persson ASSERT((connp->conn_fanout != NULL && connp->conn_ref >= 4) || 12163e95bd4aSAnders Persson (connp->conn_fanout == NULL && connp->conn_ref >= 3)); 12173e95bd4aSAnders Persson } 12183e95bd4aSAnders Persson 1219*dd49f125SAnders Persson /* 1220*dd49f125SAnders Persson * Pull a deferred connection indication off of the listener. The caller 1221*dd49f125SAnders Persson * must verify that there is a deferred conn ind under eager_lock before 1222*dd49f125SAnders Persson * calling this function. 1223*dd49f125SAnders Persson */ 1224*dd49f125SAnders Persson static mblk_t * 1225*dd49f125SAnders Persson tcp_get_def_conn_ind(tcp_t *listener) 1226*dd49f125SAnders Persson { 1227*dd49f125SAnders Persson tcp_t *tail; 1228*dd49f125SAnders Persson tcp_t *tcp; 1229*dd49f125SAnders Persson mblk_t *conn_ind; 1230*dd49f125SAnders Persson 1231*dd49f125SAnders Persson ASSERT(MUTEX_HELD(&listener->tcp_eager_lock)); 1232*dd49f125SAnders Persson ASSERT(listener->tcp_eager_prev_q0->tcp_conn_def_q0); 1233*dd49f125SAnders Persson 1234*dd49f125SAnders Persson tcp = listener->tcp_eager_prev_q0; 1235*dd49f125SAnders Persson /* 1236*dd49f125SAnders Persson * listener->tcp_eager_prev_q0 points to the TAIL of the 1237*dd49f125SAnders Persson * deferred T_conn_ind queue. We need to get to the head 1238*dd49f125SAnders Persson * of the queue in order to send up T_conn_ind the same 1239*dd49f125SAnders Persson * order as how the 3WHS is completed. 1240*dd49f125SAnders Persson */ 1241*dd49f125SAnders Persson while (tcp != listener) { 1242*dd49f125SAnders Persson if (!tcp->tcp_eager_prev_q0->tcp_conn_def_q0) 1243*dd49f125SAnders Persson break; 1244*dd49f125SAnders Persson else 1245*dd49f125SAnders Persson tcp = tcp->tcp_eager_prev_q0; 1246*dd49f125SAnders Persson } 1247*dd49f125SAnders Persson 1248*dd49f125SAnders Persson conn_ind = tcp->tcp_conn.tcp_eager_conn_ind; 1249*dd49f125SAnders Persson tcp->tcp_conn.tcp_eager_conn_ind = NULL; 1250*dd49f125SAnders Persson /* Move from q0 to q */ 1251*dd49f125SAnders Persson ASSERT(listener->tcp_conn_req_cnt_q0 > 0); 1252*dd49f125SAnders Persson listener->tcp_conn_req_cnt_q0--; 1253*dd49f125SAnders Persson listener->tcp_conn_req_cnt_q++; 1254*dd49f125SAnders Persson tcp->tcp_eager_next_q0->tcp_eager_prev_q0 = 1255*dd49f125SAnders Persson tcp->tcp_eager_prev_q0; 1256*dd49f125SAnders Persson tcp->tcp_eager_prev_q0->tcp_eager_next_q0 = 1257*dd49f125SAnders Persson tcp->tcp_eager_next_q0; 1258*dd49f125SAnders Persson tcp->tcp_eager_prev_q0 = NULL; 1259*dd49f125SAnders Persson tcp->tcp_eager_next_q0 = NULL; 1260*dd49f125SAnders Persson tcp->tcp_conn_def_q0 = B_FALSE; 1261*dd49f125SAnders Persson 1262*dd49f125SAnders Persson /* Make sure the tcp isn't in the list of droppables */ 1263*dd49f125SAnders Persson ASSERT(tcp->tcp_eager_next_drop_q0 == NULL && 1264*dd49f125SAnders Persson tcp->tcp_eager_prev_drop_q0 == NULL); 1265*dd49f125SAnders Persson 1266*dd49f125SAnders Persson /* 1267*dd49f125SAnders Persson * Insert at end of the queue because sockfs sends 1268*dd49f125SAnders Persson * down T_CONN_RES in chronological order. Leaving 1269*dd49f125SAnders Persson * the older conn indications at front of the queue 1270*dd49f125SAnders Persson * helps reducing search time. 1271*dd49f125SAnders Persson */ 1272*dd49f125SAnders Persson tail = listener->tcp_eager_last_q; 1273*dd49f125SAnders Persson if (tail != NULL) { 1274*dd49f125SAnders Persson tail->tcp_eager_next_q = tcp; 1275*dd49f125SAnders Persson } else { 1276*dd49f125SAnders Persson listener->tcp_eager_next_q = tcp; 1277*dd49f125SAnders Persson } 1278*dd49f125SAnders Persson listener->tcp_eager_last_q = tcp; 1279*dd49f125SAnders Persson tcp->tcp_eager_next_q = NULL; 1280*dd49f125SAnders Persson 1281*dd49f125SAnders Persson return (conn_ind); 1282*dd49f125SAnders Persson } 1283*dd49f125SAnders Persson 12843e95bd4aSAnders Persson 12853e95bd4aSAnders Persson /* 1286439b3deaSKacheong Poon * Reply to a clients T_CONN_RES TPI message. This function 1287439b3deaSKacheong Poon * is used only for TLI/XTI listener. Sockfs sends T_CONN_RES 1288439b3deaSKacheong Poon * on the acceptor STREAM and processed in tcp_accept_common(). 1289439b3deaSKacheong Poon * Read the block comment on top of tcp_input_listener(). 1290439b3deaSKacheong Poon */ 1291439b3deaSKacheong Poon void 1292439b3deaSKacheong Poon tcp_tli_accept(tcp_t *listener, mblk_t *mp) 1293439b3deaSKacheong Poon { 1294439b3deaSKacheong Poon tcp_t *acceptor; 1295439b3deaSKacheong Poon tcp_t *eager; 1296439b3deaSKacheong Poon struct T_conn_res *tcr; 1297439b3deaSKacheong Poon t_uscalar_t acceptor_id; 1298439b3deaSKacheong Poon t_scalar_t seqnum; 1299439b3deaSKacheong Poon mblk_t *discon_mp = NULL; 1300439b3deaSKacheong Poon mblk_t *ok_mp; 1301439b3deaSKacheong Poon mblk_t *mp1; 1302439b3deaSKacheong Poon tcp_stack_t *tcps = listener->tcp_tcps; 1303439b3deaSKacheong Poon conn_t *econnp; 1304439b3deaSKacheong Poon 1305439b3deaSKacheong Poon if ((mp->b_wptr - mp->b_rptr) < sizeof (*tcr)) { 1306439b3deaSKacheong Poon tcp_err_ack(listener, mp, TPROTO, 0); 1307439b3deaSKacheong Poon return; 1308439b3deaSKacheong Poon } 1309439b3deaSKacheong Poon tcr = (struct T_conn_res *)mp->b_rptr; 1310439b3deaSKacheong Poon 1311439b3deaSKacheong Poon /* 1312439b3deaSKacheong Poon * Under ILP32 the stream head points tcr->ACCEPTOR_id at the 1313439b3deaSKacheong Poon * read side queue of the streams device underneath us i.e. the 1314439b3deaSKacheong Poon * read side queue of 'ip'. Since we can't deference QUEUE_ptr we 1315439b3deaSKacheong Poon * look it up in the queue_hash. Under LP64 it sends down the 1316439b3deaSKacheong Poon * minor_t of the accepting endpoint. 1317439b3deaSKacheong Poon * 1318439b3deaSKacheong Poon * Once the acceptor/eager are modified (in tcp_accept_swap) the 1319439b3deaSKacheong Poon * fanout hash lock is held. 1320439b3deaSKacheong Poon * This prevents any thread from entering the acceptor queue from 1321439b3deaSKacheong Poon * below (since it has not been hard bound yet i.e. any inbound 1322439b3deaSKacheong Poon * packets will arrive on the listener conn_t and 1323439b3deaSKacheong Poon * go through the classifier). 1324439b3deaSKacheong Poon * The CONN_INC_REF will prevent the acceptor from closing. 1325439b3deaSKacheong Poon * 1326439b3deaSKacheong Poon * XXX It is still possible for a tli application to send down data 1327439b3deaSKacheong Poon * on the accepting stream while another thread calls t_accept. 1328439b3deaSKacheong Poon * This should not be a problem for well-behaved applications since 1329439b3deaSKacheong Poon * the T_OK_ACK is sent after the queue swapping is completed. 1330439b3deaSKacheong Poon * 1331439b3deaSKacheong Poon * If the accepting fd is the same as the listening fd, avoid 1332439b3deaSKacheong Poon * queue hash lookup since that will return an eager listener in a 1333439b3deaSKacheong Poon * already established state. 1334439b3deaSKacheong Poon */ 1335439b3deaSKacheong Poon acceptor_id = tcr->ACCEPTOR_id; 1336439b3deaSKacheong Poon mutex_enter(&listener->tcp_eager_lock); 1337439b3deaSKacheong Poon if (listener->tcp_acceptor_id == acceptor_id) { 1338439b3deaSKacheong Poon eager = listener->tcp_eager_next_q; 1339439b3deaSKacheong Poon /* only count how many T_CONN_INDs so don't count q0 */ 1340439b3deaSKacheong Poon if ((listener->tcp_conn_req_cnt_q != 1) || 1341439b3deaSKacheong Poon (eager->tcp_conn_req_seqnum != tcr->SEQ_number)) { 1342439b3deaSKacheong Poon mutex_exit(&listener->tcp_eager_lock); 1343439b3deaSKacheong Poon tcp_err_ack(listener, mp, TBADF, 0); 1344439b3deaSKacheong Poon return; 1345439b3deaSKacheong Poon } 1346439b3deaSKacheong Poon if (listener->tcp_conn_req_cnt_q0 != 0) { 1347439b3deaSKacheong Poon /* Throw away all the eagers on q0. */ 1348439b3deaSKacheong Poon tcp_eager_cleanup(listener, 1); 1349439b3deaSKacheong Poon } 1350439b3deaSKacheong Poon if (listener->tcp_syn_defense) { 1351439b3deaSKacheong Poon listener->tcp_syn_defense = B_FALSE; 1352439b3deaSKacheong Poon if (listener->tcp_ip_addr_cache != NULL) { 1353439b3deaSKacheong Poon kmem_free(listener->tcp_ip_addr_cache, 1354439b3deaSKacheong Poon IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t)); 1355439b3deaSKacheong Poon listener->tcp_ip_addr_cache = NULL; 1356439b3deaSKacheong Poon } 1357439b3deaSKacheong Poon } 1358439b3deaSKacheong Poon /* 1359439b3deaSKacheong Poon * Transfer tcp_conn_req_max to the eager so that when 1360439b3deaSKacheong Poon * a disconnect occurs we can revert the endpoint to the 1361439b3deaSKacheong Poon * listen state. 1362439b3deaSKacheong Poon */ 1363439b3deaSKacheong Poon eager->tcp_conn_req_max = listener->tcp_conn_req_max; 1364439b3deaSKacheong Poon ASSERT(listener->tcp_conn_req_cnt_q0 == 0); 1365439b3deaSKacheong Poon /* 1366439b3deaSKacheong Poon * Get a reference on the acceptor just like the 1367439b3deaSKacheong Poon * tcp_acceptor_hash_lookup below. 1368439b3deaSKacheong Poon */ 1369439b3deaSKacheong Poon acceptor = listener; 1370439b3deaSKacheong Poon CONN_INC_REF(acceptor->tcp_connp); 1371439b3deaSKacheong Poon } else { 1372439b3deaSKacheong Poon acceptor = tcp_acceptor_hash_lookup(acceptor_id, tcps); 1373439b3deaSKacheong Poon if (acceptor == NULL) { 1374439b3deaSKacheong Poon if (listener->tcp_connp->conn_debug) { 1375439b3deaSKacheong Poon (void) strlog(TCP_MOD_ID, 0, 1, 1376439b3deaSKacheong Poon SL_ERROR|SL_TRACE, 1377439b3deaSKacheong Poon "tcp_accept: did not find acceptor 0x%x\n", 1378439b3deaSKacheong Poon acceptor_id); 1379439b3deaSKacheong Poon } 1380439b3deaSKacheong Poon mutex_exit(&listener->tcp_eager_lock); 1381439b3deaSKacheong Poon tcp_err_ack(listener, mp, TPROVMISMATCH, 0); 1382439b3deaSKacheong Poon return; 1383439b3deaSKacheong Poon } 1384439b3deaSKacheong Poon /* 1385439b3deaSKacheong Poon * Verify acceptor state. The acceptable states for an acceptor 1386439b3deaSKacheong Poon * include TCPS_IDLE and TCPS_BOUND. 1387439b3deaSKacheong Poon */ 1388439b3deaSKacheong Poon switch (acceptor->tcp_state) { 1389439b3deaSKacheong Poon case TCPS_IDLE: 1390439b3deaSKacheong Poon /* FALLTHRU */ 1391439b3deaSKacheong Poon case TCPS_BOUND: 1392439b3deaSKacheong Poon break; 1393439b3deaSKacheong Poon default: 1394439b3deaSKacheong Poon CONN_DEC_REF(acceptor->tcp_connp); 1395439b3deaSKacheong Poon mutex_exit(&listener->tcp_eager_lock); 1396439b3deaSKacheong Poon tcp_err_ack(listener, mp, TOUTSTATE, 0); 1397439b3deaSKacheong Poon return; 1398439b3deaSKacheong Poon } 1399439b3deaSKacheong Poon } 1400439b3deaSKacheong Poon 1401439b3deaSKacheong Poon /* The listener must be in TCPS_LISTEN */ 1402439b3deaSKacheong Poon if (listener->tcp_state != TCPS_LISTEN) { 1403439b3deaSKacheong Poon CONN_DEC_REF(acceptor->tcp_connp); 1404439b3deaSKacheong Poon mutex_exit(&listener->tcp_eager_lock); 1405439b3deaSKacheong Poon tcp_err_ack(listener, mp, TOUTSTATE, 0); 1406439b3deaSKacheong Poon return; 1407439b3deaSKacheong Poon } 1408439b3deaSKacheong Poon 1409439b3deaSKacheong Poon /* 1410439b3deaSKacheong Poon * Rendezvous with an eager connection request packet hanging off 1411439b3deaSKacheong Poon * 'tcp' that has the 'seqnum' tag. We tagged the detached open 1412439b3deaSKacheong Poon * tcp structure when the connection packet arrived in 1413439b3deaSKacheong Poon * tcp_input_listener(). 1414439b3deaSKacheong Poon */ 1415439b3deaSKacheong Poon seqnum = tcr->SEQ_number; 1416439b3deaSKacheong Poon eager = listener; 1417439b3deaSKacheong Poon do { 1418439b3deaSKacheong Poon eager = eager->tcp_eager_next_q; 1419439b3deaSKacheong Poon if (eager == NULL) { 1420439b3deaSKacheong Poon CONN_DEC_REF(acceptor->tcp_connp); 1421439b3deaSKacheong Poon mutex_exit(&listener->tcp_eager_lock); 1422439b3deaSKacheong Poon tcp_err_ack(listener, mp, TBADSEQ, 0); 1423439b3deaSKacheong Poon return; 1424439b3deaSKacheong Poon } 1425439b3deaSKacheong Poon } while (eager->tcp_conn_req_seqnum != seqnum); 1426439b3deaSKacheong Poon mutex_exit(&listener->tcp_eager_lock); 1427439b3deaSKacheong Poon 1428439b3deaSKacheong Poon /* 1429439b3deaSKacheong Poon * At this point, both acceptor and listener have 2 ref 1430439b3deaSKacheong Poon * that they begin with. Acceptor has one additional ref 1431439b3deaSKacheong Poon * we placed in lookup while listener has 3 additional 1432439b3deaSKacheong Poon * ref for being behind the squeue (tcp_accept() is 1433439b3deaSKacheong Poon * done on listener's squeue); being in classifier hash; 1434439b3deaSKacheong Poon * and eager's ref on listener. 1435439b3deaSKacheong Poon */ 1436439b3deaSKacheong Poon ASSERT(listener->tcp_connp->conn_ref >= 5); 1437439b3deaSKacheong Poon ASSERT(acceptor->tcp_connp->conn_ref >= 3); 1438439b3deaSKacheong Poon 1439439b3deaSKacheong Poon /* 1440439b3deaSKacheong Poon * The eager at this point is set in its own squeue and 1441439b3deaSKacheong Poon * could easily have been killed (tcp_accept_finish will 1442439b3deaSKacheong Poon * deal with that) because of a TH_RST so we can only 1443439b3deaSKacheong Poon * ASSERT for a single ref. 1444439b3deaSKacheong Poon */ 1445439b3deaSKacheong Poon ASSERT(eager->tcp_connp->conn_ref >= 1); 1446439b3deaSKacheong Poon 1447439b3deaSKacheong Poon /* 1448439b3deaSKacheong Poon * Pre allocate the discon_ind mblk also. tcp_accept_finish will 1449439b3deaSKacheong Poon * use it if something failed. 1450439b3deaSKacheong Poon */ 1451439b3deaSKacheong Poon discon_mp = allocb(MAX(sizeof (struct T_discon_ind), 1452439b3deaSKacheong Poon sizeof (struct stroptions)), BPRI_HI); 1453439b3deaSKacheong Poon if (discon_mp == NULL) { 1454439b3deaSKacheong Poon CONN_DEC_REF(acceptor->tcp_connp); 1455439b3deaSKacheong Poon CONN_DEC_REF(eager->tcp_connp); 1456439b3deaSKacheong Poon tcp_err_ack(listener, mp, TSYSERR, ENOMEM); 1457439b3deaSKacheong Poon return; 1458439b3deaSKacheong Poon } 1459439b3deaSKacheong Poon 1460439b3deaSKacheong Poon econnp = eager->tcp_connp; 1461439b3deaSKacheong Poon 1462439b3deaSKacheong Poon /* Hold a copy of mp, in case reallocb fails */ 1463439b3deaSKacheong Poon if ((mp1 = copymsg(mp)) == NULL) { 1464439b3deaSKacheong Poon CONN_DEC_REF(acceptor->tcp_connp); 1465439b3deaSKacheong Poon CONN_DEC_REF(eager->tcp_connp); 1466439b3deaSKacheong Poon freemsg(discon_mp); 1467439b3deaSKacheong Poon tcp_err_ack(listener, mp, TSYSERR, ENOMEM); 1468439b3deaSKacheong Poon return; 1469439b3deaSKacheong Poon } 1470439b3deaSKacheong Poon 1471439b3deaSKacheong Poon tcr = (struct T_conn_res *)mp1->b_rptr; 1472439b3deaSKacheong Poon 1473439b3deaSKacheong Poon /* 1474439b3deaSKacheong Poon * This is an expanded version of mi_tpi_ok_ack_alloc() 1475439b3deaSKacheong Poon * which allocates a larger mblk and appends the new 1476439b3deaSKacheong Poon * local address to the ok_ack. The address is copied by 1477439b3deaSKacheong Poon * soaccept() for getsockname(). 1478439b3deaSKacheong Poon */ 1479439b3deaSKacheong Poon { 1480439b3deaSKacheong Poon int extra; 1481439b3deaSKacheong Poon 1482439b3deaSKacheong Poon extra = (econnp->conn_family == AF_INET) ? 1483439b3deaSKacheong Poon sizeof (sin_t) : sizeof (sin6_t); 1484439b3deaSKacheong Poon 1485439b3deaSKacheong Poon /* 1486439b3deaSKacheong Poon * Try to re-use mp, if possible. Otherwise, allocate 1487439b3deaSKacheong Poon * an mblk and return it as ok_mp. In any case, mp 1488439b3deaSKacheong Poon * is no longer usable upon return. 1489439b3deaSKacheong Poon */ 1490439b3deaSKacheong Poon if ((ok_mp = mi_tpi_ok_ack_alloc_extra(mp, extra)) == NULL) { 1491439b3deaSKacheong Poon CONN_DEC_REF(acceptor->tcp_connp); 1492439b3deaSKacheong Poon CONN_DEC_REF(eager->tcp_connp); 1493439b3deaSKacheong Poon freemsg(discon_mp); 1494439b3deaSKacheong Poon /* Original mp has been freed by now, so use mp1 */ 1495439b3deaSKacheong Poon tcp_err_ack(listener, mp1, TSYSERR, ENOMEM); 1496439b3deaSKacheong Poon return; 1497439b3deaSKacheong Poon } 1498439b3deaSKacheong Poon 1499439b3deaSKacheong Poon mp = NULL; /* We should never use mp after this point */ 1500439b3deaSKacheong Poon 1501439b3deaSKacheong Poon switch (extra) { 1502439b3deaSKacheong Poon case sizeof (sin_t): { 1503439b3deaSKacheong Poon sin_t *sin = (sin_t *)ok_mp->b_wptr; 1504439b3deaSKacheong Poon 1505439b3deaSKacheong Poon ok_mp->b_wptr += extra; 1506439b3deaSKacheong Poon sin->sin_family = AF_INET; 1507439b3deaSKacheong Poon sin->sin_port = econnp->conn_lport; 1508439b3deaSKacheong Poon sin->sin_addr.s_addr = econnp->conn_laddr_v4; 1509439b3deaSKacheong Poon break; 1510439b3deaSKacheong Poon } 1511439b3deaSKacheong Poon case sizeof (sin6_t): { 1512439b3deaSKacheong Poon sin6_t *sin6 = (sin6_t *)ok_mp->b_wptr; 1513439b3deaSKacheong Poon 1514439b3deaSKacheong Poon ok_mp->b_wptr += extra; 1515439b3deaSKacheong Poon sin6->sin6_family = AF_INET6; 1516439b3deaSKacheong Poon sin6->sin6_port = econnp->conn_lport; 1517439b3deaSKacheong Poon sin6->sin6_addr = econnp->conn_laddr_v6; 1518439b3deaSKacheong Poon sin6->sin6_flowinfo = econnp->conn_flowinfo; 1519439b3deaSKacheong Poon if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6) && 1520439b3deaSKacheong Poon (econnp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 1521439b3deaSKacheong Poon sin6->sin6_scope_id = 1522439b3deaSKacheong Poon econnp->conn_ixa->ixa_scopeid; 1523439b3deaSKacheong Poon } else { 1524439b3deaSKacheong Poon sin6->sin6_scope_id = 0; 1525439b3deaSKacheong Poon } 1526439b3deaSKacheong Poon sin6->__sin6_src_id = 0; 1527439b3deaSKacheong Poon break; 1528439b3deaSKacheong Poon } 1529439b3deaSKacheong Poon default: 1530439b3deaSKacheong Poon break; 1531439b3deaSKacheong Poon } 1532439b3deaSKacheong Poon ASSERT(ok_mp->b_wptr <= ok_mp->b_datap->db_lim); 1533439b3deaSKacheong Poon } 1534439b3deaSKacheong Poon 1535439b3deaSKacheong Poon /* 1536439b3deaSKacheong Poon * If there are no options we know that the T_CONN_RES will 1537439b3deaSKacheong Poon * succeed. However, we can't send the T_OK_ACK upstream until 1538439b3deaSKacheong Poon * the tcp_accept_swap is done since it would be dangerous to 1539439b3deaSKacheong Poon * let the application start using the new fd prior to the swap. 1540439b3deaSKacheong Poon */ 1541439b3deaSKacheong Poon tcp_accept_swap(listener, acceptor, eager); 1542439b3deaSKacheong Poon 1543439b3deaSKacheong Poon /* 1544439b3deaSKacheong Poon * tcp_accept_swap unlinks eager from listener but does not drop 1545439b3deaSKacheong Poon * the eager's reference on the listener. 1546439b3deaSKacheong Poon */ 1547439b3deaSKacheong Poon ASSERT(eager->tcp_listener == NULL); 1548439b3deaSKacheong Poon ASSERT(listener->tcp_connp->conn_ref >= 5); 1549439b3deaSKacheong Poon 1550439b3deaSKacheong Poon /* 1551439b3deaSKacheong Poon * The eager is now associated with its own queue. Insert in 1552439b3deaSKacheong Poon * the hash so that the connection can be reused for a future 1553439b3deaSKacheong Poon * T_CONN_RES. 1554439b3deaSKacheong Poon */ 1555439b3deaSKacheong Poon tcp_acceptor_hash_insert(acceptor_id, eager); 1556439b3deaSKacheong Poon 1557439b3deaSKacheong Poon /* 1558439b3deaSKacheong Poon * We now do the processing of options with T_CONN_RES. 1559439b3deaSKacheong Poon * We delay till now since we wanted to have queue to pass to 1560439b3deaSKacheong Poon * option processing routines that points back to the right 1561439b3deaSKacheong Poon * instance structure which does not happen until after 1562439b3deaSKacheong Poon * tcp_accept_swap(). 1563439b3deaSKacheong Poon * 1564439b3deaSKacheong Poon * Note: 1565439b3deaSKacheong Poon * The sanity of the logic here assumes that whatever options 1566439b3deaSKacheong Poon * are appropriate to inherit from listner=>eager are done 1567439b3deaSKacheong Poon * before this point, and whatever were to be overridden (or not) 1568439b3deaSKacheong Poon * in transfer logic from eager=>acceptor in tcp_accept_swap(). 1569439b3deaSKacheong Poon * [ Warning: acceptor endpoint can have T_OPTMGMT_REQ done to it 1570439b3deaSKacheong Poon * before its ACCEPTOR_id comes down in T_CONN_RES ] 1571439b3deaSKacheong Poon * This may not be true at this point in time but can be fixed 1572439b3deaSKacheong Poon * independently. This option processing code starts with 1573439b3deaSKacheong Poon * the instantiated acceptor instance and the final queue at 1574439b3deaSKacheong Poon * this point. 1575439b3deaSKacheong Poon */ 1576439b3deaSKacheong Poon 1577439b3deaSKacheong Poon if (tcr->OPT_length != 0) { 1578439b3deaSKacheong Poon /* Options to process */ 1579439b3deaSKacheong Poon int t_error = 0; 1580439b3deaSKacheong Poon int sys_error = 0; 1581439b3deaSKacheong Poon int do_disconnect = 0; 1582439b3deaSKacheong Poon 1583439b3deaSKacheong Poon if (tcp_conprim_opt_process(eager, mp1, 1584439b3deaSKacheong Poon &do_disconnect, &t_error, &sys_error) < 0) { 1585439b3deaSKacheong Poon eager->tcp_accept_error = 1; 1586439b3deaSKacheong Poon if (do_disconnect) { 1587439b3deaSKacheong Poon /* 1588439b3deaSKacheong Poon * An option failed which does not allow 1589439b3deaSKacheong Poon * connection to be accepted. 1590439b3deaSKacheong Poon * 1591439b3deaSKacheong Poon * We allow T_CONN_RES to succeed and 1592439b3deaSKacheong Poon * put a T_DISCON_IND on the eager queue. 1593439b3deaSKacheong Poon */ 1594439b3deaSKacheong Poon ASSERT(t_error == 0 && sys_error == 0); 1595439b3deaSKacheong Poon eager->tcp_send_discon_ind = 1; 1596439b3deaSKacheong Poon } else { 1597439b3deaSKacheong Poon ASSERT(t_error != 0); 1598439b3deaSKacheong Poon freemsg(ok_mp); 1599439b3deaSKacheong Poon /* 1600439b3deaSKacheong Poon * Original mp was either freed or set 1601439b3deaSKacheong Poon * to ok_mp above, so use mp1 instead. 1602439b3deaSKacheong Poon */ 1603439b3deaSKacheong Poon tcp_err_ack(listener, mp1, t_error, sys_error); 1604439b3deaSKacheong Poon goto finish; 1605439b3deaSKacheong Poon } 1606439b3deaSKacheong Poon } 1607439b3deaSKacheong Poon /* 1608439b3deaSKacheong Poon * Most likely success in setting options (except if 1609439b3deaSKacheong Poon * eager->tcp_send_discon_ind set). 1610439b3deaSKacheong Poon * mp1 option buffer represented by OPT_length/offset 1611439b3deaSKacheong Poon * potentially modified and contains results of setting 1612439b3deaSKacheong Poon * options at this point 1613439b3deaSKacheong Poon */ 1614439b3deaSKacheong Poon } 1615439b3deaSKacheong Poon 1616439b3deaSKacheong Poon /* We no longer need mp1, since all options processing has passed */ 1617439b3deaSKacheong Poon freemsg(mp1); 1618439b3deaSKacheong Poon 1619439b3deaSKacheong Poon putnext(listener->tcp_connp->conn_rq, ok_mp); 1620439b3deaSKacheong Poon 1621439b3deaSKacheong Poon mutex_enter(&listener->tcp_eager_lock); 1622439b3deaSKacheong Poon if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) { 1623439b3deaSKacheong Poon mblk_t *conn_ind; 1624439b3deaSKacheong Poon 1625439b3deaSKacheong Poon /* 1626439b3deaSKacheong Poon * This path should not be executed if listener and 1627439b3deaSKacheong Poon * acceptor streams are the same. 1628439b3deaSKacheong Poon */ 1629439b3deaSKacheong Poon ASSERT(listener != acceptor); 1630*dd49f125SAnders Persson conn_ind = tcp_get_def_conn_ind(listener); 1631439b3deaSKacheong Poon mutex_exit(&listener->tcp_eager_lock); 1632*dd49f125SAnders Persson putnext(listener->tcp_connp->conn_rq, conn_ind); 1633439b3deaSKacheong Poon } else { 1634439b3deaSKacheong Poon mutex_exit(&listener->tcp_eager_lock); 1635439b3deaSKacheong Poon } 1636439b3deaSKacheong Poon 1637439b3deaSKacheong Poon /* 1638439b3deaSKacheong Poon * Done with the acceptor - free it 1639439b3deaSKacheong Poon * 1640439b3deaSKacheong Poon * Note: from this point on, no access to listener should be made 1641439b3deaSKacheong Poon * as listener can be equal to acceptor. 1642439b3deaSKacheong Poon */ 1643439b3deaSKacheong Poon finish: 1644439b3deaSKacheong Poon ASSERT(acceptor->tcp_detached); 1645439b3deaSKacheong Poon acceptor->tcp_connp->conn_rq = NULL; 1646439b3deaSKacheong Poon ASSERT(!IPCL_IS_NONSTR(acceptor->tcp_connp)); 1647439b3deaSKacheong Poon acceptor->tcp_connp->conn_wq = NULL; 1648439b3deaSKacheong Poon (void) tcp_clean_death(acceptor, 0); 1649439b3deaSKacheong Poon CONN_DEC_REF(acceptor->tcp_connp); 1650439b3deaSKacheong Poon 1651439b3deaSKacheong Poon /* 1652439b3deaSKacheong Poon * We pass discon_mp to tcp_accept_finish to get on the right squeue. 1653439b3deaSKacheong Poon * 1654439b3deaSKacheong Poon * It will update the setting for sockfs/stream head and also take 1655439b3deaSKacheong Poon * care of any data that arrived before accept() wad called. 1656439b3deaSKacheong Poon * In case we already received a FIN then tcp_accept_finish will send up 1657439b3deaSKacheong Poon * the ordrel. It will also send up a window update if the window 1658439b3deaSKacheong Poon * has opened up. 1659439b3deaSKacheong Poon */ 1660439b3deaSKacheong Poon 1661439b3deaSKacheong Poon /* 1662439b3deaSKacheong Poon * XXX: we currently have a problem if XTI application closes the 1663439b3deaSKacheong Poon * acceptor stream in between. This problem exists in on10-gate also 1664439b3deaSKacheong Poon * and is well know but nothing can be done short of major rewrite 1665439b3deaSKacheong Poon * to fix it. Now it is possible to take care of it by assigning TLI/XTI 1666439b3deaSKacheong Poon * eager same squeue as listener (we can distinguish non socket 1667439b3deaSKacheong Poon * listeners at the time of handling a SYN in tcp_input_listener) 1668439b3deaSKacheong Poon * and do most of the work that tcp_accept_finish does here itself 1669439b3deaSKacheong Poon * and then get behind the acceptor squeue to access the acceptor 1670439b3deaSKacheong Poon * queue. 1671439b3deaSKacheong Poon */ 1672439b3deaSKacheong Poon /* 1673439b3deaSKacheong Poon * We already have a ref on tcp so no need to do one before squeue_enter 1674439b3deaSKacheong Poon */ 1675439b3deaSKacheong Poon SQUEUE_ENTER_ONE(eager->tcp_connp->conn_sqp, discon_mp, 1676439b3deaSKacheong Poon tcp_accept_finish, eager->tcp_connp, NULL, SQ_FILL, 1677439b3deaSKacheong Poon SQTAG_TCP_ACCEPT_FINISH); 1678439b3deaSKacheong Poon } 1679439b3deaSKacheong Poon 1680439b3deaSKacheong Poon 1681439b3deaSKacheong Poon /* 1682439b3deaSKacheong Poon * This is the STREAMS entry point for T_CONN_RES coming down on 1683439b3deaSKacheong Poon * Acceptor STREAM when sockfs listener does accept processing. 1684439b3deaSKacheong Poon * Read the block comment on top of tcp_input_listener(). 1685439b3deaSKacheong Poon */ 1686439b3deaSKacheong Poon void 1687439b3deaSKacheong Poon tcp_tpi_accept(queue_t *q, mblk_t *mp) 1688439b3deaSKacheong Poon { 1689439b3deaSKacheong Poon queue_t *rq = RD(q); 1690439b3deaSKacheong Poon struct T_conn_res *conn_res; 1691439b3deaSKacheong Poon tcp_t *eager; 1692439b3deaSKacheong Poon tcp_t *listener; 1693439b3deaSKacheong Poon struct T_ok_ack *ok; 1694439b3deaSKacheong Poon t_scalar_t PRIM_type; 16953e95bd4aSAnders Persson mblk_t *discon_mp; 1696439b3deaSKacheong Poon conn_t *econnp; 1697439b3deaSKacheong Poon cred_t *cr; 1698439b3deaSKacheong Poon 1699439b3deaSKacheong Poon ASSERT(DB_TYPE(mp) == M_PROTO); 1700439b3deaSKacheong Poon 1701439b3deaSKacheong Poon /* 1702439b3deaSKacheong Poon * All Solaris components should pass a db_credp 1703439b3deaSKacheong Poon * for this TPI message, hence we ASSERT. 1704439b3deaSKacheong Poon * But in case there is some other M_PROTO that looks 1705439b3deaSKacheong Poon * like a TPI message sent by some other kernel 1706439b3deaSKacheong Poon * component, we check and return an error. 1707439b3deaSKacheong Poon */ 1708439b3deaSKacheong Poon cr = msg_getcred(mp, NULL); 1709439b3deaSKacheong Poon ASSERT(cr != NULL); 1710439b3deaSKacheong Poon if (cr == NULL) { 1711439b3deaSKacheong Poon mp = mi_tpi_err_ack_alloc(mp, TSYSERR, EINVAL); 1712439b3deaSKacheong Poon if (mp != NULL) 1713439b3deaSKacheong Poon putnext(rq, mp); 1714439b3deaSKacheong Poon return; 1715439b3deaSKacheong Poon } 1716439b3deaSKacheong Poon conn_res = (struct T_conn_res *)mp->b_rptr; 1717439b3deaSKacheong Poon ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX); 1718439b3deaSKacheong Poon if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_res)) { 1719439b3deaSKacheong Poon mp = mi_tpi_err_ack_alloc(mp, TPROTO, 0); 1720439b3deaSKacheong Poon if (mp != NULL) 1721439b3deaSKacheong Poon putnext(rq, mp); 1722439b3deaSKacheong Poon return; 1723439b3deaSKacheong Poon } 1724439b3deaSKacheong Poon switch (conn_res->PRIM_type) { 1725439b3deaSKacheong Poon case O_T_CONN_RES: 1726439b3deaSKacheong Poon case T_CONN_RES: 1727439b3deaSKacheong Poon /* 1728439b3deaSKacheong Poon * We pass up an err ack if allocb fails. This will 1729439b3deaSKacheong Poon * cause sockfs to issue a T_DISCON_REQ which will cause 1730439b3deaSKacheong Poon * tcp_eager_blowoff to be called. sockfs will then call 1731439b3deaSKacheong Poon * rq->q_qinfo->qi_qclose to cleanup the acceptor stream. 1732439b3deaSKacheong Poon * we need to do the allocb up here because we have to 1733439b3deaSKacheong Poon * make sure rq->q_qinfo->qi_qclose still points to the 1734439b3deaSKacheong Poon * correct function (tcp_tpi_close_accept) in case allocb 1735439b3deaSKacheong Poon * fails. 1736439b3deaSKacheong Poon */ 1737439b3deaSKacheong Poon bcopy(mp->b_rptr + conn_res->OPT_offset, 1738439b3deaSKacheong Poon &eager, conn_res->OPT_length); 1739439b3deaSKacheong Poon PRIM_type = conn_res->PRIM_type; 1740439b3deaSKacheong Poon mp->b_datap->db_type = M_PCPROTO; 1741439b3deaSKacheong Poon mp->b_wptr = mp->b_rptr + sizeof (struct T_ok_ack); 1742439b3deaSKacheong Poon ok = (struct T_ok_ack *)mp->b_rptr; 1743439b3deaSKacheong Poon ok->PRIM_type = T_OK_ACK; 1744439b3deaSKacheong Poon ok->CORRECT_prim = PRIM_type; 1745439b3deaSKacheong Poon econnp = eager->tcp_connp; 1746439b3deaSKacheong Poon econnp->conn_dev = (dev_t)RD(q)->q_ptr; 1747439b3deaSKacheong Poon econnp->conn_minor_arena = (vmem_t *)(WR(q)->q_ptr); 1748439b3deaSKacheong Poon econnp->conn_rq = rq; 1749439b3deaSKacheong Poon econnp->conn_wq = q; 1750439b3deaSKacheong Poon rq->q_ptr = econnp; 1751439b3deaSKacheong Poon rq->q_qinfo = &tcp_rinitv4; /* No open - same as rinitv6 */ 1752439b3deaSKacheong Poon q->q_ptr = econnp; 1753439b3deaSKacheong Poon q->q_qinfo = &tcp_winit; 1754439b3deaSKacheong Poon listener = eager->tcp_listener; 1755439b3deaSKacheong Poon 17563e95bd4aSAnders Persson /* 17573e95bd4aSAnders Persson * Pre allocate the discon_ind mblk also. tcp_accept_finish will 17583e95bd4aSAnders Persson * use it if something failed. 17593e95bd4aSAnders Persson */ 17603e95bd4aSAnders Persson discon_mp = allocb(MAX(sizeof (struct T_discon_ind), 17613e95bd4aSAnders Persson sizeof (struct stroptions)), BPRI_HI); 17623e95bd4aSAnders Persson 17633e95bd4aSAnders Persson if (discon_mp == NULL) { 1764439b3deaSKacheong Poon mp = mi_tpi_err_ack_alloc(mp, TPROTO, 0); 1765439b3deaSKacheong Poon if (mp != NULL) 1766439b3deaSKacheong Poon putnext(rq, mp); 1767439b3deaSKacheong Poon return; 1768439b3deaSKacheong Poon } 1769439b3deaSKacheong Poon 17703e95bd4aSAnders Persson eager->tcp_issocket = B_TRUE; 17713e95bd4aSAnders Persson 17723e95bd4aSAnders Persson ASSERT(econnp->conn_netstack == 17733e95bd4aSAnders Persson listener->tcp_connp->conn_netstack); 17743e95bd4aSAnders Persson ASSERT(eager->tcp_tcps == listener->tcp_tcps); 17753e95bd4aSAnders Persson 17763e95bd4aSAnders Persson /* Put the ref for IP */ 17773e95bd4aSAnders Persson CONN_INC_REF(econnp); 17783e95bd4aSAnders Persson 17793e95bd4aSAnders Persson /* 17803e95bd4aSAnders Persson * We should have minimum of 3 references on the conn 17813e95bd4aSAnders Persson * at this point. One each for TCP and IP and one for 17823e95bd4aSAnders Persson * the T_conn_ind that was sent up when the 3-way handshake 17833e95bd4aSAnders Persson * completed. In the normal case we would also have another 17843e95bd4aSAnders Persson * reference (making a total of 4) for the conn being in the 17853e95bd4aSAnders Persson * classifier hash list. However the eager could have received 17863e95bd4aSAnders Persson * an RST subsequently and tcp_closei_local could have removed 17873e95bd4aSAnders Persson * the eager from the classifier hash list, hence we can't 17883e95bd4aSAnders Persson * assert that reference. 17893e95bd4aSAnders Persson */ 17903e95bd4aSAnders Persson ASSERT(econnp->conn_ref >= 3); 17913e95bd4aSAnders Persson 17923e95bd4aSAnders Persson mutex_enter(&listener->tcp_eager_lock); 17933e95bd4aSAnders Persson if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) { 1794*dd49f125SAnders Persson mblk_t *conn_ind = tcp_get_def_conn_ind(listener); 17953e95bd4aSAnders Persson 17963e95bd4aSAnders Persson /* Need to get inside the listener perimeter */ 17973e95bd4aSAnders Persson CONN_INC_REF(listener->tcp_connp); 1798*dd49f125SAnders Persson SQUEUE_ENTER_ONE(listener->tcp_connp->conn_sqp, 1799*dd49f125SAnders Persson conn_ind, tcp_send_pending, listener->tcp_connp, 1800*dd49f125SAnders Persson NULL, SQ_FILL, SQTAG_TCP_SEND_PENDING); 18013e95bd4aSAnders Persson } 18023e95bd4aSAnders Persson tcp_eager_unlink(eager); 18033e95bd4aSAnders Persson mutex_exit(&listener->tcp_eager_lock); 18043e95bd4aSAnders Persson 18053e95bd4aSAnders Persson /* 18063e95bd4aSAnders Persson * At this point, the eager is detached from the listener 18073e95bd4aSAnders Persson * but we still have an extra refs on eager (apart from the 18083e95bd4aSAnders Persson * usual tcp references). The ref was placed in tcp_input_data 18093e95bd4aSAnders Persson * before sending the conn_ind in tcp_send_conn_ind. 18103e95bd4aSAnders Persson * The ref will be dropped in tcp_accept_finish(). 18113e95bd4aSAnders Persson */ 18123e95bd4aSAnders Persson SQUEUE_ENTER_ONE(econnp->conn_sqp, discon_mp, tcp_accept_finish, 18133e95bd4aSAnders Persson econnp, NULL, SQ_NODRAIN, SQTAG_TCP_ACCEPT_FINISH_Q0); 18143e95bd4aSAnders Persson 1815439b3deaSKacheong Poon /* 1816439b3deaSKacheong Poon * Send the new local address also up to sockfs. There 1817439b3deaSKacheong Poon * should already be enough space in the mp that came 1818439b3deaSKacheong Poon * down from soaccept(). 1819439b3deaSKacheong Poon */ 1820439b3deaSKacheong Poon if (econnp->conn_family == AF_INET) { 1821439b3deaSKacheong Poon sin_t *sin; 1822439b3deaSKacheong Poon 1823439b3deaSKacheong Poon ASSERT((mp->b_datap->db_lim - mp->b_datap->db_base) >= 1824439b3deaSKacheong Poon (sizeof (struct T_ok_ack) + sizeof (sin_t))); 1825439b3deaSKacheong Poon sin = (sin_t *)mp->b_wptr; 1826439b3deaSKacheong Poon mp->b_wptr += sizeof (sin_t); 1827439b3deaSKacheong Poon sin->sin_family = AF_INET; 1828439b3deaSKacheong Poon sin->sin_port = econnp->conn_lport; 1829439b3deaSKacheong Poon sin->sin_addr.s_addr = econnp->conn_laddr_v4; 1830439b3deaSKacheong Poon } else { 1831439b3deaSKacheong Poon sin6_t *sin6; 1832439b3deaSKacheong Poon 1833439b3deaSKacheong Poon ASSERT((mp->b_datap->db_lim - mp->b_datap->db_base) >= 1834439b3deaSKacheong Poon sizeof (struct T_ok_ack) + sizeof (sin6_t)); 1835439b3deaSKacheong Poon sin6 = (sin6_t *)mp->b_wptr; 1836439b3deaSKacheong Poon mp->b_wptr += sizeof (sin6_t); 1837439b3deaSKacheong Poon sin6->sin6_family = AF_INET6; 1838439b3deaSKacheong Poon sin6->sin6_port = econnp->conn_lport; 1839439b3deaSKacheong Poon sin6->sin6_addr = econnp->conn_laddr_v6; 1840439b3deaSKacheong Poon if (econnp->conn_ipversion == IPV4_VERSION) 1841439b3deaSKacheong Poon sin6->sin6_flowinfo = 0; 1842439b3deaSKacheong Poon else 1843439b3deaSKacheong Poon sin6->sin6_flowinfo = econnp->conn_flowinfo; 1844439b3deaSKacheong Poon if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6) && 1845439b3deaSKacheong Poon (econnp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 1846439b3deaSKacheong Poon sin6->sin6_scope_id = 1847439b3deaSKacheong Poon econnp->conn_ixa->ixa_scopeid; 1848439b3deaSKacheong Poon } else { 1849439b3deaSKacheong Poon sin6->sin6_scope_id = 0; 1850439b3deaSKacheong Poon } 1851439b3deaSKacheong Poon sin6->__sin6_src_id = 0; 1852439b3deaSKacheong Poon } 1853439b3deaSKacheong Poon 1854439b3deaSKacheong Poon putnext(rq, mp); 1855439b3deaSKacheong Poon return; 1856439b3deaSKacheong Poon default: 1857439b3deaSKacheong Poon mp = mi_tpi_err_ack_alloc(mp, TNOTSUPPORT, 0); 1858439b3deaSKacheong Poon if (mp != NULL) 1859439b3deaSKacheong Poon putnext(rq, mp); 1860439b3deaSKacheong Poon return; 1861439b3deaSKacheong Poon } 1862439b3deaSKacheong Poon } 1863439b3deaSKacheong Poon 1864439b3deaSKacheong Poon /* 1865439b3deaSKacheong Poon * The function called through squeue to get behind listener's perimeter to 1866439b3deaSKacheong Poon * send a deferred conn_ind. 1867439b3deaSKacheong Poon */ 1868439b3deaSKacheong Poon /* ARGSUSED */ 1869439b3deaSKacheong Poon void 1870439b3deaSKacheong Poon tcp_send_pending(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) 1871439b3deaSKacheong Poon { 1872439b3deaSKacheong Poon conn_t *lconnp = (conn_t *)arg; 1873439b3deaSKacheong Poon tcp_t *listener = lconnp->conn_tcp; 1874439b3deaSKacheong Poon struct T_conn_ind *conn_ind; 1875439b3deaSKacheong Poon tcp_t *tcp; 1876439b3deaSKacheong Poon 1877439b3deaSKacheong Poon conn_ind = (struct T_conn_ind *)mp->b_rptr; 1878439b3deaSKacheong Poon bcopy(mp->b_rptr + conn_ind->OPT_offset, &tcp, 1879439b3deaSKacheong Poon conn_ind->OPT_length); 1880439b3deaSKacheong Poon 1881439b3deaSKacheong Poon if (listener->tcp_state != TCPS_LISTEN) { 1882439b3deaSKacheong Poon /* 1883439b3deaSKacheong Poon * If listener has closed, it would have caused a 1884439b3deaSKacheong Poon * a cleanup/blowoff to happen for the eager, so 1885439b3deaSKacheong Poon * we don't need to do anything more. 1886439b3deaSKacheong Poon */ 1887439b3deaSKacheong Poon freemsg(mp); 1888439b3deaSKacheong Poon return; 1889439b3deaSKacheong Poon } 1890439b3deaSKacheong Poon 18913e95bd4aSAnders Persson putnext(lconnp->conn_rq, mp); 1892439b3deaSKacheong Poon } 1893439b3deaSKacheong Poon 1894439b3deaSKacheong Poon /* 1895439b3deaSKacheong Poon * Sends the T_CONN_IND to the listener. The caller calls this 1896439b3deaSKacheong Poon * functions via squeue to get inside the listener's perimeter 1897439b3deaSKacheong Poon * once the 3 way hand shake is done a T_CONN_IND needs to be 1898439b3deaSKacheong Poon * sent. As an optimization, the caller can call this directly 1899439b3deaSKacheong Poon * if listener's perimeter is same as eager's. 1900439b3deaSKacheong Poon */ 1901439b3deaSKacheong Poon /* ARGSUSED */ 1902439b3deaSKacheong Poon void 1903439b3deaSKacheong Poon tcp_send_conn_ind(void *arg, mblk_t *mp, void *arg2) 1904439b3deaSKacheong Poon { 1905439b3deaSKacheong Poon conn_t *lconnp = (conn_t *)arg; 1906439b3deaSKacheong Poon tcp_t *listener = lconnp->conn_tcp; 1907439b3deaSKacheong Poon tcp_t *tcp; 1908439b3deaSKacheong Poon struct T_conn_ind *conn_ind; 1909439b3deaSKacheong Poon ipaddr_t *addr_cache; 1910439b3deaSKacheong Poon boolean_t need_send_conn_ind = B_FALSE; 1911439b3deaSKacheong Poon tcp_stack_t *tcps = listener->tcp_tcps; 1912439b3deaSKacheong Poon 1913439b3deaSKacheong Poon /* retrieve the eager */ 1914439b3deaSKacheong Poon conn_ind = (struct T_conn_ind *)mp->b_rptr; 1915439b3deaSKacheong Poon ASSERT(conn_ind->OPT_offset != 0 && 1916439b3deaSKacheong Poon conn_ind->OPT_length == sizeof (intptr_t)); 1917439b3deaSKacheong Poon bcopy(mp->b_rptr + conn_ind->OPT_offset, &tcp, 1918439b3deaSKacheong Poon conn_ind->OPT_length); 1919439b3deaSKacheong Poon 1920439b3deaSKacheong Poon /* 1921439b3deaSKacheong Poon * TLI/XTI applications will get confused by 1922439b3deaSKacheong Poon * sending eager as an option since it violates 1923439b3deaSKacheong Poon * the option semantics. So remove the eager as 1924439b3deaSKacheong Poon * option since TLI/XTI app doesn't need it anyway. 1925439b3deaSKacheong Poon */ 1926439b3deaSKacheong Poon if (!TCP_IS_SOCKET(listener)) { 1927439b3deaSKacheong Poon conn_ind->OPT_length = 0; 1928439b3deaSKacheong Poon conn_ind->OPT_offset = 0; 1929439b3deaSKacheong Poon } 1930439b3deaSKacheong Poon if (listener->tcp_state != TCPS_LISTEN) { 1931439b3deaSKacheong Poon /* 1932439b3deaSKacheong Poon * If listener has closed, it would have caused a 1933439b3deaSKacheong Poon * a cleanup/blowoff to happen for the eager. We 1934439b3deaSKacheong Poon * just need to return. 1935439b3deaSKacheong Poon */ 1936439b3deaSKacheong Poon freemsg(mp); 1937439b3deaSKacheong Poon return; 1938439b3deaSKacheong Poon } 1939439b3deaSKacheong Poon 1940439b3deaSKacheong Poon 1941439b3deaSKacheong Poon /* 1942439b3deaSKacheong Poon * if the conn_req_q is full defer passing up the 1943439b3deaSKacheong Poon * T_CONN_IND until space is availabe after t_accept() 1944439b3deaSKacheong Poon * processing 1945439b3deaSKacheong Poon */ 1946439b3deaSKacheong Poon mutex_enter(&listener->tcp_eager_lock); 1947439b3deaSKacheong Poon 1948439b3deaSKacheong Poon /* 1949439b3deaSKacheong Poon * Take the eager out, if it is in the list of droppable eagers 1950439b3deaSKacheong Poon * as we are here because the 3W handshake is over. 1951439b3deaSKacheong Poon */ 1952439b3deaSKacheong Poon MAKE_UNDROPPABLE(tcp); 1953439b3deaSKacheong Poon 1954439b3deaSKacheong Poon if (listener->tcp_conn_req_cnt_q < listener->tcp_conn_req_max) { 1955439b3deaSKacheong Poon tcp_t *tail; 1956439b3deaSKacheong Poon 1957439b3deaSKacheong Poon /* 1958439b3deaSKacheong Poon * The eager already has an extra ref put in tcp_input_data 1959439b3deaSKacheong Poon * so that it stays till accept comes back even though it 1960439b3deaSKacheong Poon * might get into TCPS_CLOSED as a result of a TH_RST etc. 1961439b3deaSKacheong Poon */ 1962439b3deaSKacheong Poon ASSERT(listener->tcp_conn_req_cnt_q0 > 0); 1963439b3deaSKacheong Poon listener->tcp_conn_req_cnt_q0--; 1964439b3deaSKacheong Poon listener->tcp_conn_req_cnt_q++; 1965439b3deaSKacheong Poon 1966439b3deaSKacheong Poon /* Move from SYN_RCVD to ESTABLISHED list */ 1967439b3deaSKacheong Poon tcp->tcp_eager_next_q0->tcp_eager_prev_q0 = 1968439b3deaSKacheong Poon tcp->tcp_eager_prev_q0; 1969439b3deaSKacheong Poon tcp->tcp_eager_prev_q0->tcp_eager_next_q0 = 1970439b3deaSKacheong Poon tcp->tcp_eager_next_q0; 1971439b3deaSKacheong Poon tcp->tcp_eager_prev_q0 = NULL; 1972439b3deaSKacheong Poon tcp->tcp_eager_next_q0 = NULL; 1973439b3deaSKacheong Poon 1974439b3deaSKacheong Poon /* 1975439b3deaSKacheong Poon * Insert at end of the queue because sockfs 1976439b3deaSKacheong Poon * sends down T_CONN_RES in chronological 1977439b3deaSKacheong Poon * order. Leaving the older conn indications 1978439b3deaSKacheong Poon * at front of the queue helps reducing search 1979439b3deaSKacheong Poon * time. 1980439b3deaSKacheong Poon */ 1981439b3deaSKacheong Poon tail = listener->tcp_eager_last_q; 1982439b3deaSKacheong Poon if (tail != NULL) 1983439b3deaSKacheong Poon tail->tcp_eager_next_q = tcp; 1984439b3deaSKacheong Poon else 1985439b3deaSKacheong Poon listener->tcp_eager_next_q = tcp; 1986439b3deaSKacheong Poon listener->tcp_eager_last_q = tcp; 1987439b3deaSKacheong Poon tcp->tcp_eager_next_q = NULL; 1988439b3deaSKacheong Poon /* 1989439b3deaSKacheong Poon * Delay sending up the T_conn_ind until we are 1990439b3deaSKacheong Poon * done with the eager. Once we have have sent up 1991439b3deaSKacheong Poon * the T_conn_ind, the accept can potentially complete 1992439b3deaSKacheong Poon * any time and release the refhold we have on the eager. 1993439b3deaSKacheong Poon */ 1994439b3deaSKacheong Poon need_send_conn_ind = B_TRUE; 1995439b3deaSKacheong Poon } else { 1996439b3deaSKacheong Poon /* 1997439b3deaSKacheong Poon * Defer connection on q0 and set deferred 1998439b3deaSKacheong Poon * connection bit true 1999439b3deaSKacheong Poon */ 2000439b3deaSKacheong Poon tcp->tcp_conn_def_q0 = B_TRUE; 2001439b3deaSKacheong Poon 2002439b3deaSKacheong Poon /* take tcp out of q0 ... */ 2003439b3deaSKacheong Poon tcp->tcp_eager_prev_q0->tcp_eager_next_q0 = 2004439b3deaSKacheong Poon tcp->tcp_eager_next_q0; 2005439b3deaSKacheong Poon tcp->tcp_eager_next_q0->tcp_eager_prev_q0 = 2006439b3deaSKacheong Poon tcp->tcp_eager_prev_q0; 2007439b3deaSKacheong Poon 2008439b3deaSKacheong Poon /* ... and place it at the end of q0 */ 2009439b3deaSKacheong Poon tcp->tcp_eager_prev_q0 = listener->tcp_eager_prev_q0; 2010439b3deaSKacheong Poon tcp->tcp_eager_next_q0 = listener; 2011439b3deaSKacheong Poon listener->tcp_eager_prev_q0->tcp_eager_next_q0 = tcp; 2012439b3deaSKacheong Poon listener->tcp_eager_prev_q0 = tcp; 2013439b3deaSKacheong Poon tcp->tcp_conn.tcp_eager_conn_ind = mp; 2014439b3deaSKacheong Poon } 2015439b3deaSKacheong Poon 2016439b3deaSKacheong Poon /* we have timed out before */ 2017439b3deaSKacheong Poon if (tcp->tcp_syn_rcvd_timeout != 0) { 2018439b3deaSKacheong Poon tcp->tcp_syn_rcvd_timeout = 0; 2019439b3deaSKacheong Poon listener->tcp_syn_rcvd_timeout--; 2020439b3deaSKacheong Poon if (listener->tcp_syn_defense && 2021439b3deaSKacheong Poon listener->tcp_syn_rcvd_timeout <= 2022439b3deaSKacheong Poon (tcps->tcps_conn_req_max_q0 >> 5) && 2023439b3deaSKacheong Poon 10*MINUTES < TICK_TO_MSEC(ddi_get_lbolt64() - 2024439b3deaSKacheong Poon listener->tcp_last_rcv_lbolt)) { 2025439b3deaSKacheong Poon /* 2026439b3deaSKacheong Poon * Turn off the defense mode if we 2027439b3deaSKacheong Poon * believe the SYN attack is over. 2028439b3deaSKacheong Poon */ 2029439b3deaSKacheong Poon listener->tcp_syn_defense = B_FALSE; 2030439b3deaSKacheong Poon if (listener->tcp_ip_addr_cache) { 2031439b3deaSKacheong Poon kmem_free((void *)listener->tcp_ip_addr_cache, 2032439b3deaSKacheong Poon IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t)); 2033439b3deaSKacheong Poon listener->tcp_ip_addr_cache = NULL; 2034439b3deaSKacheong Poon } 2035439b3deaSKacheong Poon } 2036439b3deaSKacheong Poon } 2037439b3deaSKacheong Poon addr_cache = (ipaddr_t *)(listener->tcp_ip_addr_cache); 2038439b3deaSKacheong Poon if (addr_cache != NULL) { 2039439b3deaSKacheong Poon /* 2040439b3deaSKacheong Poon * We have finished a 3-way handshake with this 2041439b3deaSKacheong Poon * remote host. This proves the IP addr is good. 2042439b3deaSKacheong Poon * Cache it! 2043439b3deaSKacheong Poon */ 2044439b3deaSKacheong Poon addr_cache[IP_ADDR_CACHE_HASH(tcp->tcp_connp->conn_faddr_v4)] = 2045439b3deaSKacheong Poon tcp->tcp_connp->conn_faddr_v4; 2046439b3deaSKacheong Poon } 2047439b3deaSKacheong Poon mutex_exit(&listener->tcp_eager_lock); 2048439b3deaSKacheong Poon if (need_send_conn_ind) 20493e95bd4aSAnders Persson putnext(lconnp->conn_rq, mp); 2050439b3deaSKacheong Poon } 2051