1721fffe3SKacheong Poon /* 2721fffe3SKacheong Poon * CDDL HEADER START 3721fffe3SKacheong Poon * 4721fffe3SKacheong Poon * The contents of this file are subject to the terms of the 5721fffe3SKacheong Poon * Common Development and Distribution License (the "License"). 6721fffe3SKacheong Poon * You may not use this file except in compliance with the License. 7721fffe3SKacheong Poon * 8721fffe3SKacheong Poon * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9721fffe3SKacheong Poon * or http://www.opensolaris.org/os/licensing. 10721fffe3SKacheong Poon * See the License for the specific language governing permissions 11721fffe3SKacheong Poon * and limitations under the License. 12721fffe3SKacheong Poon * 13721fffe3SKacheong Poon * When distributing Covered Code, include this CDDL HEADER in each 14721fffe3SKacheong Poon * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15721fffe3SKacheong Poon * If applicable, add the following below this CDDL HEADER, with the 16721fffe3SKacheong Poon * fields enclosed by brackets "[]" replaced with your own identifying 17721fffe3SKacheong Poon * information: Portions Copyright [yyyy] [name of copyright owner] 18721fffe3SKacheong Poon * 19721fffe3SKacheong Poon * CDDL HEADER END 20721fffe3SKacheong Poon */ 21721fffe3SKacheong Poon 22721fffe3SKacheong Poon /* 2366cd0f60SKacheong Poon * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 243d0a255cSGarrett D'Amore * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. 257f2dc2cfSBryan Cantrill * Copyright 2011 Joyent, Inc. All rights reserved. 26*633fc3a6SSebastien Roy * Copyright (c) 2014 by Delphix. All rights reserved. 
27721fffe3SKacheong Poon */ 28721fffe3SKacheong Poon 29721fffe3SKacheong Poon #include <sys/types.h> 30721fffe3SKacheong Poon #include <sys/strlog.h> 31721fffe3SKacheong Poon #include <sys/strsun.h> 32721fffe3SKacheong Poon #include <sys/squeue_impl.h> 33721fffe3SKacheong Poon #include <sys/squeue.h> 34721fffe3SKacheong Poon #include <sys/callo.h> 35721fffe3SKacheong Poon #include <sys/strsubr.h> 36721fffe3SKacheong Poon 37721fffe3SKacheong Poon #include <inet/common.h> 38721fffe3SKacheong Poon #include <inet/ip.h> 39721fffe3SKacheong Poon #include <inet/ip_ire.h> 40721fffe3SKacheong Poon #include <inet/ip_rts.h> 41721fffe3SKacheong Poon #include <inet/tcp.h> 42721fffe3SKacheong Poon #include <inet/tcp_impl.h> 43721fffe3SKacheong Poon 44721fffe3SKacheong Poon /* 45721fffe3SKacheong Poon * Implementation of TCP Timers. 46721fffe3SKacheong Poon * ============================= 47721fffe3SKacheong Poon * 48721fffe3SKacheong Poon * INTERFACE: 49721fffe3SKacheong Poon * 50721fffe3SKacheong Poon * There are two basic functions dealing with tcp timers: 51721fffe3SKacheong Poon * 52721fffe3SKacheong Poon * timeout_id_t tcp_timeout(connp, func, time) 53721fffe3SKacheong Poon * clock_t tcp_timeout_cancel(connp, timeout_id) 54721fffe3SKacheong Poon * TCP_TIMER_RESTART(tcp, intvl) 55721fffe3SKacheong Poon * 56721fffe3SKacheong Poon * tcp_timeout() starts a timer for the 'tcp' instance arranging to call 'func' 57721fffe3SKacheong Poon * after 'time' ticks passed. The function called by timeout() must adhere to 58721fffe3SKacheong Poon * the same restrictions as a driver soft interrupt handler - it must not sleep 59721fffe3SKacheong Poon * or call other functions that might sleep. The value returned is the opaque 60721fffe3SKacheong Poon * non-zero timeout identifier that can be passed to tcp_timeout_cancel() to 61721fffe3SKacheong Poon * cancel the request. The call to tcp_timeout() may fail in which case it 62721fffe3SKacheong Poon * returns zero. 
 * This is different from the timeout(9F) function, which never fails.
 *
 * The call-back function 'func' always receives 'connp' as its single
 * argument. It is always executed in the squeue corresponding to the tcp
 * structure. The tcp structure is guaranteed to be present at the time the
 * call-back is called.
 *
 * NOTE: The call-back function 'func' is never called if tcp is in
 *	the TCPS_CLOSED state.
 *
 * tcp_timeout_cancel() attempts to cancel a pending tcp_timeout()
 * request. Locks acquired by the call-back routine should not be held across
 * the call to tcp_timeout_cancel() or a deadlock may result.
 *
 * tcp_timeout_cancel() returns -1 if the timeout request is invalid.
 * Otherwise, it returns an integer value greater than or equal to 0.
 *
 * NOTE: both tcp_timeout() and tcp_timeout_cancel() should always be called
 *	within squeue context corresponding to the tcp instance. Since the
 *	call-back is also called via the same squeue, none of the race
 *	conditions described in the untimeout(9F) manual page can occur,
 *	because all calls are strictly serialized.
 *
 * TCP_TIMER_RESTART() is a macro that attempts to cancel a pending timeout
 * stored in tcp_timer_tid and starts a new one using MSEC_TO_TICK(intvl).
 * It always uses the tcp_timer() function as a call-back and stores the
 * return value of tcp_timeout() in the tcp->tcp_timer_tid field.
 *
 * IMPLEMENTATION:
 *
 * TCP timers are implemented using a three-stage process. The call to
 * tcp_timeout() uses the timeout(9F) function to call the tcp_timer_callback()
 * function when the timer expires. The tcp_timer_callback() arranges the call
 * of the tcp_timer_handler() function via the squeue corresponding to the tcp
 * instance. The tcp_timer_handler() calls the actual requested timeout
 * call-back and passes the tcp instance as an argument to it. Information is
 * passed between stages using the tcp_timer_t structure, which contains the
 * connp pointer, the tcp call-back to call and the timeout id returned by
 * timeout(9F).
 *
 * The tcp_timer_t structure is not used directly; it is embedded in an
 * mblk_t-like structure that is used to enter an squeue. The mp->b_rptr of
 * this pseudo mblk points to the beginning of the tcp_timer_t structure. The
 * tcp_timeout() returns the pointer to this mblk.
 *
 * The pseudo mblk is allocated from a special tcp_timercache kmem cache. It
 * looks like a normal mblk without an actual dblk attached to it.
 *
 * To optimize performance each tcp instance holds a small cache of timer
 * mblocks. In the current implementation it caches up to two timer mblocks per
 * tcp instance.
The cache is preserved over tcp frees and is only freed when 114721fffe3SKacheong Poon * the whole tcp structure is destroyed by its kmem destructor. Since all tcp 115721fffe3SKacheong Poon * timer processing happens on a corresponding squeue, the cache manipulation 116721fffe3SKacheong Poon * does not require any locks. Experiments show that majority of timer mblocks 117721fffe3SKacheong Poon * allocations are satisfied from the tcp cache and do not involve kmem calls. 118721fffe3SKacheong Poon * 119721fffe3SKacheong Poon * The tcp_timeout() places a refhold on the connp instance which guarantees 120721fffe3SKacheong Poon * that it will be present at the time the call-back function fires. The 121721fffe3SKacheong Poon * tcp_timer_handler() drops the reference after calling the call-back, so the 122721fffe3SKacheong Poon * call-back function does not need to manipulate the references explicitly. 123721fffe3SKacheong Poon */ 124721fffe3SKacheong Poon 125721fffe3SKacheong Poon kmem_cache_t *tcp_timercache; 126721fffe3SKacheong Poon 127721fffe3SKacheong Poon static void tcp_ip_notify(tcp_t *); 128721fffe3SKacheong Poon static void tcp_timer_callback(void *); 129721fffe3SKacheong Poon static void tcp_timer_free(tcp_t *, mblk_t *); 130721fffe3SKacheong Poon static void tcp_timer_handler(void *, mblk_t *, void *, ip_recv_attr_t *); 131721fffe3SKacheong Poon 13266cd0f60SKacheong Poon /* 13366cd0f60SKacheong Poon * tim is in millisec. 
13466cd0f60SKacheong Poon */ 135721fffe3SKacheong Poon timeout_id_t 13666cd0f60SKacheong Poon tcp_timeout(conn_t *connp, void (*f)(void *), hrtime_t tim) 137721fffe3SKacheong Poon { 138721fffe3SKacheong Poon mblk_t *mp; 139721fffe3SKacheong Poon tcp_timer_t *tcpt; 140721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 141721fffe3SKacheong Poon 142721fffe3SKacheong Poon ASSERT(connp->conn_sqp != NULL); 143721fffe3SKacheong Poon 144721fffe3SKacheong Poon TCP_DBGSTAT(tcp->tcp_tcps, tcp_timeout_calls); 145721fffe3SKacheong Poon 146721fffe3SKacheong Poon if (tcp->tcp_timercache == NULL) { 147721fffe3SKacheong Poon mp = tcp_timermp_alloc(KM_NOSLEEP | KM_PANIC); 148721fffe3SKacheong Poon } else { 149721fffe3SKacheong Poon TCP_DBGSTAT(tcp->tcp_tcps, tcp_timeout_cached_alloc); 150721fffe3SKacheong Poon mp = tcp->tcp_timercache; 151721fffe3SKacheong Poon tcp->tcp_timercache = mp->b_next; 152721fffe3SKacheong Poon mp->b_next = NULL; 153721fffe3SKacheong Poon ASSERT(mp->b_wptr == NULL); 154721fffe3SKacheong Poon } 155721fffe3SKacheong Poon 156721fffe3SKacheong Poon CONN_INC_REF(connp); 157721fffe3SKacheong Poon tcpt = (tcp_timer_t *)mp->b_rptr; 158721fffe3SKacheong Poon tcpt->connp = connp; 159721fffe3SKacheong Poon tcpt->tcpt_proc = f; 160721fffe3SKacheong Poon /* 161721fffe3SKacheong Poon * TCP timers are normal timeouts. Plus, they do not require more than 162721fffe3SKacheong Poon * a 10 millisecond resolution. By choosing a coarser resolution and by 163721fffe3SKacheong Poon * rounding up the expiration to the next resolution boundary, we can 164721fffe3SKacheong Poon * batch timers in the callout subsystem to make TCP timers more 165721fffe3SKacheong Poon * efficient. The roundup also protects short timers from expiring too 166721fffe3SKacheong Poon * early before they have a chance to be cancelled. 
167721fffe3SKacheong Poon */ 168721fffe3SKacheong Poon tcpt->tcpt_tid = timeout_generic(CALLOUT_NORMAL, tcp_timer_callback, mp, 16966cd0f60SKacheong Poon tim * MICROSEC, CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP); 1707f2dc2cfSBryan Cantrill VERIFY(!(tcpt->tcpt_tid & CALLOUT_ID_FREE)); 171721fffe3SKacheong Poon 172721fffe3SKacheong Poon return ((timeout_id_t)mp); 173721fffe3SKacheong Poon } 174721fffe3SKacheong Poon 175721fffe3SKacheong Poon static void 176721fffe3SKacheong Poon tcp_timer_callback(void *arg) 177721fffe3SKacheong Poon { 178721fffe3SKacheong Poon mblk_t *mp = (mblk_t *)arg; 179721fffe3SKacheong Poon tcp_timer_t *tcpt; 180721fffe3SKacheong Poon conn_t *connp; 181721fffe3SKacheong Poon 182721fffe3SKacheong Poon tcpt = (tcp_timer_t *)mp->b_rptr; 183721fffe3SKacheong Poon connp = tcpt->connp; 184721fffe3SKacheong Poon SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_timer_handler, connp, 185721fffe3SKacheong Poon NULL, SQ_FILL, SQTAG_TCP_TIMER); 186721fffe3SKacheong Poon } 187721fffe3SKacheong Poon 188721fffe3SKacheong Poon /* ARGSUSED */ 189721fffe3SKacheong Poon static void 190721fffe3SKacheong Poon tcp_timer_handler(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) 191721fffe3SKacheong Poon { 192721fffe3SKacheong Poon tcp_timer_t *tcpt; 193721fffe3SKacheong Poon conn_t *connp = (conn_t *)arg; 194721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 195721fffe3SKacheong Poon 196721fffe3SKacheong Poon tcpt = (tcp_timer_t *)mp->b_rptr; 197721fffe3SKacheong Poon ASSERT(connp == tcpt->connp); 198721fffe3SKacheong Poon ASSERT((squeue_t *)arg2 == connp->conn_sqp); 199721fffe3SKacheong Poon 2007f2dc2cfSBryan Cantrill if (tcpt->tcpt_tid & CALLOUT_ID_FREE) { 2017f2dc2cfSBryan Cantrill /* 2027f2dc2cfSBryan Cantrill * This timeout was cancelled after it was enqueued to the 2037f2dc2cfSBryan Cantrill * squeue; free the timer and return. 
2047f2dc2cfSBryan Cantrill */ 2057f2dc2cfSBryan Cantrill tcp_timer_free(connp->conn_tcp, mp); 2067f2dc2cfSBryan Cantrill return; 2077f2dc2cfSBryan Cantrill } 2087f2dc2cfSBryan Cantrill 209721fffe3SKacheong Poon /* 210721fffe3SKacheong Poon * If the TCP has reached the closed state, don't proceed any 211721fffe3SKacheong Poon * further. This TCP logically does not exist on the system. 212721fffe3SKacheong Poon * tcpt_proc could for example access queues, that have already 213721fffe3SKacheong Poon * been qprocoff'ed off. 214721fffe3SKacheong Poon */ 215721fffe3SKacheong Poon if (tcp->tcp_state != TCPS_CLOSED) { 216721fffe3SKacheong Poon (*tcpt->tcpt_proc)(connp); 217721fffe3SKacheong Poon } else { 218721fffe3SKacheong Poon tcp->tcp_timer_tid = 0; 219721fffe3SKacheong Poon } 2207f2dc2cfSBryan Cantrill 221721fffe3SKacheong Poon tcp_timer_free(connp->conn_tcp, mp); 222721fffe3SKacheong Poon } 223721fffe3SKacheong Poon 224721fffe3SKacheong Poon /* 225721fffe3SKacheong Poon * There is potential race with untimeout and the handler firing at the same 226721fffe3SKacheong Poon * time. The mblock may be freed by the handler while we are trying to use 227721fffe3SKacheong Poon * it. But since both should execute on the same squeue, this race should not 228721fffe3SKacheong Poon * occur. 
229721fffe3SKacheong Poon */ 230721fffe3SKacheong Poon clock_t 231721fffe3SKacheong Poon tcp_timeout_cancel(conn_t *connp, timeout_id_t id) 232721fffe3SKacheong Poon { 233721fffe3SKacheong Poon mblk_t *mp = (mblk_t *)id; 234721fffe3SKacheong Poon tcp_timer_t *tcpt; 235721fffe3SKacheong Poon clock_t delta; 236721fffe3SKacheong Poon 237721fffe3SKacheong Poon TCP_DBGSTAT(connp->conn_tcp->tcp_tcps, tcp_timeout_cancel_reqs); 238721fffe3SKacheong Poon 239721fffe3SKacheong Poon if (mp == NULL) 240721fffe3SKacheong Poon return (-1); 241721fffe3SKacheong Poon 242721fffe3SKacheong Poon tcpt = (tcp_timer_t *)mp->b_rptr; 243721fffe3SKacheong Poon ASSERT(tcpt->connp == connp); 244721fffe3SKacheong Poon 245721fffe3SKacheong Poon delta = untimeout_default(tcpt->tcpt_tid, 0); 246721fffe3SKacheong Poon 247721fffe3SKacheong Poon if (delta >= 0) { 248721fffe3SKacheong Poon TCP_DBGSTAT(connp->conn_tcp->tcp_tcps, tcp_timeout_canceled); 249721fffe3SKacheong Poon tcp_timer_free(connp->conn_tcp, mp); 250721fffe3SKacheong Poon CONN_DEC_REF(connp); 2517f2dc2cfSBryan Cantrill } else { 2527f2dc2cfSBryan Cantrill /* 2537f2dc2cfSBryan Cantrill * If we were unable to untimeout successfully, it has already 2547f2dc2cfSBryan Cantrill * been enqueued on the squeue; mark the ID with the free 2557f2dc2cfSBryan Cantrill * bit. This bit can never be set in a valid identifier, and 2567f2dc2cfSBryan Cantrill * we'll use it to prevent the timeout from being executed. 2577f2dc2cfSBryan Cantrill * And note that we're within the squeue perimeter here, so 2587f2dc2cfSBryan Cantrill * we don't need to worry about racing with timer handling 2597f2dc2cfSBryan Cantrill * (which also executes within the perimeter). 
2607f2dc2cfSBryan Cantrill */ 2617f2dc2cfSBryan Cantrill tcpt->tcpt_tid |= CALLOUT_ID_FREE; 2627f2dc2cfSBryan Cantrill delta = 0; 263721fffe3SKacheong Poon } 264721fffe3SKacheong Poon 26566cd0f60SKacheong Poon return (TICK_TO_MSEC(delta)); 266721fffe3SKacheong Poon } 267721fffe3SKacheong Poon 268721fffe3SKacheong Poon /* 269721fffe3SKacheong Poon * Allocate space for the timer event. The allocation looks like mblk, but it is 270721fffe3SKacheong Poon * not a proper mblk. To avoid confusion we set b_wptr to NULL. 271721fffe3SKacheong Poon * 272721fffe3SKacheong Poon * Dealing with failures: If we can't allocate from the timer cache we try 273721fffe3SKacheong Poon * allocating from dblock caches using allocb_tryhard(). In this case b_wptr 274721fffe3SKacheong Poon * points to b_rptr. 275721fffe3SKacheong Poon * If we can't allocate anything using allocb_tryhard(), we perform a last 276721fffe3SKacheong Poon * attempt and use kmem_alloc_tryhard(). In this case we set b_wptr to -1 and 277721fffe3SKacheong Poon * save the actual allocation size in b_datap. 278721fffe3SKacheong Poon */ 279721fffe3SKacheong Poon mblk_t * 280721fffe3SKacheong Poon tcp_timermp_alloc(int kmflags) 281721fffe3SKacheong Poon { 282721fffe3SKacheong Poon mblk_t *mp = (mblk_t *)kmem_cache_alloc(tcp_timercache, 283721fffe3SKacheong Poon kmflags & ~KM_PANIC); 284721fffe3SKacheong Poon 285721fffe3SKacheong Poon if (mp != NULL) { 286721fffe3SKacheong Poon mp->b_next = mp->b_prev = NULL; 287721fffe3SKacheong Poon mp->b_rptr = (uchar_t *)(&mp[1]); 288721fffe3SKacheong Poon mp->b_wptr = NULL; 289721fffe3SKacheong Poon mp->b_datap = NULL; 290721fffe3SKacheong Poon mp->b_queue = NULL; 291721fffe3SKacheong Poon mp->b_cont = NULL; 292721fffe3SKacheong Poon } else if (kmflags & KM_PANIC) { 293721fffe3SKacheong Poon /* 294721fffe3SKacheong Poon * Failed to allocate memory for the timer. Try allocating from 295721fffe3SKacheong Poon * dblock caches. 
296721fffe3SKacheong Poon */ 297721fffe3SKacheong Poon /* ipclassifier calls this from a constructor - hence no tcps */ 298721fffe3SKacheong Poon TCP_G_STAT(tcp_timermp_allocfail); 299721fffe3SKacheong Poon mp = allocb_tryhard(sizeof (tcp_timer_t)); 300721fffe3SKacheong Poon if (mp == NULL) { 301721fffe3SKacheong Poon size_t size = 0; 302721fffe3SKacheong Poon /* 303721fffe3SKacheong Poon * Memory is really low. Try tryhard allocation. 304721fffe3SKacheong Poon * 305721fffe3SKacheong Poon * ipclassifier calls this from a constructor - 306721fffe3SKacheong Poon * hence no tcps 307721fffe3SKacheong Poon */ 308721fffe3SKacheong Poon TCP_G_STAT(tcp_timermp_allocdblfail); 309721fffe3SKacheong Poon mp = kmem_alloc_tryhard(sizeof (mblk_t) + 310721fffe3SKacheong Poon sizeof (tcp_timer_t), &size, kmflags); 311721fffe3SKacheong Poon mp->b_rptr = (uchar_t *)(&mp[1]); 312721fffe3SKacheong Poon mp->b_next = mp->b_prev = NULL; 313721fffe3SKacheong Poon mp->b_wptr = (uchar_t *)-1; 314721fffe3SKacheong Poon mp->b_datap = (dblk_t *)size; 315721fffe3SKacheong Poon mp->b_queue = NULL; 316721fffe3SKacheong Poon mp->b_cont = NULL; 317721fffe3SKacheong Poon } 318721fffe3SKacheong Poon ASSERT(mp->b_wptr != NULL); 319721fffe3SKacheong Poon } 320721fffe3SKacheong Poon /* ipclassifier calls this from a constructor - hence no tcps */ 321721fffe3SKacheong Poon TCP_G_DBGSTAT(tcp_timermp_alloced); 322721fffe3SKacheong Poon 323721fffe3SKacheong Poon return (mp); 324721fffe3SKacheong Poon } 325721fffe3SKacheong Poon 326721fffe3SKacheong Poon /* 327721fffe3SKacheong Poon * Free per-tcp timer cache. 328721fffe3SKacheong Poon * It can only contain entries from tcp_timercache. 
329721fffe3SKacheong Poon */ 330721fffe3SKacheong Poon void 331721fffe3SKacheong Poon tcp_timermp_free(tcp_t *tcp) 332721fffe3SKacheong Poon { 333721fffe3SKacheong Poon mblk_t *mp; 334721fffe3SKacheong Poon 335721fffe3SKacheong Poon while ((mp = tcp->tcp_timercache) != NULL) { 336721fffe3SKacheong Poon ASSERT(mp->b_wptr == NULL); 337721fffe3SKacheong Poon tcp->tcp_timercache = tcp->tcp_timercache->b_next; 338721fffe3SKacheong Poon kmem_cache_free(tcp_timercache, mp); 339721fffe3SKacheong Poon } 340721fffe3SKacheong Poon } 341721fffe3SKacheong Poon 342721fffe3SKacheong Poon /* 343721fffe3SKacheong Poon * Free timer event. Put it on the per-tcp timer cache if there is not too many 344721fffe3SKacheong Poon * events there already (currently at most two events are cached). 345721fffe3SKacheong Poon * If the event is not allocated from the timer cache, free it right away. 346721fffe3SKacheong Poon */ 347721fffe3SKacheong Poon static void 348721fffe3SKacheong Poon tcp_timer_free(tcp_t *tcp, mblk_t *mp) 349721fffe3SKacheong Poon { 350721fffe3SKacheong Poon mblk_t *mp1 = tcp->tcp_timercache; 351721fffe3SKacheong Poon 352721fffe3SKacheong Poon if (mp->b_wptr != NULL) { 353721fffe3SKacheong Poon /* 354721fffe3SKacheong Poon * This allocation is not from a timer cache, free it right 355721fffe3SKacheong Poon * away. 
356721fffe3SKacheong Poon */ 357721fffe3SKacheong Poon if (mp->b_wptr != (uchar_t *)-1) 358721fffe3SKacheong Poon freeb(mp); 359721fffe3SKacheong Poon else 360721fffe3SKacheong Poon kmem_free(mp, (size_t)mp->b_datap); 361721fffe3SKacheong Poon } else if (mp1 == NULL || mp1->b_next == NULL) { 362721fffe3SKacheong Poon /* Cache this timer block for future allocations */ 363721fffe3SKacheong Poon mp->b_rptr = (uchar_t *)(&mp[1]); 364721fffe3SKacheong Poon mp->b_next = mp1; 365721fffe3SKacheong Poon tcp->tcp_timercache = mp; 366721fffe3SKacheong Poon } else { 367721fffe3SKacheong Poon kmem_cache_free(tcp_timercache, mp); 368721fffe3SKacheong Poon TCP_DBGSTAT(tcp->tcp_tcps, tcp_timermp_freed); 369721fffe3SKacheong Poon } 370721fffe3SKacheong Poon } 371721fffe3SKacheong Poon 372721fffe3SKacheong Poon /* 373721fffe3SKacheong Poon * Stop all TCP timers. 374721fffe3SKacheong Poon */ 375721fffe3SKacheong Poon void 376721fffe3SKacheong Poon tcp_timers_stop(tcp_t *tcp) 377721fffe3SKacheong Poon { 378721fffe3SKacheong Poon if (tcp->tcp_timer_tid != 0) { 379721fffe3SKacheong Poon (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_timer_tid); 380721fffe3SKacheong Poon tcp->tcp_timer_tid = 0; 381721fffe3SKacheong Poon } 382721fffe3SKacheong Poon if (tcp->tcp_ka_tid != 0) { 383721fffe3SKacheong Poon (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_ka_tid); 384721fffe3SKacheong Poon tcp->tcp_ka_tid = 0; 385721fffe3SKacheong Poon } 386721fffe3SKacheong Poon if (tcp->tcp_ack_tid != 0) { 387721fffe3SKacheong Poon (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_ack_tid); 388721fffe3SKacheong Poon tcp->tcp_ack_tid = 0; 389721fffe3SKacheong Poon } 390721fffe3SKacheong Poon if (tcp->tcp_push_tid != 0) { 391721fffe3SKacheong Poon (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_push_tid); 392721fffe3SKacheong Poon tcp->tcp_push_tid = 0; 393721fffe3SKacheong Poon } 394721fffe3SKacheong Poon if (tcp->tcp_reass_tid != 0) { 395721fffe3SKacheong Poon (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_reass_tid); 396721fffe3SKacheong Poon 
tcp->tcp_reass_tid = 0; 397721fffe3SKacheong Poon } 398721fffe3SKacheong Poon } 399721fffe3SKacheong Poon 400721fffe3SKacheong Poon /* 401721fffe3SKacheong Poon * Timer callback routine for keepalive probe. We do a fake resend of 402721fffe3SKacheong Poon * last ACKed byte. Then set a timer using RTO. When the timer expires, 403721fffe3SKacheong Poon * check to see if we have heard anything from the other end for the last 404721fffe3SKacheong Poon * RTO period. If we have, set the timer to expire for another 405721fffe3SKacheong Poon * tcp_keepalive_intrvl and check again. If we have not, set a timer using 406721fffe3SKacheong Poon * RTO << 1 and check again when it expires. Keep exponentially increasing 407721fffe3SKacheong Poon * the timeout if we have not heard from the other side. If for more than 408721fffe3SKacheong Poon * (tcp_ka_interval + tcp_ka_abort_thres) we have not heard anything, 409721fffe3SKacheong Poon * kill the connection unless the keepalive abort threshold is 0. In 410721fffe3SKacheong Poon * that case, we will probe "forever." 4113d0a255cSGarrett D'Amore * If tcp_ka_cnt and tcp_ka_rinterval are non-zero, then we do not follow 4123d0a255cSGarrett D'Amore * the exponential backoff, but send probes tcp_ka_cnt times in regular 4133d0a255cSGarrett D'Amore * intervals of tcp_ka_rinterval milliseconds until we hear back from peer. 4143d0a255cSGarrett D'Amore * Kill the connection if we don't hear back from peer after tcp_ka_cnt 4153d0a255cSGarrett D'Amore * probes are sent. 
416721fffe3SKacheong Poon */ 417721fffe3SKacheong Poon void 418721fffe3SKacheong Poon tcp_keepalive_timer(void *arg) 419721fffe3SKacheong Poon { 420721fffe3SKacheong Poon mblk_t *mp; 421721fffe3SKacheong Poon conn_t *connp = (conn_t *)arg; 422721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 423721fffe3SKacheong Poon int32_t firetime; 424721fffe3SKacheong Poon int32_t idletime; 425721fffe3SKacheong Poon int32_t ka_intrvl; 426721fffe3SKacheong Poon tcp_stack_t *tcps = tcp->tcp_tcps; 427721fffe3SKacheong Poon 428721fffe3SKacheong Poon tcp->tcp_ka_tid = 0; 429721fffe3SKacheong Poon 430721fffe3SKacheong Poon if (tcp->tcp_fused) 431721fffe3SKacheong Poon return; 432721fffe3SKacheong Poon 433721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpTimKeepalive); 434721fffe3SKacheong Poon ka_intrvl = tcp->tcp_ka_interval; 435721fffe3SKacheong Poon 436721fffe3SKacheong Poon /* 437721fffe3SKacheong Poon * Keepalive probe should only be sent if the application has not 438721fffe3SKacheong Poon * done a close on the connection. 439721fffe3SKacheong Poon */ 440721fffe3SKacheong Poon if (tcp->tcp_state > TCPS_CLOSE_WAIT) { 441721fffe3SKacheong Poon return; 442721fffe3SKacheong Poon } 443721fffe3SKacheong Poon /* Timer fired too early, restart it. */ 444721fffe3SKacheong Poon if (tcp->tcp_state < TCPS_ESTABLISHED) { 445721fffe3SKacheong Poon tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer, 44666cd0f60SKacheong Poon ka_intrvl); 447721fffe3SKacheong Poon return; 448721fffe3SKacheong Poon } 449721fffe3SKacheong Poon 450721fffe3SKacheong Poon idletime = TICK_TO_MSEC(ddi_get_lbolt() - tcp->tcp_last_recv_time); 451721fffe3SKacheong Poon /* 452721fffe3SKacheong Poon * If we have not heard from the other side for a long 453721fffe3SKacheong Poon * time, kill the connection unless the keepalive abort 454721fffe3SKacheong Poon * threshold is 0. In that case, we will probe "forever." 
455721fffe3SKacheong Poon */ 456721fffe3SKacheong Poon if (tcp->tcp_ka_abort_thres != 0 && 457721fffe3SKacheong Poon idletime > (ka_intrvl + tcp->tcp_ka_abort_thres)) { 458721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpTimKeepaliveDrop); 459721fffe3SKacheong Poon (void) tcp_clean_death(tcp, tcp->tcp_client_errno ? 460721fffe3SKacheong Poon tcp->tcp_client_errno : ETIMEDOUT); 461721fffe3SKacheong Poon return; 462721fffe3SKacheong Poon } 463721fffe3SKacheong Poon 464721fffe3SKacheong Poon if (tcp->tcp_snxt == tcp->tcp_suna && 465721fffe3SKacheong Poon idletime >= ka_intrvl) { 466721fffe3SKacheong Poon /* Fake resend of last ACKed byte. */ 467721fffe3SKacheong Poon mblk_t *mp1 = allocb(1, BPRI_LO); 468721fffe3SKacheong Poon 469721fffe3SKacheong Poon if (mp1 != NULL) { 470721fffe3SKacheong Poon *mp1->b_wptr++ = '\0'; 471721fffe3SKacheong Poon mp = tcp_xmit_mp(tcp, mp1, 1, NULL, NULL, 472721fffe3SKacheong Poon tcp->tcp_suna - 1, B_FALSE, NULL, B_TRUE); 473721fffe3SKacheong Poon freeb(mp1); 474721fffe3SKacheong Poon /* 475721fffe3SKacheong Poon * if allocation failed, fall through to start the 476721fffe3SKacheong Poon * timer back. 477721fffe3SKacheong Poon */ 478721fffe3SKacheong Poon if (mp != NULL) { 479721fffe3SKacheong Poon tcp_send_data(tcp, mp); 480721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpTimKeepaliveProbe); 4813d0a255cSGarrett D'Amore if (tcp->tcp_ka_rinterval) { 4823d0a255cSGarrett D'Amore firetime = tcp->tcp_ka_rinterval; 4833d0a255cSGarrett D'Amore } else if (tcp->tcp_ka_last_intrvl != 0) { 484721fffe3SKacheong Poon int max; 485721fffe3SKacheong Poon /* 486721fffe3SKacheong Poon * We should probe again at least 487721fffe3SKacheong Poon * in ka_intrvl, but not more than 488707e74bcSKacheong Poon * tcp_rto_max. 
489721fffe3SKacheong Poon */ 490707e74bcSKacheong Poon max = tcp->tcp_rto_max; 491721fffe3SKacheong Poon firetime = MIN(ka_intrvl - 1, 492721fffe3SKacheong Poon tcp->tcp_ka_last_intrvl << 1); 493721fffe3SKacheong Poon if (firetime > max) 494721fffe3SKacheong Poon firetime = max; 495721fffe3SKacheong Poon } else { 496721fffe3SKacheong Poon firetime = tcp->tcp_rto; 497721fffe3SKacheong Poon } 498721fffe3SKacheong Poon tcp->tcp_ka_tid = TCP_TIMER(tcp, 49966cd0f60SKacheong Poon tcp_keepalive_timer, firetime); 500721fffe3SKacheong Poon tcp->tcp_ka_last_intrvl = firetime; 501721fffe3SKacheong Poon return; 502721fffe3SKacheong Poon } 503721fffe3SKacheong Poon } 504721fffe3SKacheong Poon } else { 505721fffe3SKacheong Poon tcp->tcp_ka_last_intrvl = 0; 506721fffe3SKacheong Poon } 507721fffe3SKacheong Poon 508721fffe3SKacheong Poon /* firetime can be negative if (mp1 == NULL || mp == NULL) */ 509721fffe3SKacheong Poon if ((firetime = ka_intrvl - idletime) < 0) { 510721fffe3SKacheong Poon firetime = ka_intrvl; 511721fffe3SKacheong Poon } 51266cd0f60SKacheong Poon tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer, firetime); 513721fffe3SKacheong Poon } 514721fffe3SKacheong Poon 515721fffe3SKacheong Poon void 516721fffe3SKacheong Poon tcp_reass_timer(void *arg) 517721fffe3SKacheong Poon { 518721fffe3SKacheong Poon conn_t *connp = (conn_t *)arg; 519721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 520721fffe3SKacheong Poon 521721fffe3SKacheong Poon tcp->tcp_reass_tid = 0; 522721fffe3SKacheong Poon if (tcp->tcp_reass_head == NULL) 523721fffe3SKacheong Poon return; 524721fffe3SKacheong Poon ASSERT(tcp->tcp_reass_tail != NULL); 525721fffe3SKacheong Poon if (tcp->tcp_snd_sack_ok && tcp->tcp_num_sack_blk > 0) { 526721fffe3SKacheong Poon tcp_sack_remove(tcp->tcp_sack_list, 527721fffe3SKacheong Poon TCP_REASS_END(tcp->tcp_reass_tail), &tcp->tcp_num_sack_blk); 528721fffe3SKacheong Poon } 529721fffe3SKacheong Poon tcp_close_mpp(&tcp->tcp_reass_head); 530721fffe3SKacheong Poon 
tcp->tcp_reass_tail = NULL; 531721fffe3SKacheong Poon TCP_STAT(tcp->tcp_tcps, tcp_reass_timeout); 532721fffe3SKacheong Poon } 533721fffe3SKacheong Poon 534721fffe3SKacheong Poon /* This function handles the push timeout. */ 535721fffe3SKacheong Poon void 536721fffe3SKacheong Poon tcp_push_timer(void *arg) 537721fffe3SKacheong Poon { 538721fffe3SKacheong Poon conn_t *connp = (conn_t *)arg; 539721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 540721fffe3SKacheong Poon 541721fffe3SKacheong Poon TCP_DBGSTAT(tcp->tcp_tcps, tcp_push_timer_cnt); 542721fffe3SKacheong Poon 543721fffe3SKacheong Poon ASSERT(tcp->tcp_listener == NULL); 544721fffe3SKacheong Poon 545721fffe3SKacheong Poon ASSERT(!IPCL_IS_NONSTR(connp)); 546721fffe3SKacheong Poon 547721fffe3SKacheong Poon tcp->tcp_push_tid = 0; 548721fffe3SKacheong Poon 549721fffe3SKacheong Poon if (tcp->tcp_rcv_list != NULL && 550721fffe3SKacheong Poon tcp_rcv_drain(tcp) == TH_ACK_NEEDED) 551721fffe3SKacheong Poon tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK); 552721fffe3SKacheong Poon } 553721fffe3SKacheong Poon 554721fffe3SKacheong Poon /* 555721fffe3SKacheong Poon * This function handles delayed ACK timeout. 556721fffe3SKacheong Poon */ 557721fffe3SKacheong Poon void 558721fffe3SKacheong Poon tcp_ack_timer(void *arg) 559721fffe3SKacheong Poon { 560721fffe3SKacheong Poon conn_t *connp = (conn_t *)arg; 561721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 562721fffe3SKacheong Poon mblk_t *mp; 563721fffe3SKacheong Poon tcp_stack_t *tcps = tcp->tcp_tcps; 564721fffe3SKacheong Poon 565721fffe3SKacheong Poon TCP_DBGSTAT(tcps, tcp_ack_timer_cnt); 566721fffe3SKacheong Poon 567721fffe3SKacheong Poon tcp->tcp_ack_tid = 0; 568721fffe3SKacheong Poon 569721fffe3SKacheong Poon if (tcp->tcp_fused) 570721fffe3SKacheong Poon return; 571721fffe3SKacheong Poon 572721fffe3SKacheong Poon /* 573721fffe3SKacheong Poon * Do not send ACK if there is no outstanding unack'ed data. 
574721fffe3SKacheong Poon */ 575721fffe3SKacheong Poon if (tcp->tcp_rnxt == tcp->tcp_rack) { 576721fffe3SKacheong Poon return; 577721fffe3SKacheong Poon } 578721fffe3SKacheong Poon 579721fffe3SKacheong Poon if ((tcp->tcp_rnxt - tcp->tcp_rack) > tcp->tcp_mss) { 580721fffe3SKacheong Poon /* 581721fffe3SKacheong Poon * Make sure we don't allow deferred ACKs to result in 582721fffe3SKacheong Poon * timer-based ACKing. If we have held off an ACK 583721fffe3SKacheong Poon * when there was more than an mss here, and the timer 584721fffe3SKacheong Poon * goes off, we have to worry about the possibility 585721fffe3SKacheong Poon * that the sender isn't doing slow-start, or is out 586721fffe3SKacheong Poon * of step with us for some other reason. We fall 587721fffe3SKacheong Poon * permanently back in the direction of 588721fffe3SKacheong Poon * ACK-every-other-packet as suggested in RFC 1122. 589721fffe3SKacheong Poon */ 590721fffe3SKacheong Poon if (tcp->tcp_rack_abs_max > 2) 591721fffe3SKacheong Poon tcp->tcp_rack_abs_max--; 592721fffe3SKacheong Poon tcp->tcp_rack_cur_max = 2; 593721fffe3SKacheong Poon } 594721fffe3SKacheong Poon mp = tcp_ack_mp(tcp); 595721fffe3SKacheong Poon 596721fffe3SKacheong Poon if (mp != NULL) { 597721fffe3SKacheong Poon BUMP_LOCAL(tcp->tcp_obsegs); 598721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpOutAck); 599721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpOutAckDelayed); 600721fffe3SKacheong Poon tcp_send_data(tcp, mp); 601721fffe3SKacheong Poon } 602721fffe3SKacheong Poon } 603721fffe3SKacheong Poon 604721fffe3SKacheong Poon /* 605721fffe3SKacheong Poon * Notify IP that we are having trouble with this connection. IP should 606721fffe3SKacheong Poon * make note so it can potentially use a different IRE. 
607721fffe3SKacheong Poon */ 608721fffe3SKacheong Poon static void 609721fffe3SKacheong Poon tcp_ip_notify(tcp_t *tcp) 610721fffe3SKacheong Poon { 611721fffe3SKacheong Poon conn_t *connp = tcp->tcp_connp; 612721fffe3SKacheong Poon ire_t *ire; 613721fffe3SKacheong Poon 614721fffe3SKacheong Poon /* 615721fffe3SKacheong Poon * Note: in the case of source routing we want to blow away the 616721fffe3SKacheong Poon * route to the first source route hop. 617721fffe3SKacheong Poon */ 618721fffe3SKacheong Poon ire = connp->conn_ixa->ixa_ire; 619721fffe3SKacheong Poon if (ire != NULL && !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 620721fffe3SKacheong Poon if (ire->ire_ipversion == IPV4_VERSION) { 621721fffe3SKacheong Poon /* 622721fffe3SKacheong Poon * As per RFC 1122, we send an RTM_LOSING to inform 623721fffe3SKacheong Poon * routing protocols. 624721fffe3SKacheong Poon */ 625721fffe3SKacheong Poon ip_rts_change(RTM_LOSING, ire->ire_addr, 626721fffe3SKacheong Poon ire->ire_gateway_addr, ire->ire_mask, 627721fffe3SKacheong Poon connp->conn_laddr_v4, 0, 0, 0, 628721fffe3SKacheong Poon (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_IFA), 629721fffe3SKacheong Poon ire->ire_ipst); 630721fffe3SKacheong Poon } 631721fffe3SKacheong Poon (void) ire_no_good(ire); 632721fffe3SKacheong Poon } 633721fffe3SKacheong Poon } 634721fffe3SKacheong Poon 635721fffe3SKacheong Poon /* 636721fffe3SKacheong Poon * tcp_timer is the timer service routine. It handles the retransmission, 637721fffe3SKacheong Poon * FIN_WAIT_2 flush, and zero window probe timeout events. It figures out 638721fffe3SKacheong Poon * from the state of the tcp instance what kind of action needs to be done 639721fffe3SKacheong Poon * at the time it is called. 
*/
void
tcp_timer(void *arg)
{
	mblk_t		*mp;
	clock_t		first_threshold;
	clock_t		second_threshold;
	clock_t		ms;
	uint32_t	mss;
	conn_t		*connp = (conn_t *)arg;
	tcp_t		*tcp = connp->conn_tcp;
	tcp_stack_t	*tcps = tcp->tcp_tcps;
	boolean_t	dont_timeout = B_FALSE;

	/*
	 * Overview: 'arg' is the conn_t of the connection.  The switch on
	 * tcp_state below either handles the event completely and returns,
	 * or breaks out to the common code that follows it.  That common
	 * code compares tcp_ms_we_have_waited against first_threshold and
	 * second_threshold, may abort the connection through
	 * tcp_clean_death(), and otherwise backs off the RTO, restarts the
	 * timer and retransmits starting at tcp_suna.
	 */

	/* Clear the stored timeout id before doing anything else. */
	tcp->tcp_timer_tid = 0;

	/* Nothing is done here for a fused connection. */
	if (tcp->tcp_fused)
		return;

	/*
	 * Default thresholds; the SYN_RCVD/SYN_SENT cases replace them with
	 * the connection-establishment ("ctimer") thresholds.
	 */
	first_threshold = tcp->tcp_first_timer_threshold;
	second_threshold = tcp->tcp_second_timer_threshold;
	switch (tcp->tcp_state) {
	case TCPS_IDLE:
	case TCPS_BOUND:
	case TCPS_LISTEN:
		return;
	case TCPS_SYN_RCVD: {
		tcp_t	*listener = tcp->tcp_listener;

		if (tcp->tcp_syn_rcvd_timeout == 0 && (listener != NULL)) {
			/* it's our first timeout */
			tcp->tcp_syn_rcvd_timeout = 1;
			mutex_enter(&listener->tcp_eager_lock);
			listener->tcp_syn_rcvd_timeout++;
			if (!tcp->tcp_dontdrop && !tcp->tcp_closemp_used) {
				/*
				 * Make this eager available for drop if we
				 * need to drop one to accommodate a new
				 * incoming SYN request.
				 */
				MAKE_DROPPABLE(listener, tcp);
			}
			/*
			 * Defense is switched on when the listener's count
			 * of timed out eagers exceeds a quarter of
			 * tcps_conn_req_max_q0, and only when that tunable
			 * is larger than 200.
			 */
			if (!listener->tcp_syn_defense &&
			    (listener->tcp_syn_rcvd_timeout >
			    (tcps->tcps_conn_req_max_q0 >> 2)) &&
			    (tcps->tcps_conn_req_max_q0 > 200)) {
				/* We may be under attack. Put on a defense. */
				listener->tcp_syn_defense = B_TRUE;
				cmn_err(CE_WARN, "High TCP connect timeout "
				    "rate! System (port %d) may be under a "
				    "SYN flood attack!",
				    ntohs(listener->tcp_connp->conn_lport));

				/*
				 * NOTE(review): KM_NOSLEEP allocation; the
				 * result is not checked here, so
				 * tcp_ip_addr_cache may be left NULL.
				 * Confirm that its users tolerate that.
				 */
				listener->tcp_ip_addr_cache = kmem_zalloc(
				    IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t),
				    KM_NOSLEEP);
			}
			mutex_exit(&listener->tcp_eager_lock);
		} else if (listener != NULL) {
			mutex_enter(&listener->tcp_eager_lock);
			tcp->tcp_syn_rcvd_timeout++;
			if (tcp->tcp_syn_rcvd_timeout > 1 &&
			    !tcp->tcp_closemp_used) {
				/*
				 * This is our second timeout. Put the tcp in
				 * the list of droppable eagers to allow it to
				 * be dropped, if needed. We don't check
				 * whether tcp_dontdrop is set or not to
				 * protect ourselves from a SYN attack where a
				 * remote host can spoof itself as one of the
				 * good IP source and continue to hold
				 * resources too long.
				 */
				MAKE_DROPPABLE(listener, tcp);
			}
			mutex_exit(&listener->tcp_eager_lock);
		}
	}
		/* FALLTHRU */
	case TCPS_SYN_SENT:
		first_threshold = tcp->tcp_first_ctimer_threshold;
		second_threshold = tcp->tcp_second_ctimer_threshold;

		/*
		 * If an app has set the second_threshold to 0, it means that
		 * we need to retransmit forever, unless this is a passive
		 * open. We need to set second_threshold back to a normal
		 * value such that later comparison with it still makes
		 * sense. But we set dont_timeout to B_TRUE so that we will
		 * never time out.
		 */
		if (second_threshold == 0) {
			second_threshold = tcps->tcps_ip_abort_linterval;
			if (tcp->tcp_active_open)
				dont_timeout = B_TRUE;
		}
		break;
	case TCPS_ESTABLISHED:
	case TCPS_CLOSE_WAIT:
		/*
		 * If the end point has not been closed, TCP can retransmit
		 * forever. But if the end point is closed, the normal
		 * timeout applies.
		 *
		 * NOTE(review): the FIN_WAIT_1/CLOSING/LAST_ACK labels below
		 * are entered without this zero check, while the ASSERT after
		 * the switch requires second_threshold != 0.  Presumably the
		 * threshold is reset to a non-zero value when the end point
		 * is closed; confirm against the close path.
		 */
		if (second_threshold == 0) {
			second_threshold = tcps->tcps_ip_abort_linterval;
			dont_timeout = B_TRUE;
		}
		/* FALLTHRU */
	case TCPS_FIN_WAIT_1:
	case TCPS_CLOSING:
	case TCPS_LAST_ACK:
		/* If we have data to rexmit */
		if (tcp->tcp_suna != tcp->tcp_snxt) {
			clock_t	time_to_wait;

			TCPS_BUMP_MIB(tcps, tcpTimRetrans);
			if (!tcp->tcp_xmit_head)
				break;
			/*
			 * b_prev of the head of the transmit list holds an
			 * lbolt value (see the assignment near the end of
			 * this function), so the first statement yields the
			 * ticks elapsed since then and the second the
			 * milliseconds still left of tcp_rto.
			 */
			time_to_wait = ddi_get_lbolt() -
			    (clock_t)tcp->tcp_xmit_head->b_prev;
			time_to_wait = tcp->tcp_rto -
			    TICK_TO_MSEC(time_to_wait);
			/*
			 * If the timer fires too early, 1 clock tick earlier,
			 * restart the timer.
			 */
			if (time_to_wait > msec_per_tick) {
				TCP_STAT(tcps, tcp_timer_fire_early);
				TCP_TIMER_RESTART(tcp, time_to_wait);
				return;
			}
			/*
			 * When we probe zero windows, we force the swnd open.
			 * If our peer acks with a closed window swnd will be
			 * set to zero by tcp_rput(). As long as we are
			 * receiving acks tcp_rput will
			 * reset 'tcp_ms_we_have_waited' so as not to trip the
			 * first and second interval actions. NOTE: the timer
			 * interval is allowed to continue its exponential
			 * backoff.
			 */
			if (tcp->tcp_swnd == 0 || tcp->tcp_zero_win_probe) {
				if (connp->conn_debug) {
					(void) strlog(TCP_MOD_ID, 0, 1,
					    SL_TRACE, "tcp_timer: zero win");
				}
			} else {
				/*
				 * After retransmission, we need to do
				 * slow start. Set the ssthresh to one
				 * half of current effective window and
				 * cwnd to one MSS. Also reset
				 * tcp_cwnd_cnt.
				 *
				 * Note that if tcp_ssthresh is reduced because
				 * of ECN, do not reduce it again unless it is
				 * already one window of data away (tcp_cwr
				 * should then be cleared) or this is a
				 * timeout for a retransmitted segment.
				 */
				uint32_t npkt;

				if (!tcp->tcp_cwr || tcp->tcp_rexmit) {
					/*
					 * Half of the current ssthresh (when
					 * already backed off) or of the data
					 * in flight, expressed in segments;
					 * the new ssthresh is never less
					 * than two segments.
					 */
					npkt = ((tcp->tcp_timer_backoff ?
					    tcp->tcp_cwnd_ssthresh :
					    tcp->tcp_snxt -
					    tcp->tcp_suna) >> 1) / tcp->tcp_mss;
					tcp->tcp_cwnd_ssthresh = MAX(npkt, 2) *
					    tcp->tcp_mss;
				}
				tcp->tcp_cwnd = tcp->tcp_mss;
				tcp->tcp_cwnd_cnt = 0;
				if (tcp->tcp_ecn_ok) {
					tcp->tcp_cwr = B_TRUE;
					tcp->tcp_cwr_snd_max = tcp->tcp_snxt;
					tcp->tcp_ecn_cwr_sent = B_FALSE;
				}
			}
			break;
		}
		/*
		 * We have something to send yet we cannot send. The
		 * reason can be:
		 *
		 * 1. Zero send window: we need to do zero window probe.
		 * 2. Zero cwnd: because of ECN, we need to "clock out"
		 * segments.
		 * 3. SWS avoidance: receiver may have shrunk window,
		 * reset our knowledge.
		 *
		 * Note that condition 2 can happen with either 1 or
		 * 3. But 1 and 3 are exclusive.
		 */
		if (tcp->tcp_unsent != 0) {
			/*
			 * Should not hold the zero-copy messages for too long.
			 */
			if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
				tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
				    tcp->tcp_xmit_head, B_TRUE);

			if (tcp->tcp_cwnd == 0) {
				/*
				 * Set tcp_cwnd to 1 MSS so that a
				 * new segment can be sent out. We
				 * are "clocking out" new data when
				 * the network is really congested.
				 */
				ASSERT(tcp->tcp_ecn_ok);
				tcp->tcp_cwnd = tcp->tcp_mss;
			}
			if (tcp->tcp_swnd == 0) {
				/* Extend window for zero window probe */
				tcp->tcp_swnd++;
				tcp->tcp_zero_win_probe = B_TRUE;
				TCPS_BUMP_MIB(tcps, tcpOutWinProbe);
			} else {
				/*
				 * Handle timeout from sender SWS avoidance.
				 * Reset our knowledge of the max send window
				 * since the receiver might have reduced its
				 * receive buffer. Avoid setting tcp_max_swnd
				 * to one since that will essentially disable
				 * the SWS checks.
				 *
				 * Note that since we don't have a SWS
				 * state variable, if the timeout is set
				 * for ECN but not for SWS, this
				 * code will also be executed. This is
				 * fine as tcp_max_swnd is updated
				 * constantly and it will not affect
				 * anything.
				 */
				tcp->tcp_max_swnd = MAX(tcp->tcp_swnd, 2);
			}
			tcp_wput_data(tcp, NULL, B_FALSE);
			return;
		}
		/* Is there a FIN that needs to be retransmitted? */
		if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
		    !tcp->tcp_fin_acked)
			break;
		/* Nothing to do, return without restarting timer. */
		TCP_STAT(tcps, tcp_timer_fire_miss);
		return;
	case TCPS_FIN_WAIT_2:
		/*
		 * User closed the TCP endpoint and peer ACK'ed our FIN.
		 * We waited some time for peer's FIN, but it hasn't
		 * arrived. We flush the connection now to avoid
		 * case where the peer has rebooted.
		 */
		if (TCP_IS_DETACHED(tcp)) {
			(void) tcp_clean_death(tcp, 0);
		} else {
			TCP_TIMER_RESTART(tcp,
			    tcp->tcp_fin_wait_2_flush_interval);
		}
		return;
	case TCPS_TIME_WAIT:
		(void) tcp_clean_death(tcp, 0);
		return;
	default:
		if (connp->conn_debug) {
			(void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR,
			    "tcp_timer: strange state (%d) %s",
			    tcp->tcp_state, tcp_display(tcp, NULL,
			    DISP_PORT_ONLY));
		}
		return;
	}

	/*
	 * If the system is under memory pressure or the max number of
	 * connections have been established for the listener, be more
	 * aggressive in aborting connections.
	 */
	if (tcps->tcps_reclaim || (tcp->tcp_listen_cnt != NULL &&
	    tcp->tcp_listen_cnt->tlc_cnt > tcp->tcp_listen_cnt->tlc_max)) {
		second_threshold = tcp_early_abort * SECONDS;

		/* We will ignore the never timeout promise in this case... */
		dont_timeout = B_FALSE;
	}

	ASSERT(second_threshold != 0);

	/*
	 * From here on 'ms' first holds the time already waited; after the
	 * timer_rexmit label it is reused for the new timer interval.
	 */
	if ((ms = tcp->tcp_ms_we_have_waited) > second_threshold) {
		/*
		 * Should not hold the zero-copy messages for too long.
		 */
		if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
			tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
			    tcp->tcp_xmit_head, B_TRUE);

		if (dont_timeout) {
			/*
			 * Reset tcp_ms_we_have_waited to avoid overflow since
			 * we are going to retransmit forever.
			 */
			tcp->tcp_ms_we_have_waited = second_threshold;
			goto timer_rexmit;
		}

		/*
		 * For zero window probe, we need to send indefinitely,
		 * unless we have not heard from the other side for some
		 * time...
		 */
		if ((tcp->tcp_zero_win_probe == 0) ||
		    (TICK_TO_MSEC(ddi_get_lbolt() - tcp->tcp_last_recv_time) >
		    second_threshold)) {
			TCPS_BUMP_MIB(tcps, tcpTimRetransDrop);
			/*
			 * If TCP is in SYN_RCVD state, send back a
			 * RST|ACK as BSD does. Note that tcp_zero_win_probe
			 * should be zero in TCPS_SYN_RCVD state.
			 */
			if (tcp->tcp_state == TCPS_SYN_RCVD) {
				tcp_xmit_ctl("tcp_timer: RST sent on timeout "
				    "in SYN_RCVD",
				    tcp, tcp->tcp_snxt,
				    tcp->tcp_rnxt, TH_RST | TH_ACK);
			}
			/*
			 * Use tcp_client_errno when it is set, otherwise
			 * ETIMEDOUT.
			 */
			(void) tcp_clean_death(tcp,
			    tcp->tcp_client_errno ?
			    tcp->tcp_client_errno : ETIMEDOUT);
			return;
		} else {
			/*
			 * If the system is under memory pressure, we also
			 * abort connection in zero window probing.
			 */
			if (tcps->tcps_reclaim) {
				(void) tcp_clean_death(tcp,
				    tcp->tcp_client_errno ?
				    tcp->tcp_client_errno : ETIMEDOUT);
				TCP_STAT(tcps, tcp_zwin_mem_drop);
				return;
			}
			/*
			 * Set tcp_ms_we_have_waited to second_threshold
			 * so that in next timeout, we will do the above
			 * check (ddi_get_lbolt() - tcp_last_recv_time).
			 * This is also to avoid overflow.
			 *
			 * We don't need to decrement tcp_timer_backoff
			 * to avoid overflow because it will be decremented
			 * later if new timeout value is greater than
			 * tcp_rto_max. In the case when tcp_rto_max is
			 * greater than second_threshold, it means that we
			 * will wait longer than second_threshold to send
			 * the next
			 * window probe.
			 */
			tcp->tcp_ms_we_have_waited = second_threshold;
		}
	} else if (ms > first_threshold) {
		/*
		 * Should not hold the zero-copy messages for too long.
		 */
		if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
			tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
			    tcp->tcp_xmit_head, B_TRUE);

		/*
		 * We have been retransmitting for too long... The RTT
		 * we calculated is probably incorrect. Reinitialize it.
		 * Need to compensate for 0 tcp_rtt_sa. Reset
		 * tcp_rtt_update so that we won't accidentally cache a
		 * bad value. But only do this if this is not a zero
		 * window probe.
		 */
		if (tcp->tcp_rtt_sa != 0 && tcp->tcp_zero_win_probe == 0) {
			/*
			 * Fold the tcp_rtt_sa terms used in the RTO formula
			 * below into tcp_rtt_sd before zeroing tcp_rtt_sa,
			 * so the computed RTO is unchanged.
			 */
			tcp->tcp_rtt_sd += (tcp->tcp_rtt_sa >> 3) +
			    (tcp->tcp_rtt_sa >> 5);
			tcp->tcp_rtt_sa = 0;
			tcp_ip_notify(tcp);
			tcp->tcp_rtt_update = 0;
		}
	}

timer_rexmit:
	/*
	 * Compute the base RTO from the RTT estimators and shift it left by
	 * the (just incremented) backoff count.
	 */
	tcp->tcp_timer_backoff++;
	if ((ms = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
	    tcps->tcps_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5)) <
	    tcp->tcp_rto_min) {
		/*
		 * This means the original RTO is tcp_rexmit_interval_min.
		 * So we will use tcp_rexmit_interval_min as the RTO value
		 * and do the backoff.
		 */
		ms = tcp->tcp_rto_min << tcp->tcp_timer_backoff;
	} else {
		ms <<= tcp->tcp_timer_backoff;
	}
	if (ms > tcp->tcp_rto_max) {
		ms = tcp->tcp_rto_max;
		/*
		 * ms is at max, decrement tcp_timer_backoff to avoid
		 * overflow.
		 */
		tcp->tcp_timer_backoff--;
	}
	tcp->tcp_ms_we_have_waited += ms;
	/* tcp_rto is left untouched while zero window probing. */
	if (tcp->tcp_zero_win_probe == 0) {
		tcp->tcp_rto = ms;
	}
	TCP_TIMER_RESTART(tcp, ms);
	/*
	 * This is after a timeout and tcp_rto is backed off. Set
	 * tcp_set_timer to 1 so that next time RTO is updated, we will
	 * restart the timer with a correct value.
	 */
	tcp->tcp_set_timer = 1;
	/*
	 * 'mss' is the amount to retransmit: the unacknowledged data,
	 * capped at tcp_mss and at a non-zero send window.
	 */
	mss = tcp->tcp_snxt - tcp->tcp_suna;
	if (mss > tcp->tcp_mss)
		mss = tcp->tcp_mss;
	if (mss > tcp->tcp_swnd && tcp->tcp_swnd != 0)
		mss = tcp->tcp_swnd;

	/*
	 * Stamp the head of the transmit list with the current lbolt; this
	 * is the value read back through b_prev by the fire-early check in
	 * the switch above.
	 */
	if ((mp = tcp->tcp_xmit_head) != NULL)
		mp->b_prev = (mblk_t *)ddi_get_lbolt();
	mp = tcp_xmit_mp(tcp, mp, mss, NULL, NULL, tcp->tcp_suna, B_TRUE, &mss,
	    B_TRUE);

	/*
	 * When slow start after retransmission begins, start with
	 * this seq no. tcp_rexmit_max marks the end of special slow
	 * start phase.
	 */
	tcp->tcp_rexmit_nxt = tcp->tcp_suna;
	if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
	    (tcp->tcp_unsent == 0)) {
		tcp->tcp_rexmit_max = tcp->tcp_fss;
	} else {
		tcp->tcp_rexmit_max = tcp->tcp_snxt;
	}
	tcp->tcp_rexmit = B_TRUE;
	tcp->tcp_dupack_cnt = 0;

	/*
	 * Remove all rexmit SACK blk to start from fresh.
	 */
	if (tcp->tcp_snd_sack_ok)
		TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, tcp);
	/*
	 * tcp_xmit_mp() produced nothing to send.  The state updates above
	 * have been applied and the timer has already been restarted.
	 */
	if (mp == NULL) {
		return;
	}

	tcp->tcp_csuna = tcp->tcp_snxt;
	TCPS_BUMP_MIB(tcps, tcpRetransSegs);
	TCPS_UPDATE_MIB(tcps, tcpRetransBytes, mss);
	tcp_send_data(tcp, mp);

}

/*
 * Handle lingering timeouts. This function is called when the SO_LINGER timeout
 * expires.
 *
 * 'arg' is the conn_t of the lingering connection.  tcp_client_errno is set
 * to ETIMEDOUT before tcp_stop_lingering() is called.
 */
void
tcp_close_linger_timeout(void *arg)
{
	conn_t	*connp = (conn_t *)arg;
	tcp_t	*tcp = connp->conn_tcp;

	tcp->tcp_client_errno = ETIMEDOUT;
	tcp_stop_lingering(tcp);
}