/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_INET_TCP_IMPL_H
#define	_INET_TCP_IMPL_H

/*
 * TCP implementation private declarations. These interfaces are
 * used to build the IP module and are not meant to be accessed
 * by any modules except IP itself. They are undocumented and are
 * subject to change without notice.
 *
 * Usage note for the statement-like macros in this file
 * (TCP_TIMER_RESTART, TCP_FUSE_SYNCSTR_*, TCP_CHECK_IREINFO): each one
 * expands to a brace-enclosed block and evaluates its arguments more
 * than once. Pass side-effect-free expressions, and do not use one as
 * the unbraced body of an `if' that is followed by `else'.
 */

#ifdef	__cplusplus
extern "C" {
#endif

#ifdef _KERNEL

#include <inet/optcom.h>
#include <inet/tcp.h>

#define	TCP_MOD_ID	5105

/*
 * Was this tcp created via socket() interface?
 */
#define	TCP_IS_SOCKET(tcp)	((tcp)->tcp_issocket)

/*
 * Is this tcp not attached to any upper client?
 */
#define	TCP_IS_DETACHED(tcp)	((tcp)->tcp_detached)

/*
 * Thin wrappers that pass the tcp's conn (tcp_connp) to tcp_timeout()
 * and tcp_timeout_cancel(). The tcp argument is parenthesized so that
 * any pointer-valued expression (e.g. &obj or p + 1) expands correctly.
 */
#define	TCP_TIMER(tcp, f, tim)		\
	tcp_timeout((tcp)->tcp_connp, f, tim)
#define	TCP_TIMER_CANCEL(tcp, id)	\
	tcp_timeout_cancel((tcp)->tcp_connp, id)

/*
 * To restart the TCP retransmission timer.
 *
 * Any timer recorded in tcp_timer_tid (non-zero) is cancelled first;
 * then tcp_timer is scheduled after MSEC_TO_TICK(intvl) and the new
 * timeout id is stored back in tcp_timer_tid.
 */
#define	TCP_TIMER_RESTART(tcp, intvl) {					\
	if ((tcp)->tcp_timer_tid != 0)					\
		(void) TCP_TIMER_CANCEL((tcp), (tcp)->tcp_timer_tid);	\
	(tcp)->tcp_timer_tid = TCP_TIMER((tcp), tcp_timer,		\
	    MSEC_TO_TICK(intvl));					\
}

/*
 * This stops synchronous streams for a fused tcp endpoint
 * and prevents tcp_fuse_rrw() from pulling data from it.
 *
 * Nothing is done unless tcp_direct_sockfs is set; the flag is
 * updated while holding tcp_non_sq_lock.
 */
#define	TCP_FUSE_SYNCSTR_STOP(tcp) {				\
	if ((tcp)->tcp_direct_sockfs) {				\
		mutex_enter(&(tcp)->tcp_non_sq_lock);		\
		(tcp)->tcp_fuse_syncstr_stopped = B_TRUE;	\
		mutex_exit(&(tcp)->tcp_non_sq_lock);		\
	}							\
}

/*
 * This causes all calls to tcp_fuse_rrw() to block until
 * TCP_FUSE_SYNCSTR_UNPLUG_DRAIN() is called.
 *
 * The endpoint must not already be plugged (ASSERTed under
 * tcp_non_sq_lock).
 */
#define	TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp) {			\
	if ((tcp)->tcp_direct_sockfs) {				\
		mutex_enter(&(tcp)->tcp_non_sq_lock);		\
		ASSERT(!(tcp)->tcp_fuse_syncstr_plugged);	\
		(tcp)->tcp_fuse_syncstr_plugged = B_TRUE;	\
		mutex_exit(&(tcp)->tcp_non_sq_lock);		\
	}							\
}

/*
 * This unplugs the draining of data through tcp_fuse_rrw(); see
 * the comments in tcp_fuse_rrw() for how we preserve ordering.
 *
 * The plugged flag is cleared and tcp_fuse_plugcv is broadcast while
 * tcp_non_sq_lock is held.
 */
#define	TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp) {			\
	if ((tcp)->tcp_direct_sockfs) {				\
		mutex_enter(&(tcp)->tcp_non_sq_lock);		\
		(tcp)->tcp_fuse_syncstr_plugged = B_FALSE;	\
		(void) cv_broadcast(&(tcp)->tcp_fuse_plugcv);	\
		mutex_exit(&(tcp)->tcp_non_sq_lock);		\
	}							\
}

/*
 * Before caching the conn IRE, we need to make sure certain TCP
 * states are in sync with the ire. The mismatch could occur if the
 * TCP state has been set in tcp_adapt_ire() using a different IRE,
 * e.g. if an address was not present during an initial connect(),
 * tcp_adapt_ire() will set the state using the interface route.
 * Subsequently, if the address is added to the local machine, the
 * retransmitted SYN will get the correct (loopback) IRE, but the TCP
 * state (tcp_loopback and tcp_localnet) will remain out of sync.
 * This is especially an issue with TCP fusion which relies on the
 * TCP state to be accurate.
 *
 * This check/change should be made only if the TCP is not yet in
 * the established state, else it would lead to inconsistencies.
 *
 * tcp_loopback is made to agree with whether the ire is of type
 * IRE_LOOPBACK or IRE_LOCAL; tcp_localnet is set when the ire has no
 * gateway address (zero for IPv4, unspecified for IPv6).
 */
#define	TCP_CHECK_IREINFO(tcp, ire) {					\
	if ((tcp)->tcp_state < TCPS_ESTABLISHED) {			\
		if (((ire)->ire_type & (IRE_LOOPBACK |			\
		    IRE_LOCAL)) && !(tcp)->tcp_loopback) {		\
			(tcp)->tcp_loopback = B_TRUE;			\
		} else if ((tcp)->tcp_loopback &&			\
		    !((ire)->ire_type & (IRE_LOOPBACK | IRE_LOCAL))) {	\
			(tcp)->tcp_loopback = B_FALSE;			\
		}							\
		if ((tcp)->tcp_ipversion == IPV4_VERSION) {		\
			(tcp)->tcp_localnet =				\
			    ((ire)->ire_gateway_addr == 0);		\
		} else {						\
			(tcp)->tcp_localnet =				\
			    IN6_IS_ADDR_UNSPECIFIED(			\
			    &(ire)->ire_gateway_addr_v6);		\
		}							\
	}								\
}

/*
 * Write-side flow-control is implemented via the per instance STREAMS
 * write-side Q by explicitly setting QFULL to stop the flow of mblk_t(s)
 * and clearing QFULL and calling qbackenable() to restart the flow based
 * on the number of TCP unsent bytes (i.e. those not on the wire waiting
 * for a remote ACK).
 *
 * This differs from a standard STREAMS kmod, where the framework
 * automatically flow-controls the STREAMS Q based on the defined
 * hiwat/lowat values as mblk_t's are enqueued/dequeued.
 *
 * As of FireEngine TCP write-side flow-control needs to take into account
 * both the unsent tcp_xmit list bytes and any squeue_t enqueued bytes
 * (i.e. from tcp_wput() -> tcp_output()).
 *
 * This is accomplished by adding a new tcp_t field, tcp_squeue_bytes, to
 * count the number of bytes enqueued by tcp_wput() and the number of bytes
 * dequeued and processed by tcp_output().
 *
 * So, the total number of bytes unsent is (squeue_bytes + unsent) with all
 * flow-control uses of unsent replaced with the macro TCP_UNSENT_BYTES.
 */
extern void	tcp_clrqfull(tcp_t *);
extern void	tcp_setqfull(tcp_t *);

#define	TCP_UNSENT_BYTES(tcp) \
	((tcp)->tcp_squeue_bytes + (tcp)->tcp_unsent)

/* Named Dispatch Parameter Management Structure */
typedef struct tcpparam_s {
	uint32_t	tcp_param_min;
	uint32_t	tcp_param_max;
	uint32_t	tcp_param_val;
	char		*tcp_param_name;
} tcpparam_t;


/*
 * Shorthand names for individual tunables. Each name expands to one
 * field of a fixed tcps_params[] slot, so it is meant to be written
 * after a reference to whatever structure holds tcps_params. The
 * indices must stay in step with that array's initializer, which is
 * not part of this header.
 */
#define	tcps_time_wait_interval			tcps_params[0].tcp_param_val
#define	tcps_conn_req_max_q			tcps_params[1].tcp_param_val
#define	tcps_conn_req_max_q0			tcps_params[2].tcp_param_val
#define	tcps_conn_req_min			tcps_params[3].tcp_param_val
#define	tcps_conn_grace_period			tcps_params[4].tcp_param_val
#define	tcps_cwnd_max_				tcps_params[5].tcp_param_val
#define	tcps_dbg				tcps_params[6].tcp_param_val
#define	tcps_smallest_nonpriv_port		tcps_params[7].tcp_param_val
#define	tcps_ip_abort_cinterval			tcps_params[8].tcp_param_val
#define	tcps_ip_abort_linterval			tcps_params[9].tcp_param_val
#define	tcps_ip_abort_interval			tcps_params[10].tcp_param_val
#define	tcps_ip_notify_cinterval		tcps_params[11].tcp_param_val
#define	tcps_ip_notify_interval			tcps_params[12].tcp_param_val
#define	tcps_ipv4_ttl				tcps_params[13].tcp_param_val
#define	tcps_keepalive_interval_high		tcps_params[14].tcp_param_max
#define	tcps_keepalive_interval			tcps_params[14].tcp_param_val
#define	tcps_keepalive_interval_low		tcps_params[14].tcp_param_min
#define	tcps_maxpsz_multiplier			tcps_params[15].tcp_param_val
#define	tcps_mss_def_ipv4			tcps_params[16].tcp_param_val
#define	tcps_mss_max_ipv4			tcps_params[17].tcp_param_val
#define	tcps_mss_min				tcps_params[18].tcp_param_val
#define	tcps_naglim_def				tcps_params[19].tcp_param_val
#define	tcps_rexmit_interval_initial		tcps_params[20].tcp_param_val
#define	tcps_rexmit_interval_max		tcps_params[21].tcp_param_val
#define	tcps_rexmit_interval_min		tcps_params[22].tcp_param_val
#define	tcps_deferred_ack_interval		tcps_params[23].tcp_param_val
#define	tcps_snd_lowat_fraction			tcps_params[24].tcp_param_val
#define	tcps_sth_rcv_hiwat			tcps_params[25].tcp_param_val
#define	__tcps_not_used1			tcps_params[26].tcp_param_val
#define	tcps_dupack_fast_retransmit		tcps_params[27].tcp_param_val
#define	tcps_ignore_path_mtu			tcps_params[28].tcp_param_val
#define	tcps_smallest_anon_port			tcps_params[29].tcp_param_val
#define	tcps_largest_anon_port			tcps_params[30].tcp_param_val
#define	tcps_xmit_hiwat				tcps_params[31].tcp_param_val
#define	tcps_xmit_lowat				tcps_params[32].tcp_param_val
#define	tcps_recv_hiwat				tcps_params[33].tcp_param_val
#define	tcps_recv_hiwat_minmss			tcps_params[34].tcp_param_val
#define	tcps_fin_wait_2_flush_interval		tcps_params[35].tcp_param_val
#define	tcps_max_buf				tcps_params[36].tcp_param_val
#define	tcps_strong_iss				tcps_params[37].tcp_param_val
#define	tcps_rtt_updates			tcps_params[38].tcp_param_val
#define	tcps_wscale_always			tcps_params[39].tcp_param_val
#define	tcps_tstamp_always			tcps_params[40].tcp_param_val
#define	tcps_tstamp_if_wscale			tcps_params[41].tcp_param_val
#define	tcps_rexmit_interval_extra		tcps_params[42].tcp_param_val
#define	tcps_deferred_acks_max			tcps_params[43].tcp_param_val
#define	tcps_slow_start_after_idle		tcps_params[44].tcp_param_val
#define	tcps_slow_start_initial			tcps_params[45].tcp_param_val
#define	tcps_sack_permitted			tcps_params[46].tcp_param_val
#define	__tcps_not_used2			tcps_params[47].tcp_param_val
#define	tcps_ipv6_hoplimit			tcps_params[48].tcp_param_val
#define	tcps_mss_def_ipv6			tcps_params[49].tcp_param_val
#define	tcps_mss_max_ipv6			tcps_params[50].tcp_param_val
#define	tcps_rev_src_routes			tcps_params[51].tcp_param_val
#define	tcps_local_dack_interval		tcps_params[52].tcp_param_val
#define	tcps_ndd_get_info_interval		tcps_params[53].tcp_param_val
#define	tcps_local_dacks_max			tcps_params[54].tcp_param_val
#define	tcps_ecn_permitted			tcps_params[55].tcp_param_val
#define	tcps_rst_sent_rate_enabled		tcps_params[56].tcp_param_val
#define	tcps_rst_sent_rate			tcps_params[57].tcp_param_val
#define	tcps_push_timer_interval		tcps_params[58].tcp_param_val
#define	tcps_use_smss_as_mss_opt		tcps_params[59].tcp_param_val
#define	tcps_keepalive_abort_interval_high	tcps_params[60].tcp_param_max
#define	tcps_keepalive_abort_interval		tcps_params[60].tcp_param_val
#define	tcps_keepalive_abort_interval_low	tcps_params[60].tcp_param_min

extern struct qinit tcp_loopback_rinit, tcp_rinitv4, tcp_rinitv6;
extern boolean_t do_tcp_fusion;

extern int	tcp_maxpsz_set(tcp_t *, boolean_t);
extern void	tcp_timers_stop(tcp_t *);
extern void	tcp_rcv_enqueue(tcp_t *, mblk_t *, uint_t);
extern void	tcp_push_timer(void *);
extern timeout_id_t tcp_timeout(conn_t *, void (*)(void *), clock_t);
extern clock_t	tcp_timeout_cancel(conn_t *, timeout_id_t);

extern void	tcp_fuse(tcp_t *, uchar_t *, tcph_t *);
extern void	tcp_unfuse(tcp_t *);
extern boolean_t tcp_fuse_output(tcp_t *, mblk_t *, uint32_t);
extern void	tcp_fuse_output_urg(tcp_t *, mblk_t *);
extern boolean_t tcp_fuse_rcv_drain(queue_t *, tcp_t *, mblk_t **);
extern void	tcp_fuse_syncstr_enable_pair(tcp_t *);
extern void	tcp_fuse_disable_pair(tcp_t *, boolean_t);
extern int	tcp_fuse_rrw(queue_t *, struiod_t *);
extern int	tcp_fuse_rinfop(queue_t *, infod_t *);
extern size_t	tcp_fuse_set_rcv_hiwat(tcp_t *, size_t);
extern int	tcp_fuse_maxpsz_set(tcp_t *);

/*
 * Object to represent database of options to search passed to
 * {sock,tpi}optcom_req() interface routine to take care of option
 * management and associated methods.
 */
extern optdb_obj_t	tcp_opt_obj;
extern uint_t		tcp_max_optsize;

extern sock_lower_handle_t tcp_create(int, int, int, sock_downcalls_t **,
    uint_t *, int *, int, cred_t *);
extern int tcp_fallback(sock_lower_handle_t, queue_t *, boolean_t,
    so_proto_quiesced_cb_t);

extern sock_downcalls_t sock_tcp_downcalls;


#endif	/* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _INET_TCP_IMPL_H */