xref: /titanic_51/usr/src/uts/common/inet/tcp_impl.h (revision c8343062f6e25afd9c2a31b65df357030e69fa55)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_INET_TCP_IMPL_H
28 #define	_INET_TCP_IMPL_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * TCP implementation private declarations.  These interfaces are
34  * used to build the IP module and are not meant to be accessed
35  * by any modules except IP itself.  They are undocumented and are
36  * subject to change without notice.
37  */
38 
39 #ifdef	__cplusplus
40 extern "C" {
41 #endif
42 
43 #ifdef _KERNEL
44 
45 #include <inet/tcp.h>
46 
/*
 * Numeric identifier for the TCP module.
 * NOTE(review): presumably the STREAMS module id reported by TCP;
 * confirm against the module_info that consumes it.
 */
#define	TCP_MOD_ID	5105
48 
/*
 * Was this tcp created via socket() interface?
 *
 * Evaluates to the tcp_issocket member of the tcp the argument points to.
 * The argument is evaluated exactly once.
 */
#define	TCP_IS_SOCKET(tcp)	((tcp)->tcp_issocket)
53 
/*
 * Is this tcp not attached to any upper client?
 *
 * Evaluates to the tcp_detached member of the tcp the argument points to.
 * The argument is evaluated exactly once.
 */
#define	TCP_IS_DETACHED(tcp)	((tcp)->tcp_detached)
58 
/*
 * Timer helpers that operate on the conn_t attached to a tcp.
 *
 * TCP_TIMER(tcp, f, tim) calls tcp_timeout() with the tcp's tcp_connp,
 * the callback f and the interval tim, and evaluates to its return value.
 *
 * TCP_TIMER_CANCEL(tcp, id) calls tcp_timeout_cancel() with the tcp's
 * tcp_connp and the timer id, and evaluates to its return value.
 *
 * The tcp argument is parenthesized in the expansion so that any pointer
 * expression (for example "base + i" or "&arr[i]") can be passed; without
 * the parentheses "->" would bind to only the last operand of the argument.
 */
#define	TCP_TIMER(tcp, f, tim)		\
	tcp_timeout((tcp)->tcp_connp, f, tim)
#define	TCP_TIMER_CANCEL(tcp, id)	\
	tcp_timeout_cancel((tcp)->tcp_connp, id)
63 
/*
 * To restart the TCP retransmission timer.
 *
 * If the tcp's tcp_timer_tid is nonzero, the timer it names is cancelled
 * first.  A new timer running tcp_timer is then scheduled through
 * TCP_TIMER() with the interval MSEC_TO_TICK(intvl), and the returned id
 * is stored back into tcp_timer_tid.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used as the unbraced body of an if/else.
 * Invocations must be terminated with a semicolon.
 * NOTE(review): if lint flags the constant loop condition, annotate it
 * following the project's convention.
 */
#define	TCP_TIMER_RESTART(tcp, intvl) do {				\
	if ((tcp)->tcp_timer_tid != 0)					\
		(void) TCP_TIMER_CANCEL((tcp), (tcp)->tcp_timer_tid);	\
	(tcp)->tcp_timer_tid = TCP_TIMER((tcp), tcp_timer,		\
	    MSEC_TO_TICK(intvl));					\
} while (0)
73 
/*
 * This stops synchronous streams for a fused tcp endpoint
 * and prevents tcp_rrw() from pulling data from it.
 *
 * Only acts when the tcp's tcp_direct_sockfs is set: in that case
 * tcp_fuse_syncstr_stopped is set to B_TRUE while holding tcp_fuse_lock.
 * Otherwise the macro does nothing.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used as the unbraced body of an if/else.
 * Invocations must be terminated with a semicolon.
 */
#define	TCP_FUSE_SYNCSTR_STOP(tcp) do {				\
	if ((tcp)->tcp_direct_sockfs) {				\
		mutex_enter(&(tcp)->tcp_fuse_lock);		\
		(tcp)->tcp_fuse_syncstr_stopped = B_TRUE;	\
		mutex_exit(&(tcp)->tcp_fuse_lock);		\
	}							\
} while (0)
85 
/*
 * This resumes synchronous streams for this fused tcp endpoint
 * and allows tcp_rrw() to pull data from it again.
 *
 * Only acts when the tcp's tcp_direct_sockfs is set: in that case
 * tcp_fuse_syncstr_stopped is reset to B_FALSE while holding
 * tcp_fuse_lock.  Otherwise the macro does nothing.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used as the unbraced body of an if/else.
 * Invocations must be terminated with a semicolon.
 */
#define	TCP_FUSE_SYNCSTR_RESUME(tcp) do {			\
	if ((tcp)->tcp_direct_sockfs) {				\
		mutex_enter(&(tcp)->tcp_fuse_lock);		\
		(tcp)->tcp_fuse_syncstr_stopped = B_FALSE;	\
		mutex_exit(&(tcp)->tcp_fuse_lock);		\
	}							\
} while (0)
97 
/*
 * Write-side flow-control is implemented via the per instance STREAMS
 * write-side Q by explicitly setting QFULL to stop the flow of mblk_t(s)
 * and clearing QFULL and calling qbackenable() to restart the flow based
 * on the number of TCP unsent bytes (i.e. those not on the wire waiting
 * for a remote ACK).
 *
 * This differs from a standard STREAMS kmod, where the framework would
 * automatically flow-control the STREAMS Q based on the defined
 * hiwat/lowat values as mblk_t's are enqueued/dequeued.
 *
 * As of FireEngine, TCP write-side flow-control needs to take into account
 * both the unsent tcp_xmit list bytes and any squeue_t enqueued bytes
 * (i.e. from tcp_wput() -> tcp_output()).
 *
 * This is accomplished by adding a new tcp_t field, tcp_squeue_bytes, to
 * count the number of bytes enqueued by tcp_wput() and the number of bytes
 * dequeued and processed by tcp_output().
 *
 * So, the total number of bytes unsent is (squeue_bytes + unsent) with all
 * flow-control uses of unsent replaced with the macro TCP_UNSENT_BYTES.
 */
extern void	tcp_clrqfull(tcp_t *);
extern void	tcp_setqfull(tcp_t *);

/*
 * Total unsent bytes for flow-control purposes: the sum of the tcp's
 * tcp_squeue_bytes and tcp_unsent members.  The argument appears twice
 * in the expansion, so it must not have side effects.
 */
#define	TCP_UNSENT_BYTES(tcp) \
	((tcp)->tcp_squeue_bytes + (tcp)->tcp_unsent)
125 
/*
 * Named Dispatch Parameter Management Structure.
 *
 * Each entry carries one tunable: its current value, the bounds stored
 * alongside it, and its name.
 */
typedef struct tcpparam_s tcpparam_t;

struct tcpparam_s {
	uint32_t	tcp_param_min;		/* minimum stored for the tunable */
	uint32_t	tcp_param_max;		/* maximum stored for the tunable */
	uint32_t	tcp_param_val;		/* current value */
	char		*tcp_param_name;	/* name string */
};

/* Table of tunables; it is defined outside this header. */
extern tcpparam_t tcp_param_arr[];
135 
/*
 * Accessors for one slot of tcp_param_arr[].  They expand to plain
 * lvalues, so the aliases below can be both read and assigned.
 */
#define	TCP_PARAM_VAL(idx)	tcp_param_arr[idx].tcp_param_val
#define	TCP_PARAM_MIN(idx)	tcp_param_arr[idx].tcp_param_min
#define	TCP_PARAM_MAX(idx)	tcp_param_arr[idx].tcp_param_max

/*
 * Symbolic names for the individual tunables.  Each alias is bound to a
 * fixed index of tcp_param_arr[].
 * NOTE(review): the indices presumably have to match the order in which
 * tcp_param_arr[] is initialized; confirm before inserting or reordering.
 */
#define	tcp_time_wait_interval			TCP_PARAM_VAL(0)
#define	tcp_conn_req_max_q			TCP_PARAM_VAL(1)
#define	tcp_conn_req_max_q0			TCP_PARAM_VAL(2)
#define	tcp_conn_req_min			TCP_PARAM_VAL(3)
#define	tcp_conn_grace_period			TCP_PARAM_VAL(4)
#define	tcp_cwnd_max_				TCP_PARAM_VAL(5)
#define	tcp_dbg					TCP_PARAM_VAL(6)
#define	tcp_smallest_nonpriv_port		TCP_PARAM_VAL(7)
#define	tcp_ip_abort_cinterval			TCP_PARAM_VAL(8)
#define	tcp_ip_abort_linterval			TCP_PARAM_VAL(9)
#define	tcp_ip_abort_interval			TCP_PARAM_VAL(10)
#define	tcp_ip_notify_cinterval			TCP_PARAM_VAL(11)
#define	tcp_ip_notify_interval			TCP_PARAM_VAL(12)
#define	tcp_ipv4_ttl				TCP_PARAM_VAL(13)
#define	tcp_keepalive_interval_high		TCP_PARAM_MAX(14)
#define	tcp_keepalive_interval			TCP_PARAM_VAL(14)
#define	tcp_keepalive_interval_low		TCP_PARAM_MIN(14)
#define	tcp_maxpsz_multiplier			TCP_PARAM_VAL(15)
#define	tcp_mss_def_ipv4			TCP_PARAM_VAL(16)
#define	tcp_mss_max_ipv4			TCP_PARAM_VAL(17)
#define	tcp_mss_min				TCP_PARAM_VAL(18)
#define	tcp_naglim_def				TCP_PARAM_VAL(19)
#define	tcp_rexmit_interval_initial		TCP_PARAM_VAL(20)
#define	tcp_rexmit_interval_max			TCP_PARAM_VAL(21)
#define	tcp_rexmit_interval_min			TCP_PARAM_VAL(22)
#define	tcp_deferred_ack_interval		TCP_PARAM_VAL(23)
#define	tcp_snd_lowat_fraction			TCP_PARAM_VAL(24)
#define	tcp_sth_rcv_hiwat			TCP_PARAM_VAL(25)
#define	tcp_sth_rcv_lowat			TCP_PARAM_VAL(26)
#define	tcp_dupack_fast_retransmit		TCP_PARAM_VAL(27)
#define	tcp_ignore_path_mtu			TCP_PARAM_VAL(28)
#define	tcp_smallest_anon_port			TCP_PARAM_VAL(29)
#define	tcp_largest_anon_port			TCP_PARAM_VAL(30)
#define	tcp_xmit_hiwat				TCP_PARAM_VAL(31)
#define	tcp_xmit_lowat				TCP_PARAM_VAL(32)
#define	tcp_recv_hiwat				TCP_PARAM_VAL(33)
#define	tcp_recv_hiwat_minmss			TCP_PARAM_VAL(34)
#define	tcp_fin_wait_2_flush_interval		TCP_PARAM_VAL(35)
#define	tcp_co_min				TCP_PARAM_VAL(36)
#define	tcp_max_buf				TCP_PARAM_VAL(37)
#define	tcp_strong_iss				TCP_PARAM_VAL(38)
#define	tcp_rtt_updates				TCP_PARAM_VAL(39)
#define	tcp_wscale_always			TCP_PARAM_VAL(40)
#define	tcp_tstamp_always			TCP_PARAM_VAL(41)
#define	tcp_tstamp_if_wscale			TCP_PARAM_VAL(42)
#define	tcp_rexmit_interval_extra		TCP_PARAM_VAL(43)
#define	tcp_deferred_acks_max			TCP_PARAM_VAL(44)
#define	tcp_slow_start_after_idle		TCP_PARAM_VAL(45)
#define	tcp_slow_start_initial			TCP_PARAM_VAL(46)
#define	tcp_co_timer_interval			TCP_PARAM_VAL(47)
#define	tcp_sack_permitted			TCP_PARAM_VAL(48)
#define	tcp_trace				TCP_PARAM_VAL(49)
#define	tcp_compression_enabled			TCP_PARAM_VAL(50)
#define	tcp_ipv6_hoplimit			TCP_PARAM_VAL(51)
#define	tcp_mss_def_ipv6			TCP_PARAM_VAL(52)
#define	tcp_mss_max_ipv6			TCP_PARAM_VAL(53)
#define	tcp_rev_src_routes			TCP_PARAM_VAL(54)
#define	tcp_local_dack_interval			TCP_PARAM_VAL(55)
#define	tcp_ndd_get_info_interval		TCP_PARAM_VAL(56)
#define	tcp_local_dacks_max			TCP_PARAM_VAL(57)
#define	tcp_ecn_permitted			TCP_PARAM_VAL(58)
#define	tcp_rst_sent_rate_enabled		TCP_PARAM_VAL(59)
#define	tcp_rst_sent_rate			TCP_PARAM_VAL(60)
#define	tcp_push_timer_interval			TCP_PARAM_VAL(61)
#define	tcp_use_smss_as_mss_opt			TCP_PARAM_VAL(62)
#define	tcp_keepalive_abort_interval_high	TCP_PARAM_MAX(63)
#define	tcp_keepalive_abort_interval		TCP_PARAM_VAL(63)
#define	tcp_keepalive_abort_interval_low	TCP_PARAM_MIN(63)
204 
/*
 * Kstats
 *
 * One kstat_named_t per TCP statistic.  The TCP_STAT(), TCP_STAT_UPDATE()
 * and TCP_STAT_SET() macros operate on the value.ui64 member of the field
 * they are given, inside the global tcp_statistics instance.
 *
 * NOTE(review): the member order presumably has to match the kstat
 * template used where tcp_statistics is initialized; confirm before
 * inserting, removing or reordering members.
 */
typedef struct tcp_stat {
	kstat_named_t	tcp_time_wait;
	kstat_named_t	tcp_time_wait_syn;
	kstat_named_t	tcp_time_wait_syn_success;
	kstat_named_t	tcp_time_wait_syn_fail;
	kstat_named_t	tcp_reinput_syn;
	kstat_named_t	tcp_ip_output;
	kstat_named_t	tcp_detach_non_time_wait;
	kstat_named_t	tcp_detach_time_wait;
	kstat_named_t	tcp_time_wait_reap;
	kstat_named_t	tcp_clean_death_nondetached;
	kstat_named_t	tcp_reinit_calls;
	kstat_named_t	tcp_eager_err1;
	kstat_named_t	tcp_eager_err2;
	kstat_named_t	tcp_eager_blowoff_calls;
	kstat_named_t	tcp_eager_blowoff_q;
	kstat_named_t	tcp_eager_blowoff_q0;
	kstat_named_t	tcp_not_hard_bound;
	kstat_named_t	tcp_no_listener;
	kstat_named_t	tcp_found_eager;
	kstat_named_t	tcp_wrong_queue;
	kstat_named_t	tcp_found_eager_binding1;
	kstat_named_t	tcp_found_eager_bound1;
	kstat_named_t	tcp_eager_has_listener1;
	kstat_named_t	tcp_open_alloc;
	kstat_named_t	tcp_open_detached_alloc;
	kstat_named_t	tcp_rput_time_wait;
	kstat_named_t	tcp_listendrop;
	kstat_named_t	tcp_listendropq0;
	kstat_named_t	tcp_wrong_rq;
	kstat_named_t	tcp_rsrv_calls;
	kstat_named_t	tcp_eagerfree2;
	kstat_named_t	tcp_eagerfree3;
	kstat_named_t	tcp_eagerfree4;
	kstat_named_t	tcp_eagerfree5;
	kstat_named_t	tcp_timewait_syn_fail;
	kstat_named_t	tcp_listen_badflags;
	kstat_named_t	tcp_timeout_calls;
	kstat_named_t	tcp_timeout_cached_alloc;
	kstat_named_t	tcp_timeout_cancel_reqs;
	kstat_named_t	tcp_timeout_canceled;
	kstat_named_t	tcp_timermp_alloced;
	kstat_named_t	tcp_timermp_freed;
	kstat_named_t	tcp_timermp_allocfail;
	kstat_named_t	tcp_timermp_allocdblfail;
	kstat_named_t	tcp_push_timer_cnt;
	kstat_named_t	tcp_ack_timer_cnt;
	kstat_named_t	tcp_ire_null1;
	kstat_named_t	tcp_ire_null;
	kstat_named_t	tcp_ip_send;
	kstat_named_t	tcp_ip_ire_send;
	kstat_named_t   tcp_wsrv_called;
	kstat_named_t   tcp_flwctl_on;
	kstat_named_t	tcp_timer_fire_early;
	kstat_named_t	tcp_timer_fire_miss;
	kstat_named_t	tcp_freelist_cleanup;
	kstat_named_t	tcp_rput_v6_error;
	kstat_named_t	tcp_out_sw_cksum;
	kstat_named_t	tcp_out_sw_cksum_bytes;
	kstat_named_t	tcp_zcopy_on;
	kstat_named_t	tcp_zcopy_off;
	kstat_named_t	tcp_zcopy_backoff;
	kstat_named_t	tcp_zcopy_disable;
	kstat_named_t	tcp_mdt_pkt_out;
	kstat_named_t	tcp_mdt_pkt_out_v4;
	kstat_named_t	tcp_mdt_pkt_out_v6;
	kstat_named_t	tcp_mdt_discarded;
	kstat_named_t	tcp_mdt_conn_halted1;
	kstat_named_t	tcp_mdt_conn_halted2;
	kstat_named_t	tcp_mdt_conn_halted3;
	kstat_named_t	tcp_mdt_conn_resumed1;
	kstat_named_t	tcp_mdt_conn_resumed2;
	kstat_named_t	tcp_mdt_legacy_small;
	kstat_named_t	tcp_mdt_legacy_all;
	kstat_named_t	tcp_mdt_legacy_ret;
	kstat_named_t	tcp_mdt_allocfail;
	kstat_named_t	tcp_mdt_addpdescfail;
	kstat_named_t	tcp_mdt_allocd;
	kstat_named_t	tcp_mdt_linked;
	kstat_named_t	tcp_fusion_flowctl;
	kstat_named_t	tcp_fusion_backenabled;
	kstat_named_t	tcp_fusion_urg;
	kstat_named_t	tcp_fusion_putnext;
	kstat_named_t	tcp_fusion_unfusable;
	kstat_named_t	tcp_fusion_aborted;
	kstat_named_t	tcp_fusion_unqualified;
	kstat_named_t	tcp_fusion_rrw_busy;
	kstat_named_t	tcp_fusion_rrw_msgcnt;
	kstat_named_t	tcp_in_ack_unsent_drop;
	kstat_named_t	tcp_sock_fallback;
} tcp_stat_t;

/* The single global instance that the TCP_STAT*() macros update. */
extern tcp_stat_t tcp_statistics;
299 
/*
 * Statistics update helpers.  The argument x is the name of a
 * tcp_statistics member; TCP_STAT_FIELD() expands to the 64-bit counter
 * stored in it (an lvalue).
 *
 *	TCP_STAT(x)		post-increments the counter
 *	TCP_STAT_UPDATE(x, n)	adds n to the counter
 *	TCP_STAT_SET(x, n)	overwrites the counter with n
 *
 * The updates are plain, unlocked read-modify-write expressions.
 */
#define	TCP_STAT_FIELD(x)	tcp_statistics.x.value.ui64

#define	TCP_STAT(x)		(TCP_STAT_FIELD(x)++)
#define	TCP_STAT_UPDATE(x, n)	(TCP_STAT_FIELD(x) += (n))
#define	TCP_STAT_SET(x, n)	(TCP_STAT_FIELD(x) = (n))
303 
/*
 * Globals and routines shared with the rest of the TCP implementation.
 * They are only declared here; the definitions live outside this header.
 */

/* STREAMS qinit structures (struct qinit) exported by TCP. */
extern struct qinit tcp_loopback_rinit, tcp_rinit;
/* NOTE(review): presumably the global on/off switch for TCP fusion. */
extern boolean_t do_tcp_fusion;

extern int	tcp_maxpsz_set(tcp_t *, boolean_t);
extern void	tcp_timers_stop(tcp_t *);
extern void	tcp_rcv_enqueue(tcp_t *, mblk_t *, uint_t);
/* Has the void (*)(void *) signature that tcp_timeout() takes as callback. */
extern void	tcp_push_timer(void *);
/*
 * Timer primitives behind TCP_TIMER() and TCP_TIMER_CANCEL(): both take
 * the conn_t of the endpoint; tcp_timeout() additionally takes the
 * callback and the interval and returns a timeout_id_t, which is what
 * tcp_timeout_cancel() accepts.
 */
extern timeout_id_t tcp_timeout(conn_t *, void (*)(void *), clock_t);
extern clock_t	tcp_timeout_cancel(conn_t *, timeout_id_t);
313 
/*
 * TCP fusion entry points.  They are only declared here; the definitions
 * live outside this header.
 * NOTE(review): the per-function semantics are not visible from this
 * header; consult the definitions before relying on return values.
 */
extern void	tcp_fuse(tcp_t *, uchar_t *, tcph_t *);
extern void	tcp_unfuse(tcp_t *);
extern boolean_t tcp_fuse_output(tcp_t *, mblk_t *, uint32_t);
extern void	tcp_fuse_output_urg(tcp_t *, mblk_t *);
extern boolean_t tcp_fuse_rcv_drain(queue_t *, tcp_t *, mblk_t **);
extern void	tcp_fuse_syncstr_enable_pair(tcp_t *);
extern void	tcp_fuse_disable_pair(tcp_t *, boolean_t);
extern int	tcp_fuse_rrw(queue_t *, struiod_t *);
extern int	tcp_fuse_rinfop(queue_t *, infod_t *);
extern size_t	tcp_fuse_set_rcv_hiwat(tcp_t *, size_t);
extern int	tcp_fuse_maxpsz_set(tcp_t *);
325 
326 #endif	/* _KERNEL */
327 
328 #ifdef	__cplusplus
329 }
330 #endif
331 
332 #endif	/* _INET_TCP_IMPL_H */
333