xref: /titanic_50/usr/src/uts/common/inet/tcp_stack.h (revision 257873cfc1dd3337766407f80397db60a56f2f5a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_INET_TCP_STACK_H
28 #define	_INET_TCP_STACK_H
29 
30 #include <sys/netstack.h>
31 #include <inet/ip.h>
32 #include <inet/ipdrop.h>
33 
34 #ifdef	__cplusplus
35 extern "C" {
36 #endif
37 
38 /* Kstats */
39 typedef struct tcp_stat {
40 	kstat_named_t	tcp_time_wait;
41 	kstat_named_t	tcp_time_wait_syn;
42 	kstat_named_t	tcp_time_wait_syn_success;
43 	kstat_named_t	tcp_time_wait_syn_fail;
44 	kstat_named_t	tcp_reinput_syn;
45 	kstat_named_t	tcp_ip_output;
46 	kstat_named_t	tcp_detach_non_time_wait;
47 	kstat_named_t	tcp_detach_time_wait;
48 	kstat_named_t	tcp_time_wait_reap;
49 	kstat_named_t	tcp_clean_death_nondetached;
50 	kstat_named_t	tcp_reinit_calls;
51 	kstat_named_t	tcp_eager_err1;
52 	kstat_named_t	tcp_eager_err2;
53 	kstat_named_t	tcp_eager_blowoff_calls;
54 	kstat_named_t	tcp_eager_blowoff_q;
55 	kstat_named_t	tcp_eager_blowoff_q0;
56 	kstat_named_t	tcp_not_hard_bound;
57 	kstat_named_t	tcp_no_listener;
58 	kstat_named_t	tcp_found_eager;
59 	kstat_named_t	tcp_wrong_queue;
60 	kstat_named_t	tcp_found_eager_binding1;
61 	kstat_named_t	tcp_found_eager_bound1;
62 	kstat_named_t	tcp_eager_has_listener1;
63 	kstat_named_t	tcp_open_alloc;
64 	kstat_named_t	tcp_open_detached_alloc;
65 	kstat_named_t	tcp_rput_time_wait;
66 	kstat_named_t	tcp_listendrop;
67 	kstat_named_t	tcp_listendropq0;
68 	kstat_named_t	tcp_wrong_rq;
69 	kstat_named_t	tcp_rsrv_calls;
70 	kstat_named_t	tcp_eagerfree2;
71 	kstat_named_t	tcp_eagerfree3;
72 	kstat_named_t	tcp_eagerfree4;
73 	kstat_named_t	tcp_eagerfree5;
74 	kstat_named_t	tcp_timewait_syn_fail;
75 	kstat_named_t	tcp_listen_badflags;
76 	kstat_named_t	tcp_timeout_calls;
77 	kstat_named_t	tcp_timeout_cached_alloc;
78 	kstat_named_t	tcp_timeout_cancel_reqs;
79 	kstat_named_t	tcp_timeout_canceled;
80 	kstat_named_t	tcp_timermp_freed;
81 	kstat_named_t	tcp_push_timer_cnt;
82 	kstat_named_t	tcp_ack_timer_cnt;
83 	kstat_named_t	tcp_ire_null1;
84 	kstat_named_t	tcp_ire_null;
85 	kstat_named_t	tcp_ip_send;
86 	kstat_named_t	tcp_ip_ire_send;
87 	kstat_named_t   tcp_wsrv_called;
88 	kstat_named_t   tcp_flwctl_on;
89 	kstat_named_t	tcp_timer_fire_early;
90 	kstat_named_t	tcp_timer_fire_miss;
91 	kstat_named_t	tcp_rput_v6_error;
92 	kstat_named_t	tcp_out_sw_cksum;
93 	kstat_named_t	tcp_out_sw_cksum_bytes;
94 	kstat_named_t	tcp_zcopy_on;
95 	kstat_named_t	tcp_zcopy_off;
96 	kstat_named_t	tcp_zcopy_backoff;
97 	kstat_named_t	tcp_zcopy_disable;
98 	kstat_named_t	tcp_mdt_pkt_out;
99 	kstat_named_t	tcp_mdt_pkt_out_v4;
100 	kstat_named_t	tcp_mdt_pkt_out_v6;
101 	kstat_named_t	tcp_mdt_discarded;
102 	kstat_named_t	tcp_mdt_conn_halted1;
103 	kstat_named_t	tcp_mdt_conn_halted2;
104 	kstat_named_t	tcp_mdt_conn_halted3;
105 	kstat_named_t	tcp_mdt_conn_resumed1;
106 	kstat_named_t	tcp_mdt_conn_resumed2;
107 	kstat_named_t	tcp_mdt_legacy_small;
108 	kstat_named_t	tcp_mdt_legacy_all;
109 	kstat_named_t	tcp_mdt_legacy_ret;
110 	kstat_named_t	tcp_mdt_allocfail;
111 	kstat_named_t	tcp_mdt_addpdescfail;
112 	kstat_named_t	tcp_mdt_allocd;
113 	kstat_named_t	tcp_mdt_linked;
114 	kstat_named_t	tcp_fusion_flowctl;
115 	kstat_named_t	tcp_fusion_backenabled;
116 	kstat_named_t	tcp_fusion_urg;
117 	kstat_named_t	tcp_fusion_putnext;
118 	kstat_named_t	tcp_fusion_unfusable;
119 	kstat_named_t	tcp_fusion_aborted;
120 	kstat_named_t	tcp_fusion_unqualified;
121 	kstat_named_t	tcp_fusion_rrw_busy;
122 	kstat_named_t	tcp_fusion_rrw_msgcnt;
123 	kstat_named_t	tcp_fusion_rrw_plugged;
124 	kstat_named_t	tcp_in_ack_unsent_drop;
125 	kstat_named_t	tcp_sock_fallback;
126 	kstat_named_t	tcp_lso_enabled;
127 	kstat_named_t	tcp_lso_disabled;
128 	kstat_named_t	tcp_lso_times;
129 	kstat_named_t	tcp_lso_pkt_out;
130 } tcp_stat_t;
131 
/*
 * Accessors for the counters held in (tcps)->tcps_statistics.
 * `x' is the name of a member of that structure; every macro works on
 * that member's value.ui64.
 *
 *	TCP_STAT_COUNTER(tcps, x)	the counter itself, as an lvalue
 *	TCP_STAT(tcps, x)		post-increment the counter
 *	TCP_STAT_UPDATE(tcps, x, n)	add n to the counter
 *	TCP_STAT_SET(tcps, x, n)	assign n to the counter
 *
 * These expand to plain C expressions; the macros themselves do no
 * locking and use no atomic operations.
 */
#define	TCP_STAT_COUNTER(tcps, x)	((tcps)->tcps_statistics.x.value.ui64)
#define	TCP_STAT(tcps, x)		(TCP_STAT_COUNTER(tcps, x)++)
#define	TCP_STAT_UPDATE(tcps, x, n)	(TCP_STAT_COUNTER(tcps, x) += (n))
#define	TCP_STAT_SET(tcps, x, n)	(TCP_STAT_COUNTER(tcps, x) = (n))
137 
138 typedef struct tcp_g_stat {
139 	kstat_named_t	tcp_timermp_alloced;
140 	kstat_named_t	tcp_timermp_allocfail;
141 	kstat_named_t	tcp_timermp_allocdblfail;
142 	kstat_named_t	tcp_freelist_cleanup;
143 } tcp_g_stat_t;
144 
145 #ifdef _KERNEL
146 
147 /*
148  * TCP stack instances
149  */
150 struct tcp_stack {
151 	netstack_t	*tcps_netstack;	/* Common netstack */
152 
153 	mib2_tcp_t	tcps_mib;
154 
155 	/* Protected by tcps_g_q_lock */
156 	queue_t		*tcps_g_q;	/* Default queue */
157 	uint_t		tcps_refcnt;	/* Total number of tcp_t's */
158 	kmutex_t	tcps_g_q_lock;
159 	kcondvar_t	tcps_g_q_cv;
160 	kthread_t	*tcps_g_q_creator;
161 	struct __ldi_handle *tcps_g_q_lh;
162 	cred_t		*tcps_g_q_cr;    /* For _inactive close call */
163 
164 	/* Protected by tcp_hsp_lock */
165 	struct tcp_hsp	**tcps_hsp_hash;	/* Hash table for HSPs */
166 	krwlock_t	 tcps_hsp_lock;
167 
168 	/*
169 	 * Extra privileged ports. In host byte order.
170 	 * Protected by tcp_epriv_port_lock.
171 	 */
172 #define	TCP_NUM_EPRIV_PORTS	64
173 	int		tcps_g_num_epriv_ports;
174 	uint16_t	tcps_g_epriv_ports[TCP_NUM_EPRIV_PORTS];
175 	kmutex_t	tcps_epriv_port_lock;
176 
177 	/*
178 	 * The smallest anonymous port in the priviledged port range which TCP
179 	 * looks for free port.  Use in the option TCP_ANONPRIVBIND.
180 	 */
181 	in_port_t	tcps_min_anonpriv_port;
182 
183 	/* Only modified during _init and _fini thus no locking is needed. */
184 	caddr_t		tcps_g_nd;
185 	struct tcpparam_s *tcps_params;	/* ndd parameters */
186 	struct tcpparam_s *tcps_wroff_xtra_param;
187 	struct tcpparam_s *tcps_mdt_head_param;
188 	struct tcpparam_s *tcps_mdt_tail_param;
189 	struct tcpparam_s *tcps_mdt_max_pbufs_param;
190 
191 	/* Hint not protected by any lock */
192 	uint_t		tcps_next_port_to_try;
193 
194 	/* TCP bind hash list - all tcp_t with state >= BOUND. */
195 	struct tf_s	*tcps_bind_fanout;
196 
197 	/* TCP queue hash list - all tcp_t in case they will be an acceptor. */
198 	struct tf_s	*tcps_acceptor_fanout;
199 
200 	/*
201 	 * MIB-2 stuff for SNMP
202 	 * Note: tcpInErrs {tcp 15} is accumulated in ip.c
203 	 */
204 	kstat_t		*tcps_mibkp;	/* kstat exporting tcp_mib data */
205 	kstat_t		*tcps_kstat;
206 	tcp_stat_t	tcps_statistics;
207 
208 	uint32_t	tcps_iss_incr_extra;
209 				/* Incremented for each connection */
210 	kmutex_t	tcps_iss_key_lock;
211 	MD5_CTX		tcps_iss_key;
212 
213 	/* Packet dropper for TCP IPsec policy drops. */
214 	ipdropper_t	tcps_dropper;
215 
216 	/*
217 	 * This controls the rate some ndd info report functions can be used
218 	 * by non-priviledged users.  It stores the last time such info is
219 	 * requested.  When those report functions are called again, this
220 	 * is checked with the current time and compare with the ndd param
221 	 * tcp_ndd_get_info_interval.
222 	 */
223 	clock_t		tcps_last_ndd_get_info_time;
224 
225 	/*
226 	 * These two variables control the rate for TCP to generate RSTs in
227 	 * response to segments not belonging to any connections.  We limit
228 	 * TCP to sent out tcp_rst_sent_rate (ndd param) number of RSTs in
229 	 * each 1 second interval.  This is to protect TCP against DoS attack.
230 	 */
231 	clock_t		tcps_last_rst_intrvl;
232 	uint32_t	tcps_rst_cnt;
233 	/* The number of RST not sent because of the rate limit. */
234 	uint32_t	tcps_rst_unsent;
235 };
236 typedef struct tcp_stack tcp_stack_t;
237 
238 #endif /* _KERNEL */
239 #ifdef	__cplusplus
240 }
241 #endif
242 
243 #endif	/* _INET_TCP_STACK_H */
244