xref: /illumos-gate/usr/src/uts/common/inet/tcp_stack.h (revision fec509a05ddbf645268fe2e537314def7d1b67c8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_INET_TCP_STACK_H
28 #define	_INET_TCP_STACK_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/netstack.h>
33 #include <inet/ip.h>
34 #include <inet/ipdrop.h>
35 
36 #ifdef	__cplusplus
37 extern "C" {
38 #endif
39 
40 /*
 * Kstats
 *
 * Named kstat counters kept for TCP.  struct tcp_stack embeds one
 * tcp_stat_t as tcps_statistics, and the TCP_STAT(), TCP_STAT_UPDATE()
 * and TCP_STAT_SET() macros in this header update individual members
 * through value.ui64.
 *
 * The member prefixes (tcp_mdt_*, tcp_fusion_*, tcp_zcopy_*, tcp_lso_*,
 * tcp_timeout_*, ...) group related counters; what each one counts is
 * decided by the code that bumps it, which is outside this header.
 *
 * NOTE(review): named-kstat structures are normally filled in from a
 * template listing the members in the same order -- confirm against the
 * kstat setup code before inserting, removing, or reordering fields.
 */
41 typedef struct tcp_stat {
42 	kstat_named_t	tcp_time_wait;
43 	kstat_named_t	tcp_time_wait_syn;
44 	kstat_named_t	tcp_time_wait_syn_success;
45 	kstat_named_t	tcp_time_wait_syn_fail;
46 	kstat_named_t	tcp_reinput_syn;
47 	kstat_named_t	tcp_ip_output;
48 	kstat_named_t	tcp_detach_non_time_wait;
49 	kstat_named_t	tcp_detach_time_wait;
50 	kstat_named_t	tcp_time_wait_reap;
51 	kstat_named_t	tcp_clean_death_nondetached;
52 	kstat_named_t	tcp_reinit_calls;
53 	kstat_named_t	tcp_eager_err1;
54 	kstat_named_t	tcp_eager_err2;
55 	kstat_named_t	tcp_eager_blowoff_calls;
56 	kstat_named_t	tcp_eager_blowoff_q;
57 	kstat_named_t	tcp_eager_blowoff_q0;
58 	kstat_named_t	tcp_not_hard_bound;
59 	kstat_named_t	tcp_no_listener;
60 	kstat_named_t	tcp_found_eager;
61 	kstat_named_t	tcp_wrong_queue;
62 	kstat_named_t	tcp_found_eager_binding1;
63 	kstat_named_t	tcp_found_eager_bound1;
64 	kstat_named_t	tcp_eager_has_listener1;
65 	kstat_named_t	tcp_open_alloc;
66 	kstat_named_t	tcp_open_detached_alloc;
67 	kstat_named_t	tcp_rput_time_wait;
68 	kstat_named_t	tcp_listendrop;
69 	kstat_named_t	tcp_listendropq0;
70 	kstat_named_t	tcp_wrong_rq;
71 	kstat_named_t	tcp_rsrv_calls;
72 	kstat_named_t	tcp_eagerfree2;
73 	kstat_named_t	tcp_eagerfree3;
74 	kstat_named_t	tcp_eagerfree4;
75 	kstat_named_t	tcp_eagerfree5;
	/*
	 * A separate member from tcp_time_wait_syn_fail above; the two
	 * names differ only by an underscore.
	 */
76 	kstat_named_t	tcp_timewait_syn_fail;
77 	kstat_named_t	tcp_listen_badflags;
78 	kstat_named_t	tcp_timeout_calls;
79 	kstat_named_t	tcp_timeout_cached_alloc;
80 	kstat_named_t	tcp_timeout_cancel_reqs;
81 	kstat_named_t	tcp_timeout_canceled;
82 	kstat_named_t	tcp_timermp_freed;
83 	kstat_named_t	tcp_push_timer_cnt;
84 	kstat_named_t	tcp_ack_timer_cnt;
85 	kstat_named_t	tcp_ire_null1;
86 	kstat_named_t	tcp_ire_null;
87 	kstat_named_t	tcp_ip_send;
88 	kstat_named_t	tcp_ip_ire_send;
89 	kstat_named_t   tcp_wsrv_called;
90 	kstat_named_t   tcp_flwctl_on;
91 	kstat_named_t	tcp_timer_fire_early;
92 	kstat_named_t	tcp_timer_fire_miss;
93 	kstat_named_t	tcp_rput_v6_error;
94 	kstat_named_t	tcp_out_sw_cksum;
95 	kstat_named_t	tcp_out_sw_cksum_bytes;
96 	kstat_named_t	tcp_zcopy_on;
97 	kstat_named_t	tcp_zcopy_off;
98 	kstat_named_t	tcp_zcopy_backoff;
99 	kstat_named_t	tcp_zcopy_disable;
100 	kstat_named_t	tcp_mdt_pkt_out;
101 	kstat_named_t	tcp_mdt_pkt_out_v4;
102 	kstat_named_t	tcp_mdt_pkt_out_v6;
103 	kstat_named_t	tcp_mdt_discarded;
104 	kstat_named_t	tcp_mdt_conn_halted1;
105 	kstat_named_t	tcp_mdt_conn_halted2;
106 	kstat_named_t	tcp_mdt_conn_halted3;
107 	kstat_named_t	tcp_mdt_conn_resumed1;
108 	kstat_named_t	tcp_mdt_conn_resumed2;
109 	kstat_named_t	tcp_mdt_legacy_small;
110 	kstat_named_t	tcp_mdt_legacy_all;
111 	kstat_named_t	tcp_mdt_legacy_ret;
112 	kstat_named_t	tcp_mdt_allocfail;
113 	kstat_named_t	tcp_mdt_addpdescfail;
114 	kstat_named_t	tcp_mdt_allocd;
115 	kstat_named_t	tcp_mdt_linked;
116 	kstat_named_t	tcp_fusion_flowctl;
117 	kstat_named_t	tcp_fusion_backenabled;
118 	kstat_named_t	tcp_fusion_urg;
119 	kstat_named_t	tcp_fusion_putnext;
120 	kstat_named_t	tcp_fusion_unfusable;
121 	kstat_named_t	tcp_fusion_aborted;
122 	kstat_named_t	tcp_fusion_unqualified;
123 	kstat_named_t	tcp_fusion_rrw_busy;
124 	kstat_named_t	tcp_fusion_rrw_msgcnt;
125 	kstat_named_t	tcp_fusion_rrw_plugged;
126 	kstat_named_t	tcp_in_ack_unsent_drop;
127 	kstat_named_t	tcp_sock_fallback;
128 	kstat_named_t	tcp_lso_enabled;
129 	kstat_named_t	tcp_lso_disabled;
130 	kstat_named_t	tcp_lso_times;
131 	kstat_named_t	tcp_lso_pkt_out;
132 } tcp_stat_t;
133 
/*
 * Counter update helpers for the tcp_stat_t embedded in a TCP stack
 * instance.
 *
 *	tcps	pointer to the stack instance; the counters are reached
 *		through its tcps_statistics member.
 *	x	bare member name of tcp_stat_t (it is substituted directly
 *		into the member access, so it cannot be an expression).
 *	n	amount to add (TCP_STAT_UPDATE) or value to store
 *		(TCP_STAT_SET).
 *
 * TCP_STAT() post-increments the counter, so the expression yields the
 * value the counter had before the bump.  All three expand to plain C
 * ++, += and = expressions on value.ui64; the expansions contain no
 * locking or atomic primitives.
 */
134 #define	TCP_STAT(tcps, x)	((tcps)->tcps_statistics.x.value.ui64++)
135 #define	TCP_STAT_UPDATE(tcps, x, n)	\
136 	((tcps)->tcps_statistics.x.value.ui64 += (n))
137 #define	TCP_STAT_SET(tcps, x, n)	\
138 	((tcps)->tcps_statistics.x.value.ui64 = (n))
139 
/*
 * A second, smaller set of named kstat counters.  No member of
 * struct tcp_stack in this header has this type, and the TCP_STAT*()
 * macros do not reach it.
 *
 * NOTE(review): the "_g_" presumably marks these as system-wide rather
 * than per-stack counters -- confirm against the code that defines the
 * tcp_g_stat_t object.
 */
140 typedef struct tcp_g_stat {
141 	kstat_named_t	tcp_timermp_alloced;
142 	kstat_named_t	tcp_timermp_allocfail;
143 	kstat_named_t	tcp_timermp_allocdblfail;
144 	kstat_named_t	tcp_freelist_cleanup;
145 } tcp_g_stat_t;
146 
147 #ifdef _KERNEL
148 
149 /*
150  * TCP stack instances
 *
 * Per-instance TCP state.  Each instance points back at its netstack_t
 * through tcps_netstack and carries its own MIB data, kstat counters
 * (tcps_statistics), fanout table pointers, port bookkeeping, ndd
 * parameter pointers and rate-limit state, together with the locks
 * named in the member comments below.
151  */
152 struct tcp_stack {
153 	netstack_t	*tcps_netstack;	/* Common netstack */
154 
155 	mib2_tcp_t	tcps_mib;
156 
157 	/* Protected by tcps_g_q_lock */
158 	queue_t		*tcps_g_q;	/* Default queue */
159 	uint_t		tcps_refcnt;	/* Total number of tcp_t's */
160 	kmutex_t	tcps_g_q_lock;
161 	kcondvar_t	tcps_g_q_cv;
162 	kthread_t	*tcps_g_q_creator;
163 	struct __ldi_handle *tcps_g_q_lh;
164 	cred_t		*tcps_g_q_cr;    /* For _inactive close call */
165 
166 	/* Protected by tcps_hsp_lock */
167 	struct tcp_hsp	**tcps_hsp_hash;	/* Hash table for HSPs */
168 	krwlock_t	 tcps_hsp_lock;
169 
170 	/*
171 	 * Extra privileged ports. In host byte order.
172 	 * Protected by tcps_epriv_port_lock.
173 	 */
174 #define	TCP_NUM_EPRIV_PORTS	64
175 	int		tcps_g_num_epriv_ports;
176 	uint16_t	tcps_g_epriv_ports[TCP_NUM_EPRIV_PORTS];
177 	kmutex_t	tcps_epriv_port_lock;
178 
179 	/*
180 	 * The smallest anonymous port in the privileged port range at which
181 	 * TCP looks for a free port.  Used by the option TCP_ANONPRIVBIND.
182 	 */
183 	in_port_t	tcps_min_anonpriv_port;
184 
185 	/* Only modified during _init and _fini thus no locking is needed. */
186 	caddr_t		tcps_g_nd;
187 	struct tcpparam_s *tcps_params;	/* ndd parameters */
188 	struct tcpparam_s *tcps_wroff_xtra_param;
189 	struct tcpparam_s *tcps_mdt_head_param;
190 	struct tcpparam_s *tcps_mdt_tail_param;
191 	struct tcpparam_s *tcps_mdt_max_pbufs_param;
192 
193 	/* Hint not protected by any lock */
194 	uint_t		tcps_next_port_to_try;
195 
196 	/* TCP bind hash list - all tcp_t with state >= BOUND. */
197 	struct tf_s	*tcps_bind_fanout;
198 
199 	/* TCP queue hash list - all tcp_t in case they will be an acceptor. */
200 	struct tf_s	*tcps_acceptor_fanout;
201 
202 	/* The reserved port array. */
203 	struct tcp_rport_s	*tcps_reserved_port;
204 
205 	/* Lock protecting the tcps_reserved_port array. */
206 	krwlock_t 	tcps_reserved_port_lock;
207 
208 	/* The number of ranges in the array. */
209 	uint32_t	tcps_reserved_port_array_size;
210 
211 	/*
212 	 * MIB-2 stuff for SNMP
213 	 * Note: tcpInErrs {tcp 15} is accumulated in ip.c
214 	 */
215 	kstat_t		*tcps_mibkp;	/* kstat exporting tcp_mib data */
	/* NOTE(review): presumably the kstat exporting tcps_statistics. */
216 	kstat_t		*tcps_kstat;
217 	tcp_stat_t	tcps_statistics;	/* TCP_STAT*() counters */
218 
219 	uint32_t	tcps_iss_incr_extra;
220 				/* Incremented for each connection */
	/* NOTE(review): presumably guards tcps_iss_key -- confirm. */
221 	kmutex_t	tcps_iss_key_lock;
222 	MD5_CTX		tcps_iss_key;
223 
224 	/* Packet dropper for TCP IPsec policy drops. */
225 	ipdropper_t	tcps_dropper;
226 
227 	/*
228 	 * This controls the rate at which some ndd info report functions can
229 	 * be used by non-privileged users.  It stores the last time such info
230 	 * was requested.  When those report functions are called again, the
231 	 * time elapsed since then is compared with the ndd param
232 	 * tcp_ndd_get_info_interval.
233 	 */
234 	clock_t		tcps_last_ndd_get_info_time;
235 
236 	/*
237 	 * These two variables control the rate for TCP to generate RSTs in
238 	 * response to segments not belonging to any connections.  We limit
239 	 * TCP to sending out tcp_rst_sent_rate (ndd param) RSTs in each
240 	 * 1 second interval.  This is to protect TCP against DoS attack.
241 	 */
242 	clock_t		tcps_last_rst_intrvl;
243 	uint32_t	tcps_rst_cnt;
244 	/* The number of RST not sent because of the rate limit. */
245 	uint32_t	tcps_rst_unsent;
246 };
247 typedef struct tcp_stack tcp_stack_t;
248 
249 #endif /* _KERNEL */
250 #ifdef	__cplusplus
251 }
252 #endif
253 
254 #endif	/* _INET_TCP_STACK_H */
255