1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011, Joyent Inc. All rights reserved.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27 /* Copyright (c) 1990 Mentat Inc. */
28
29 #include <inet/ip.h>
30 #include <inet/tcp_impl.h>
31 #include <sys/multidata.h>
32 #include <sys/sunddi.h>
33
/*
 * Upper bounds for the TCP MSS tunables.  An IP datagram is at most
 * IP_MAXPACKET (64k - 1) bytes, so the largest segment payload is what is
 * left after subtracting the fixed IP header and the fixed TCP header.
 */
#define	TCP_MSS_MAX_IPV4 (IP_MAXPACKET - sizeof (ipha_t) - sizeof (tcpha_t))
#define	TCP_MSS_MAX_IPV6 (IP_MAXPACKET - sizeof (ip6_t) - sizeof (tcpha_t))

/* Bound used when the IP version is not known: the maximum of the two. */
#define	TCP_MSS_MAX	TCP_MSS_MAX_IPV4
40
41 /*
42 * Set the RFC 1948 pass phrase
43 */
44 /* ARGSUSED */
45 static int
tcp_set_1948phrase(netstack_t * stack,cred_t * cr,mod_prop_info_t * pinfo,const char * ifname,const void * pr_val,uint_t flags)46 tcp_set_1948phrase(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
47 const char *ifname, const void* pr_val, uint_t flags)
48 {
49 if (flags & MOD_PROP_DEFAULT)
50 return (ENOTSUP);
51
52 /*
53 * Basically, value contains a new pass phrase. Pass it along!
54 */
55 tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val),
56 stack->netstack_tcp);
57 return (0);
58 }
59
60 /*
61 * returns the current list of listener limit configuration.
62 */
63 /* ARGSUSED */
64 static int
tcp_listener_conf_get(netstack_t * stack,mod_prop_info_t * pinfo,const char * ifname,void * val,uint_t psize,uint_t flags)65 tcp_listener_conf_get(netstack_t *stack, mod_prop_info_t *pinfo,
66 const char *ifname, void *val, uint_t psize, uint_t flags)
67 {
68 tcp_stack_t *tcps = stack->netstack_tcp;
69 tcp_listener_t *tl;
70 char *pval = val;
71 size_t nbytes = 0, tbytes = 0;
72 uint_t size;
73 int err = 0;
74
75 bzero(pval, psize);
76 size = psize;
77
78 if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE))
79 return (0);
80
81 mutex_enter(&tcps->tcps_listener_conf_lock);
82 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
83 tl = list_next(&tcps->tcps_listener_conf, tl)) {
84 if (psize == size)
85 nbytes = snprintf(pval, size, "%d:%d", tl->tl_port,
86 tl->tl_ratio);
87 else
88 nbytes = snprintf(pval, size, ",%d:%d", tl->tl_port,
89 tl->tl_ratio);
90 size -= nbytes;
91 pval += nbytes;
92 tbytes += nbytes;
93 if (tbytes >= psize) {
94 /* Buffer overflow, stop copying information */
95 err = ENOBUFS;
96 break;
97 }
98 }
99
100 mutex_exit(&tcps->tcps_listener_conf_lock);
101 return (err);
102 }
103
104 /*
105 * add a new listener limit configuration.
106 */
107 /* ARGSUSED */
108 static int
tcp_listener_conf_add(netstack_t * stack,cred_t * cr,mod_prop_info_t * pinfo,const char * ifname,const void * pval,uint_t flags)109 tcp_listener_conf_add(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
110 const char *ifname, const void* pval, uint_t flags)
111 {
112 tcp_listener_t *new_tl;
113 tcp_listener_t *tl;
114 long lport;
115 long ratio;
116 char *colon;
117 tcp_stack_t *tcps = stack->netstack_tcp;
118
119 if (flags & MOD_PROP_DEFAULT)
120 return (ENOTSUP);
121
122 if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 ||
123 lport > USHRT_MAX || *colon != ':') {
124 return (EINVAL);
125 }
126 if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0)
127 return (EINVAL);
128
129 mutex_enter(&tcps->tcps_listener_conf_lock);
130 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
131 tl = list_next(&tcps->tcps_listener_conf, tl)) {
132 /* There is an existing entry, so update its ratio value. */
133 if (tl->tl_port == lport) {
134 tl->tl_ratio = ratio;
135 mutex_exit(&tcps->tcps_listener_conf_lock);
136 return (0);
137 }
138 }
139
140 if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) ==
141 NULL) {
142 mutex_exit(&tcps->tcps_listener_conf_lock);
143 return (ENOMEM);
144 }
145
146 new_tl->tl_port = lport;
147 new_tl->tl_ratio = ratio;
148 list_insert_tail(&tcps->tcps_listener_conf, new_tl);
149 mutex_exit(&tcps->tcps_listener_conf_lock);
150 return (0);
151 }
152
153 /*
154 * remove a listener limit configuration.
155 */
156 /* ARGSUSED */
157 static int
tcp_listener_conf_del(netstack_t * stack,cred_t * cr,mod_prop_info_t * pinfo,const char * ifname,const void * pval,uint_t flags)158 tcp_listener_conf_del(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
159 const char *ifname, const void* pval, uint_t flags)
160 {
161 tcp_listener_t *tl;
162 long lport;
163 tcp_stack_t *tcps = stack->netstack_tcp;
164
165 if (flags & MOD_PROP_DEFAULT)
166 return (ENOTSUP);
167
168 if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 ||
169 lport > USHRT_MAX) {
170 return (EINVAL);
171 }
172 mutex_enter(&tcps->tcps_listener_conf_lock);
173 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
174 tl = list_next(&tcps->tcps_listener_conf, tl)) {
175 if (tl->tl_port == lport) {
176 list_remove(&tcps->tcps_listener_conf, tl);
177 mutex_exit(&tcps->tcps_listener_conf_lock);
178 kmem_free(tl, sizeof (tcp_listener_t));
179 return (0);
180 }
181 }
182 mutex_exit(&tcps->tcps_listener_conf_lock);
183 return (ESRCH);
184 }
185
186 static int
tcp_set_buf_prop(netstack_t * stack,cred_t * cr,mod_prop_info_t * pinfo,const char * ifname,const void * pval,uint_t flags)187 tcp_set_buf_prop(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
188 const char *ifname, const void *pval, uint_t flags)
189 {
190 return (mod_set_buf_prop(stack->netstack_tcp->tcps_propinfo_tbl, stack,
191 cr, pinfo, ifname, pval, flags));
192 }
193
194 static int
tcp_get_buf_prop(netstack_t * stack,mod_prop_info_t * pinfo,const char * ifname,void * val,uint_t psize,uint_t flags)195 tcp_get_buf_prop(netstack_t *stack, mod_prop_info_t *pinfo, const char *ifname,
196 void *val, uint_t psize, uint_t flags)
197 {
198 return (mod_get_buf_prop(stack->netstack_tcp->tcps_propinfo_tbl, stack,
199 pinfo, ifname, val, psize, flags));
200 }
201
202 /*
203 * Special checkers for smallest/largest anonymous port so they don't
204 * ever happen to be (largest < smallest).
205 */
206 /* ARGSUSED */
207 static int
tcp_smallest_anon_set(netstack_t * stack,cred_t * cr,mod_prop_info_t * pinfo,const char * ifname,const void * pval,uint_t flags)208 tcp_smallest_anon_set(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
209 const char *ifname, const void *pval, uint_t flags)
210 {
211 unsigned long new_value;
212 tcp_stack_t *tcps = stack->netstack_tcp;
213 int err;
214
215 if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0)
216 return (err);
217 /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
218 if ((uint32_t)new_value > tcps->tcps_largest_anon_port)
219 return (ERANGE);
220 pinfo->prop_cur_uval = (uint32_t)new_value;
221 return (0);
222 }
223
224 /* ARGSUSED */
225 static int
tcp_largest_anon_set(netstack_t * stack,cred_t * cr,mod_prop_info_t * pinfo,const char * ifname,const void * pval,uint_t flags)226 tcp_largest_anon_set(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
227 const char *ifname, const void *pval, uint_t flags)
228 {
229 unsigned long new_value;
230 tcp_stack_t *tcps = stack->netstack_tcp;
231 int err;
232
233 if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0)
234 return (err);
235 /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
236 if ((uint32_t)new_value < tcps->tcps_smallest_anon_port)
237 return (ERANGE);
238 pinfo->prop_cur_uval = (uint32_t)new_value;
239 return (0);
240 }
241
242 /*
243 * All of these are alterable, within the min/max values given, at run time.
244 *
245 * Note: All those tunables which do not start with "_" are Committed and
246 * therefore are public. See PSARC 2010/080.
247 */
248 mod_prop_info_t tcp_propinfo_tbl[] = {
249 /* tunable - 0 */
250 { "_time_wait_interval", MOD_PROTO_TCP,
251 mod_set_uint32, mod_get_uint32,
252 {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} },
253
254 { "_conn_req_max_q", MOD_PROTO_TCP,
255 mod_set_uint32, mod_get_uint32,
256 {1, UINT32_MAX, 128}, {128} },
257
258 { "_conn_req_max_q0", MOD_PROTO_TCP,
259 mod_set_uint32, mod_get_uint32,
260 {0, UINT32_MAX, 1024}, {1024} },
261
262 { "_conn_req_min", MOD_PROTO_TCP,
263 mod_set_uint32, mod_get_uint32,
264 {1, 1024, 1}, {1} },
265
266 { "_conn_grace_period", MOD_PROTO_TCP,
267 mod_set_uint32, mod_get_uint32,
268 {0*MS, 20*SECONDS, 0*MS}, {0*MS} },
269
270 { "_cwnd_max", MOD_PROTO_TCP,
271 mod_set_uint32, mod_get_uint32,
272 {128, ULP_MAX_BUF, 1024*1024}, {1024*1024} },
273
274 { "_debug", MOD_PROTO_TCP,
275 mod_set_uint32, mod_get_uint32,
276 {0, 10, 0}, {0} },
277
278 { "smallest_nonpriv_port", MOD_PROTO_TCP,
279 mod_set_uint32, mod_get_uint32,
280 {1024, (32*1024), 1024}, {1024} },
281
282 { "_ip_abort_cinterval", MOD_PROTO_TCP,
283 mod_set_uint32, mod_get_uint32,
284 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },
285
286 { "_ip_abort_linterval", MOD_PROTO_TCP,
287 mod_set_uint32, mod_get_uint32,
288 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },
289
290 /* tunable - 10 */
291 { "_ip_abort_interval", MOD_PROTO_TCP,
292 mod_set_uint32, mod_get_uint32,
293 {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} },
294
295 { "_ip_notify_cinterval", MOD_PROTO_TCP,
296 mod_set_uint32, mod_get_uint32,
297 {1*SECONDS, UINT32_MAX, 10*SECONDS},
298 {10*SECONDS} },
299
300 { "_ip_notify_interval", MOD_PROTO_TCP,
301 mod_set_uint32, mod_get_uint32,
302 {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} },
303
304 { "_ipv4_ttl", MOD_PROTO_TCP,
305 mod_set_uint32, mod_get_uint32,
306 {1, 255, 64}, {64} },
307
308 { "_keepalive_interval", MOD_PROTO_TCP,
309 mod_set_uint32, mod_get_uint32,
310 {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },
311
312 { "_maxpsz_multiplier", MOD_PROTO_TCP,
313 mod_set_uint32, mod_get_uint32,
314 {0, 100, 10}, {10} },
315
316 { "_mss_def_ipv4", MOD_PROTO_TCP,
317 mod_set_uint32, mod_get_uint32,
318 {1, TCP_MSS_MAX_IPV4, 536}, {536} },
319
320 { "_mss_max_ipv4", MOD_PROTO_TCP,
321 mod_set_uint32, mod_get_uint32,
322 {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4},
323 {TCP_MSS_MAX_IPV4} },
324
325 { "_mss_min", MOD_PROTO_TCP,
326 mod_set_uint32, mod_get_uint32,
327 {1, TCP_MSS_MAX, 108}, {108} },
328
329 { "_naglim_def", MOD_PROTO_TCP,
330 mod_set_uint32, mod_get_uint32,
331 {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },
332
333 /* tunable - 20 */
334 { "_rexmit_interval_initial", MOD_PROTO_TCP,
335 mod_set_uint32, mod_get_uint32,
336 {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} },
337
338 { "_rexmit_interval_max", MOD_PROTO_TCP,
339 mod_set_uint32, mod_get_uint32,
340 {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} },
341
342 { "_rexmit_interval_min", MOD_PROTO_TCP,
343 mod_set_uint32, mod_get_uint32,
344 {1*MS, 2*HOURS, 400*MS}, {400*MS} },
345
346 { "_deferred_ack_interval", MOD_PROTO_TCP,
347 mod_set_uint32, mod_get_uint32,
348 {1*MS, 1*MINUTES, 100*MS}, {100*MS} },
349
350 { "_snd_lowat_fraction", MOD_PROTO_TCP,
351 mod_set_uint32, mod_get_uint32,
352 {0, 16, 10}, {10} },
353
354 { "_dupack_fast_retransmit", MOD_PROTO_TCP,
355 mod_set_uint32, mod_get_uint32,
356 {1, 10000, 3}, {3} },
357
358 { "_ignore_path_mtu", MOD_PROTO_TCP,
359 mod_set_boolean, mod_get_boolean,
360 {B_FALSE}, {B_FALSE} },
361
362 { "smallest_anon_port", MOD_PROTO_TCP,
363 tcp_smallest_anon_set, mod_get_uint32,
364 {1024, ULP_MAX_PORT, 32*1024}, {32*1024} },
365
366 { "largest_anon_port", MOD_PROTO_TCP,
367 tcp_largest_anon_set, mod_get_uint32,
368 {1024, ULP_MAX_PORT, ULP_MAX_PORT},
369 {ULP_MAX_PORT} },
370
371 { "send_buf", MOD_PROTO_TCP,
372 tcp_set_buf_prop, tcp_get_buf_prop,
373 {TCP_XMIT_LOWATER, ULP_MAX_BUF, TCP_XMIT_HIWATER},
374 {TCP_XMIT_HIWATER} },
375
376 /* tunable - 30 */
377 { "_xmit_lowat", MOD_PROTO_TCP,
378 mod_set_uint32, mod_get_uint32,
379 {TCP_XMIT_LOWATER, ULP_MAX_BUF, TCP_XMIT_LOWATER},
380 {TCP_XMIT_LOWATER} },
381
382 { "recv_buf", MOD_PROTO_TCP,
383 tcp_set_buf_prop, tcp_get_buf_prop,
384 {TCP_RECV_LOWATER, ULP_MAX_BUF, TCP_RECV_HIWATER},
385 {TCP_RECV_HIWATER} },
386
387 { "_recv_hiwat_minmss", MOD_PROTO_TCP,
388 mod_set_uint32, mod_get_uint32,
389 {1, 65536, 4}, {4} },
390
391 { "_fin_wait_2_flush_interval", MOD_PROTO_TCP,
392 mod_set_uint32, mod_get_uint32,
393 {1*SECONDS, 2*HOURS, 60*SECONDS},
394 {60*SECONDS} },
395
396 { "max_buf", MOD_PROTO_TCP,
397 mod_set_uint32, mod_get_uint32,
398 {8192, ULP_MAX_BUF, 1024*1024}, {1024*1024} },
399
400 { "_strong_iss", MOD_PROTO_TCP,
401 mod_set_uint32, mod_get_uint32,
402 {0, 2, 2}, {2} },
403
404 { "_rtt_updates", MOD_PROTO_TCP,
405 mod_set_uint32, mod_get_uint32,
406 {0, 65536, 20}, {20} },
407
408 { "_wscale_always", MOD_PROTO_TCP,
409 mod_set_boolean, mod_get_boolean,
410 {B_TRUE}, {B_TRUE} },
411
412 { "_tstamp_always", MOD_PROTO_TCP,
413 mod_set_boolean, mod_get_boolean,
414 {B_FALSE}, {B_FALSE} },
415
416 { "_tstamp_if_wscale", MOD_PROTO_TCP,
417 mod_set_boolean, mod_get_boolean,
418 {B_TRUE}, {B_TRUE} },
419
420 /* tunable - 40 */
421 { "_rexmit_interval_extra", MOD_PROTO_TCP,
422 mod_set_uint32, mod_get_uint32,
423 {0*MS, 2*HOURS, 0*MS}, {0*MS} },
424
425 { "_deferred_acks_max", MOD_PROTO_TCP,
426 mod_set_uint32, mod_get_uint32,
427 {0, 16, 2}, {2} },
428
429 { "_slow_start_after_idle", MOD_PROTO_TCP,
430 mod_set_uint32, mod_get_uint32,
431 {0, 16384, 0}, {0} },
432
433 { "_slow_start_initial", MOD_PROTO_TCP,
434 mod_set_uint32, mod_get_uint32,
435 {0, 16, 0}, {0} },
436
437 { "sack", MOD_PROTO_TCP,
438 mod_set_uint32, mod_get_uint32,
439 {0, 2, 2}, {2} },
440
441 { "_ipv6_hoplimit", MOD_PROTO_TCP,
442 mod_set_uint32, mod_get_uint32,
443 {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
444 {IPV6_DEFAULT_HOPS} },
445
446 { "_mss_def_ipv6", MOD_PROTO_TCP,
447 mod_set_uint32, mod_get_uint32,
448 {1, TCP_MSS_MAX_IPV6, 1220}, {1220} },
449
450 { "_mss_max_ipv6", MOD_PROTO_TCP,
451 mod_set_uint32, mod_get_uint32,
452 {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6},
453 {TCP_MSS_MAX_IPV6} },
454
455 { "_rev_src_routes", MOD_PROTO_TCP,
456 mod_set_boolean, mod_get_boolean,
457 {B_FALSE}, {B_FALSE} },
458
459 { "_local_dack_interval", MOD_PROTO_TCP,
460 mod_set_uint32, mod_get_uint32,
461 {10*MS, 500*MS, 50*MS}, {50*MS} },
462
463 /* tunable - 50 */
464 { "_local_dacks_max", MOD_PROTO_TCP,
465 mod_set_uint32, mod_get_uint32,
466 {0, 16, 8}, {8} },
467
468 { "ecn", MOD_PROTO_TCP,
469 mod_set_uint32, mod_get_uint32,
470 {0, 2, 1}, {1} },
471
472 { "_rst_sent_rate_enabled", MOD_PROTO_TCP,
473 mod_set_boolean, mod_get_boolean,
474 {B_TRUE}, {B_TRUE} },
475
476 { "_rst_sent_rate", MOD_PROTO_TCP,
477 mod_set_uint32, mod_get_uint32,
478 {0, UINT32_MAX, 40}, {40} },
479
480 { "_push_timer_interval", MOD_PROTO_TCP,
481 mod_set_uint32, mod_get_uint32,
482 {0, 100*MS, 50*MS}, {50*MS} },
483
484 { "_use_smss_as_mss_opt", MOD_PROTO_TCP,
485 mod_set_boolean, mod_get_boolean,
486 {B_FALSE}, {B_FALSE} },
487
488 { "_keepalive_abort_interval", MOD_PROTO_TCP,
489 mod_set_uint32, mod_get_uint32,
490 {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} },
491
492 /*
493 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link
494 * layer header. It has to be a multiple of 8.
495 */
496 { "_wroff_xtra", MOD_PROTO_TCP,
497 mod_set_aligned, mod_get_uint32,
498 {0, 256, 32}, {32} },
499
500 { "_dev_flow_ctl", MOD_PROTO_TCP,
501 mod_set_boolean, mod_get_boolean,
502 {B_FALSE}, {B_FALSE} },
503
504 { "_reass_timeout", MOD_PROTO_TCP,
505 mod_set_uint32, mod_get_uint32,
506 {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} },
507
508 /* tunable - 60 */
509 { "extra_priv_ports", MOD_PROTO_TCP,
510 mod_set_extra_privports, mod_get_extra_privports,
511 {1, ULP_MAX_PORT, 0}, {0} },
512
513 { "_1948_phrase", MOD_PROTO_TCP,
514 tcp_set_1948phrase, NULL, {0}, {0} },
515
516 { "_listener_limit_conf", MOD_PROTO_TCP,
517 NULL, tcp_listener_conf_get, {0}, {0} },
518
519 { "_listener_limit_conf_add", MOD_PROTO_TCP,
520 tcp_listener_conf_add, NULL, {0}, {0} },
521
522 { "_listener_limit_conf_del", MOD_PROTO_TCP,
523 tcp_listener_conf_del, NULL, {0}, {0} },
524
525 { "_iss_incr", MOD_PROTO_TCP,
526 mod_set_uint32, mod_get_uint32,
527 {1, ISS_INCR, ISS_INCR},
528 {ISS_INCR} },
529
530 { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
531
532 { NULL, 0, NULL, NULL, {0}, {0} }
533 };
534
535 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);
536