1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <inet/ip.h> 26 #include <inet/tcp_impl.h> 27 #include <sys/multidata.h> 28 #include <sys/sunddi.h> 29 30 /* Max size IP datagram is 64k - 1 */ 31 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t))) 32 #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t))) 33 34 /* Max of the above */ 35 #define TCP_MSS_MAX TCP_MSS_MAX_IPV4 36 37 #define TCP_XMIT_LOWATER 4096 38 #define TCP_XMIT_HIWATER 49152 39 #define TCP_RECV_LOWATER 2048 40 #define TCP_RECV_HIWATER 128000 41 42 /* 43 * Set the RFC 1948 pass phrase 44 */ 45 /* ARGSUSED */ 46 static int 47 tcp_set_1948phrase(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo, 48 const char *ifname, const void* pr_val, uint_t flags) 49 { 50 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 51 52 if (flags & MOD_PROP_DEFAULT) 53 return (ENOTSUP); 54 55 /* 56 * Basically, value contains a new pass phrase. Pass it along! 57 */ 58 tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val), tcps); 59 return (0); 60 } 61 62 /* 63 * returns the current list of listener limit configuration. 64 */ 65 /* ARGSUSED */ 66 static int 67 tcp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname, 68 void *val, uint_t psize, uint_t flags) 69 { 70 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 71 tcp_listener_t *tl; 72 char *pval = val; 73 size_t nbytes = 0, tbytes = 0; 74 uint_t size; 75 int err = 0; 76 77 bzero(pval, psize); 78 size = psize; 79 80 if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE)) 81 return (0); 82 83 mutex_enter(&tcps->tcps_listener_conf_lock); 84 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 85 tl = list_next(&tcps->tcps_listener_conf, tl)) { 86 if (psize == size) 87 nbytes = snprintf(pval, size, "%d:%d", tl->tl_port, 88 tl->tl_ratio); 89 else 90 nbytes = snprintf(pval, size, ",%d:%d", tl->tl_port, 91 tl->tl_ratio); 92 size -= nbytes; 93 pval += nbytes; 94 tbytes += nbytes; 95 if (tbytes >= psize) { 96 /* Buffer overflow, stop copying information */ 97 err = ENOBUFS; 98 break; 99 } 100 } 101 102 mutex_exit(&tcps->tcps_listener_conf_lock); 103 return (err); 104 } 105 106 /* 107 * add a new listener limit configuration. 108 */ 109 /* ARGSUSED */ 110 static int 111 tcp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo, 112 const char *ifname, const void* pval, uint_t flags) 113 { 114 tcp_listener_t *new_tl; 115 tcp_listener_t *tl; 116 long lport; 117 long ratio; 118 char *colon; 119 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 120 121 if (flags & MOD_PROP_DEFAULT) 122 return (ENOTSUP); 123 124 if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 || 125 lport > USHRT_MAX || *colon != ':') { 126 return (EINVAL); 127 } 128 if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0) 129 return (EINVAL); 130 131 mutex_enter(&tcps->tcps_listener_conf_lock); 132 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 133 tl = list_next(&tcps->tcps_listener_conf, tl)) { 134 /* There is an existing entry, so update its ratio value. */ 135 if (tl->tl_port == lport) { 136 tl->tl_ratio = ratio; 137 mutex_exit(&tcps->tcps_listener_conf_lock); 138 return (0); 139 } 140 } 141 142 if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) == 143 NULL) { 144 mutex_exit(&tcps->tcps_listener_conf_lock); 145 return (ENOMEM); 146 } 147 148 new_tl->tl_port = lport; 149 new_tl->tl_ratio = ratio; 150 list_insert_tail(&tcps->tcps_listener_conf, new_tl); 151 mutex_exit(&tcps->tcps_listener_conf_lock); 152 return (0); 153 } 154 155 /* 156 * remove a listener limit configuration. 157 */ 158 /* ARGSUSED */ 159 static int 160 tcp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo, 161 const char *ifname, const void* pval, uint_t flags) 162 { 163 tcp_listener_t *tl; 164 long lport; 165 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 166 167 if (flags & MOD_PROP_DEFAULT) 168 return (ENOTSUP); 169 170 if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 || 171 lport > USHRT_MAX) { 172 return (EINVAL); 173 } 174 mutex_enter(&tcps->tcps_listener_conf_lock); 175 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 176 tl = list_next(&tcps->tcps_listener_conf, tl)) { 177 if (tl->tl_port == lport) { 178 list_remove(&tcps->tcps_listener_conf, tl); 179 mutex_exit(&tcps->tcps_listener_conf_lock); 180 kmem_free(tl, sizeof (tcp_listener_t)); 181 return (0); 182 } 183 } 184 mutex_exit(&tcps->tcps_listener_conf_lock); 185 return (ESRCH); 186 } 187 188 /* 189 * All of these are alterable, within the min/max values given, at run time. 190 * 191 * Note: All those tunables which do not start with "tcp_" are Committed and 192 * therefore are public. See PSARC 2009/306. 193 */ 194 mod_prop_info_t tcp_propinfo_tbl[] = { 195 /* tunable - 0 */ 196 { "tcp_time_wait_interval", MOD_PROTO_TCP, 197 mod_set_uint32, mod_get_uint32, 198 {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} }, 199 200 { "tcp_conn_req_max_q", MOD_PROTO_TCP, 201 mod_set_uint32, mod_get_uint32, 202 {1, UINT32_MAX, 128}, {128} }, 203 204 { "tcp_conn_req_max_q0", MOD_PROTO_TCP, 205 mod_set_uint32, mod_get_uint32, 206 {0, UINT32_MAX, 1024}, {1024} }, 207 208 { "tcp_conn_req_min", MOD_PROTO_TCP, 209 mod_set_uint32, mod_get_uint32, 210 {1, 1024, 1}, {1} }, 211 212 { "tcp_conn_grace_period", MOD_PROTO_TCP, 213 mod_set_uint32, mod_get_uint32, 214 {0*MS, 20*SECONDS, 0*MS}, {0*MS} }, 215 216 { "tcp_cwnd_max", MOD_PROTO_TCP, 217 mod_set_uint32, mod_get_uint32, 218 {128, (1<<30), 1024*1024}, {1024*1024} }, 219 220 { "tcp_debug", MOD_PROTO_TCP, 221 mod_set_uint32, mod_get_uint32, 222 {0, 10, 0}, {0} }, 223 224 { "smallest_nonpriv_port", MOD_PROTO_TCP, 225 mod_set_uint32, mod_get_uint32, 226 {1024, (32*1024), 1024}, {1024} }, 227 228 { "tcp_ip_abort_cinterval", MOD_PROTO_TCP, 229 mod_set_uint32, mod_get_uint32, 230 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} }, 231 232 { "tcp_ip_abort_linterval", MOD_PROTO_TCP, 233 mod_set_uint32, mod_get_uint32, 234 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} }, 235 236 /* tunable - 10 */ 237 { "tcp_ip_abort_interval", MOD_PROTO_TCP, 238 mod_set_uint32, mod_get_uint32, 239 {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} }, 240 241 { "tcp_ip_notify_cinterval", MOD_PROTO_TCP, 242 mod_set_uint32, mod_get_uint32, 243 {1*SECONDS, UINT32_MAX, 10*SECONDS}, 244 {10*SECONDS} }, 245 246 { "tcp_ip_notify_interval", MOD_PROTO_TCP, 247 mod_set_uint32, mod_get_uint32, 248 {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} }, 249 250 { "tcp_ipv4_ttl", MOD_PROTO_TCP, 251 mod_set_uint32, mod_get_uint32, 252 {1, 255, 64}, {64} }, 253 254 { "tcp_keepalive_interval", MOD_PROTO_TCP, 255 mod_set_uint32, mod_get_uint32, 256 {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} }, 257 258 { "tcp_maxpsz_multiplier", MOD_PROTO_TCP, 259 mod_set_uint32, mod_get_uint32, 260 {0, 100, 10}, {10} }, 261 262 { "tcp_mss_def_ipv4", MOD_PROTO_TCP, 263 mod_set_uint32, mod_get_uint32, 264 {1, TCP_MSS_MAX_IPV4, 536}, {536} }, 265 266 { "tcp_mss_max_ipv4", MOD_PROTO_TCP, 267 mod_set_uint32, mod_get_uint32, 268 {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4}, 269 {TCP_MSS_MAX_IPV4} }, 270 271 { "tcp_mss_min", MOD_PROTO_TCP, 272 mod_set_uint32, mod_get_uint32, 273 {1, TCP_MSS_MAX, 108}, {108} }, 274 275 { "tcp_naglim_def", MOD_PROTO_TCP, 276 mod_set_uint32, mod_get_uint32, 277 {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} }, 278 279 /* tunable - 20 */ 280 { "tcp_rexmit_interval_initial", MOD_PROTO_TCP, 281 mod_set_uint32, mod_get_uint32, 282 {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} }, 283 284 { "tcp_rexmit_interval_max", MOD_PROTO_TCP, 285 mod_set_uint32, mod_get_uint32, 286 {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} }, 287 288 { "tcp_rexmit_interval_min", MOD_PROTO_TCP, 289 mod_set_uint32, mod_get_uint32, 290 {1*MS, 2*HOURS, 400*MS}, {400*MS} }, 291 292 { "tcp_deferred_ack_interval", MOD_PROTO_TCP, 293 mod_set_uint32, mod_get_uint32, 294 {1*MS, 1*MINUTES, 100*MS}, {100*MS} }, 295 296 { "tcp_snd_lowat_fraction", MOD_PROTO_TCP, 297 mod_set_uint32, mod_get_uint32, 298 {0, 16, 0}, {0} }, 299 300 { "tcp_dupack_fast_retransmit", MOD_PROTO_TCP, 301 mod_set_uint32, mod_get_uint32, 302 {1, 10000, 3}, {3} }, 303 304 { "tcp_ignore_path_mtu", MOD_PROTO_TCP, 305 mod_set_boolean, mod_get_boolean, 306 {B_FALSE}, {B_FALSE} }, 307 308 { "smallest_anon_port", MOD_PROTO_TCP, 309 mod_set_uint32, mod_get_uint32, 310 {1024, ULP_MAX_PORT, 32*1024}, {32*1024} }, 311 312 { "largest_anon_port", MOD_PROTO_TCP, 313 mod_set_uint32, mod_get_uint32, 314 {1024, ULP_MAX_PORT, ULP_MAX_PORT}, 315 {ULP_MAX_PORT} }, 316 317 { "send_maxbuf", MOD_PROTO_TCP, 318 mod_set_uint32, mod_get_uint32, 319 {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_HIWATER}, 320 {TCP_XMIT_HIWATER} }, 321 322 /* tunable - 30 */ 323 { "tcp_xmit_lowat", MOD_PROTO_TCP, 324 mod_set_uint32, mod_get_uint32, 325 {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_LOWATER}, 326 {TCP_XMIT_LOWATER} }, 327 328 { "recv_maxbuf", MOD_PROTO_TCP, 329 mod_set_uint32, mod_get_uint32, 330 {TCP_RECV_LOWATER, (1<<30), TCP_RECV_HIWATER}, 331 {TCP_RECV_HIWATER} }, 332 333 { "tcp_recv_hiwat_minmss", MOD_PROTO_TCP, 334 mod_set_uint32, mod_get_uint32, 335 {1, 65536, 4}, {4} }, 336 337 { "tcp_fin_wait_2_flush_interval", MOD_PROTO_TCP, 338 mod_set_uint32, mod_get_uint32, 339 {1*SECONDS, 2*HOURS, 60*SECONDS}, 340 {60*SECONDS} }, 341 342 { "tcp_max_buf", MOD_PROTO_TCP, 343 mod_set_uint32, mod_get_uint32, 344 {8192, (1<<30), 1024*1024}, {1024*1024} }, 345 346 /* 347 * Question: What default value should I set for tcp_strong_iss? 348 */ 349 { "tcp_strong_iss", MOD_PROTO_TCP, 350 mod_set_uint32, mod_get_uint32, 351 {0, 2, 1}, {1} }, 352 353 { "tcp_rtt_updates", MOD_PROTO_TCP, 354 mod_set_uint32, mod_get_uint32, 355 {0, 65536, 20}, {20} }, 356 357 { "tcp_wscale_always", MOD_PROTO_TCP, 358 mod_set_boolean, mod_get_boolean, 359 {B_TRUE}, {B_TRUE} }, 360 361 { "tcp_tstamp_always", MOD_PROTO_TCP, 362 mod_set_boolean, mod_get_boolean, 363 {B_FALSE}, {B_FALSE} }, 364 365 { "tcp_tstamp_if_wscale", MOD_PROTO_TCP, 366 mod_set_boolean, mod_get_boolean, 367 {B_TRUE}, {B_TRUE} }, 368 369 /* tunable - 40 */ 370 { "tcp_rexmit_interval_extra", MOD_PROTO_TCP, 371 mod_set_uint32, mod_get_uint32, 372 {0*MS, 2*HOURS, 0*MS}, {0*MS} }, 373 374 { "tcp_deferred_acks_max", MOD_PROTO_TCP, 375 mod_set_uint32, mod_get_uint32, 376 {0, 16, 2}, {2} }, 377 378 { "tcp_slow_start_after_idle", MOD_PROTO_TCP, 379 mod_set_uint32, mod_get_uint32, 380 {1, 16384, 4}, {4} }, 381 382 { "tcp_slow_start_initial", MOD_PROTO_TCP, 383 mod_set_uint32, mod_get_uint32, 384 {1, 4, 4}, {4} }, 385 386 { "sack", MOD_PROTO_TCP, 387 mod_set_uint32, mod_get_uint32, 388 {0, 2, 2}, {2} }, 389 390 { "tcp_ipv6_hoplimit", MOD_PROTO_TCP, 391 mod_set_uint32, mod_get_uint32, 392 {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS}, 393 {IPV6_DEFAULT_HOPS} }, 394 395 { "tcp_mss_def_ipv6", MOD_PROTO_TCP, 396 mod_set_uint32, mod_get_uint32, 397 {1, TCP_MSS_MAX_IPV6, 1220}, {1220} }, 398 399 { "tcp_mss_max_ipv6", MOD_PROTO_TCP, 400 mod_set_uint32, mod_get_uint32, 401 {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6}, 402 {TCP_MSS_MAX_IPV6} }, 403 404 { "tcp_rev_src_routes", MOD_PROTO_TCP, 405 mod_set_boolean, mod_get_boolean, 406 {B_FALSE}, {B_FALSE} }, 407 408 { "tcp_local_dack_interval", MOD_PROTO_TCP, 409 mod_set_uint32, mod_get_uint32, 410 {10*MS, 500*MS, 50*MS}, {50*MS} }, 411 412 /* tunable - 50 */ 413 { "tcp_local_dacks_max", MOD_PROTO_TCP, 414 mod_set_uint32, mod_get_uint32, 415 {0, 16, 8}, {8} }, 416 417 { "ecn", MOD_PROTO_TCP, 418 mod_set_uint32, mod_get_uint32, 419 {0, 2, 1}, {1} }, 420 421 { "tcp_rst_sent_rate_enabled", MOD_PROTO_TCP, 422 mod_set_boolean, mod_get_boolean, 423 {B_TRUE}, {B_TRUE} }, 424 425 { "tcp_rst_sent_rate", MOD_PROTO_TCP, 426 mod_set_uint32, mod_get_uint32, 427 {0, UINT32_MAX, 40}, {40} }, 428 429 { "tcp_push_timer_interval", MOD_PROTO_TCP, 430 mod_set_uint32, mod_get_uint32, 431 {0, 100*MS, 50*MS}, {50*MS} }, 432 433 { "tcp_use_smss_as_mss_opt", MOD_PROTO_TCP, 434 mod_set_boolean, mod_get_boolean, 435 {B_FALSE}, {B_FALSE} }, 436 437 { "tcp_keepalive_abort_interval", MOD_PROTO_TCP, 438 mod_set_uint32, mod_get_uint32, 439 {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} }, 440 441 /* 442 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link 443 * layer header. It has to be a multiple of 8. 444 */ 445 { "tcp_wroff_xtra", MOD_PROTO_TCP, 446 mod_set_aligned, mod_get_uint32, 447 {0, 256, 32}, {32} }, 448 449 { "tcp_dev_flow_ctl", MOD_PROTO_TCP, 450 mod_set_boolean, mod_get_boolean, 451 {B_FALSE}, {B_FALSE} }, 452 453 { "tcp_reass_timeout", MOD_PROTO_TCP, 454 mod_set_uint32, mod_get_uint32, 455 {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} }, 456 457 /* tunable - 60 */ 458 { "extra_priv_ports", MOD_PROTO_TCP, 459 mod_set_extra_privports, mod_get_extra_privports, 460 {1, ULP_MAX_PORT, 0}, {0} }, 461 462 { "tcp_1948_phrase", MOD_PROTO_TCP, 463 tcp_set_1948phrase, NULL, {0}, {0} }, 464 465 { "tcp_listener_limit_conf", MOD_PROTO_TCP, 466 NULL, tcp_listener_conf_get, {0}, {0} }, 467 468 { "tcp_listener_limit_conf_add", MOD_PROTO_TCP, 469 tcp_listener_conf_add, NULL, {0}, {0} }, 470 471 { "tcp_listener_limit_conf_del", MOD_PROTO_TCP, 472 tcp_listener_conf_del, NULL, {0}, {0} }, 473 474 { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} }, 475 476 { NULL, 0, NULL, NULL, {0}, {0} } 477 }; 478 479 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl); 480