1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <inet/ip.h> 27 #include <inet/tcp_impl.h> 28 #include <sys/multidata.h> 29 #include <sys/sunddi.h> 30 31 /* Max size IP datagram is 64k - 1 */ 32 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t))) 33 #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t))) 34 35 /* Max of the above */ 36 #define TCP_MSS_MAX TCP_MSS_MAX_IPV4 37 38 #define TCP_XMIT_LOWATER 4096 39 #define TCP_XMIT_HIWATER 49152 40 #define TCP_RECV_LOWATER 2048 41 #define TCP_RECV_HIWATER 128000 42 43 /* 44 * Set the RFC 1948 pass phrase 45 */ 46 /* ARGSUSED */ 47 static int 48 tcp_set_1948phrase(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo, 49 const char *ifname, const void* pr_val, uint_t flags) 50 { 51 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 52 53 if (flags & MOD_PROP_DEFAULT) 54 return (ENOTSUP); 55 56 /* 57 * Basically, value contains a new pass phrase. Pass it along! 58 */ 59 tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val), tcps); 60 return (0); 61 } 62 63 /* 64 * returns the current list of listener limit configuration. 65 */ 66 /* ARGSUSED */ 67 static int 68 tcp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname, 69 void *val, uint_t psize, uint_t flags) 70 { 71 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 72 tcp_listener_t *tl; 73 char *pval = val; 74 size_t nbytes = 0, tbytes = 0; 75 uint_t size; 76 int err = 0; 77 78 bzero(pval, psize); 79 size = psize; 80 81 if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE)) 82 return (0); 83 84 mutex_enter(&tcps->tcps_listener_conf_lock); 85 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 86 tl = list_next(&tcps->tcps_listener_conf, tl)) { 87 if (psize == size) 88 nbytes = snprintf(pval, size, "%d:%d", tl->tl_port, 89 tl->tl_ratio); 90 else 91 nbytes = snprintf(pval, size, ",%d:%d", tl->tl_port, 92 tl->tl_ratio); 93 size -= nbytes; 94 pval += nbytes; 95 tbytes += nbytes; 96 if (tbytes >= psize) { 97 /* Buffer overflow, stop copying information */ 98 err = ENOBUFS; 99 break; 100 } 101 } 102 ret: 103 mutex_exit(&tcps->tcps_listener_conf_lock); 104 return (err); 105 } 106 107 /* 108 * add a new listener limit configuration. 109 */ 110 /* ARGSUSED */ 111 static int 112 tcp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo, 113 const char *ifname, const void* pval, uint_t flags) 114 { 115 tcp_listener_t *new_tl; 116 tcp_listener_t *tl; 117 long lport; 118 long ratio; 119 char *colon; 120 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 121 122 if (flags & MOD_PROP_DEFAULT) 123 return (ENOTSUP); 124 125 if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 || 126 lport > USHRT_MAX || *colon != ':') { 127 return (EINVAL); 128 } 129 if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0) 130 return (EINVAL); 131 132 mutex_enter(&tcps->tcps_listener_conf_lock); 133 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 134 tl = list_next(&tcps->tcps_listener_conf, tl)) { 135 /* There is an existing entry, so update its ratio value. */ 136 if (tl->tl_port == lport) { 137 tl->tl_ratio = ratio; 138 mutex_exit(&tcps->tcps_listener_conf_lock); 139 return (0); 140 } 141 } 142 143 if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) == 144 NULL) { 145 mutex_exit(&tcps->tcps_listener_conf_lock); 146 return (ENOMEM); 147 } 148 149 new_tl->tl_port = lport; 150 new_tl->tl_ratio = ratio; 151 list_insert_tail(&tcps->tcps_listener_conf, new_tl); 152 mutex_exit(&tcps->tcps_listener_conf_lock); 153 return (0); 154 } 155 156 /* 157 * remove a listener limit configuration. 158 */ 159 /* ARGSUSED */ 160 static int 161 tcp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo, 162 const char *ifname, const void* pval, uint_t flags) 163 { 164 tcp_listener_t *tl; 165 long lport; 166 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 167 168 if (flags & MOD_PROP_DEFAULT) 169 return (ENOTSUP); 170 171 if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 || 172 lport > USHRT_MAX) { 173 return (EINVAL); 174 } 175 mutex_enter(&tcps->tcps_listener_conf_lock); 176 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 177 tl = list_next(&tcps->tcps_listener_conf, tl)) { 178 if (tl->tl_port == lport) { 179 list_remove(&tcps->tcps_listener_conf, tl); 180 mutex_exit(&tcps->tcps_listener_conf_lock); 181 kmem_free(tl, sizeof (tcp_listener_t)); 182 return (0); 183 } 184 } 185 mutex_exit(&tcps->tcps_listener_conf_lock); 186 return (ESRCH); 187 } 188 189 /* 190 * All of these are alterable, within the min/max values given, at run time. 191 * 192 * Note: All those tunables which do not start with "tcp_" are Committed and 193 * therefore are public. See PSARC 2009/306. 194 */ 195 mod_prop_info_t tcp_propinfo_tbl[] = { 196 /* tunable - 0 */ 197 { "tcp_time_wait_interval", MOD_PROTO_TCP, 198 mod_set_uint32, mod_get_uint32, 199 {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} }, 200 201 { "tcp_conn_req_max_q", MOD_PROTO_TCP, 202 mod_set_uint32, mod_get_uint32, 203 {1, UINT32_MAX, 128}, {128} }, 204 205 { "tcp_conn_req_max_q0", MOD_PROTO_TCP, 206 mod_set_uint32, mod_get_uint32, 207 {0, UINT32_MAX, 1024}, {1024} }, 208 209 { "tcp_conn_req_min", MOD_PROTO_TCP, 210 mod_set_uint32, mod_get_uint32, 211 {1, 1024, 1}, {1} }, 212 213 { "tcp_conn_grace_period", MOD_PROTO_TCP, 214 mod_set_uint32, mod_get_uint32, 215 {0*MS, 20*SECONDS, 0*MS}, {0*MS} }, 216 217 { "tcp_cwnd_max", MOD_PROTO_TCP, 218 mod_set_uint32, mod_get_uint32, 219 {128, (1<<30), 1024*1024}, {1024*1024} }, 220 221 { "tcp_debug", MOD_PROTO_TCP, 222 mod_set_uint32, mod_get_uint32, 223 {0, 10, 0}, {0} }, 224 225 { "smallest_nonpriv_port", MOD_PROTO_TCP, 226 mod_set_uint32, mod_get_uint32, 227 {1024, (32*1024), 1024}, {1024} }, 228 229 { "tcp_ip_abort_cinterval", MOD_PROTO_TCP, 230 mod_set_uint32, mod_get_uint32, 231 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} }, 232 233 { "tcp_ip_abort_linterval", MOD_PROTO_TCP, 234 mod_set_uint32, mod_get_uint32, 235 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} }, 236 237 /* tunable - 10 */ 238 { "tcp_ip_abort_interval", MOD_PROTO_TCP, 239 mod_set_uint32, mod_get_uint32, 240 {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} }, 241 242 { "tcp_ip_notify_cinterval", MOD_PROTO_TCP, 243 mod_set_uint32, mod_get_uint32, 244 {1*SECONDS, UINT32_MAX, 10*SECONDS}, 245 {10*SECONDS} }, 246 247 { "tcp_ip_notify_interval", MOD_PROTO_TCP, 248 mod_set_uint32, mod_get_uint32, 249 {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} }, 250 251 { "tcp_ipv4_ttl", MOD_PROTO_TCP, 252 mod_set_uint32, mod_get_uint32, 253 {1, 255, 64}, {64} }, 254 255 { "tcp_keepalive_interval", MOD_PROTO_TCP, 256 mod_set_uint32, mod_get_uint32, 257 {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} }, 258 259 { "tcp_maxpsz_multiplier", MOD_PROTO_TCP, 260 mod_set_uint32, mod_get_uint32, 261 {0, 100, 10}, {10} }, 262 263 { "tcp_mss_def_ipv4", MOD_PROTO_TCP, 264 mod_set_uint32, mod_get_uint32, 265 {1, TCP_MSS_MAX_IPV4, 536}, {536} }, 266 267 { "tcp_mss_max_ipv4", MOD_PROTO_TCP, 268 mod_set_uint32, mod_get_uint32, 269 {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4}, 270 {TCP_MSS_MAX_IPV4} }, 271 272 { "tcp_mss_min", MOD_PROTO_TCP, 273 mod_set_uint32, mod_get_uint32, 274 {1, TCP_MSS_MAX, 108}, {108} }, 275 276 { "tcp_naglim_def", MOD_PROTO_TCP, 277 mod_set_uint32, mod_get_uint32, 278 {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} }, 279 280 /* tunable - 20 */ 281 { "tcp_rexmit_interval_initial", MOD_PROTO_TCP, 282 mod_set_uint32, mod_get_uint32, 283 {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} }, 284 285 { "tcp_rexmit_interval_max", MOD_PROTO_TCP, 286 mod_set_uint32, mod_get_uint32, 287 {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} }, 288 289 { "tcp_rexmit_interval_min", MOD_PROTO_TCP, 290 mod_set_uint32, mod_get_uint32, 291 {1*MS, 2*HOURS, 400*MS}, {400*MS} }, 292 293 { "tcp_deferred_ack_interval", MOD_PROTO_TCP, 294 mod_set_uint32, mod_get_uint32, 295 {1*MS, 1*MINUTES, 100*MS}, {100*MS} }, 296 297 { "tcp_snd_lowat_fraction", MOD_PROTO_TCP, 298 mod_set_uint32, mod_get_uint32, 299 {0, 16, 0}, {0} }, 300 301 { "tcp_dupack_fast_retransmit", MOD_PROTO_TCP, 302 mod_set_uint32, mod_get_uint32, 303 {1, 10000, 3}, {3} }, 304 305 { "tcp_ignore_path_mtu", MOD_PROTO_TCP, 306 mod_set_boolean, mod_get_boolean, 307 {B_FALSE}, {B_FALSE} }, 308 309 { "smallest_anon_port", MOD_PROTO_TCP, 310 mod_set_uint32, mod_get_uint32, 311 {1024, ULP_MAX_PORT, 32*1024}, {32*1024} }, 312 313 { "largest_anon_port", MOD_PROTO_TCP, 314 mod_set_uint32, mod_get_uint32, 315 {1024, ULP_MAX_PORT, ULP_MAX_PORT}, 316 {ULP_MAX_PORT} }, 317 318 { "send_maxbuf", MOD_PROTO_TCP, 319 mod_set_uint32, mod_get_uint32, 320 {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_HIWATER}, 321 {TCP_XMIT_HIWATER} }, 322 323 /* tunable - 30 */ 324 { "tcp_xmit_lowat", MOD_PROTO_TCP, 325 mod_set_uint32, mod_get_uint32, 326 {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_LOWATER}, 327 {TCP_XMIT_LOWATER} }, 328 329 { "recv_maxbuf", MOD_PROTO_TCP, 330 mod_set_uint32, mod_get_uint32, 331 {TCP_RECV_LOWATER, (1<<30), TCP_RECV_HIWATER}, 332 {TCP_RECV_HIWATER} }, 333 334 { "tcp_recv_hiwat_minmss", MOD_PROTO_TCP, 335 mod_set_uint32, mod_get_uint32, 336 {1, 65536, 4}, {4} }, 337 338 { "tcp_fin_wait_2_flush_interval", MOD_PROTO_TCP, 339 mod_set_uint32, mod_get_uint32, 340 {1*SECONDS, UINT32_MAX, 675*SECONDS}, 341 {675*SECONDS} }, 342 343 { "tcp_max_buf", MOD_PROTO_TCP, 344 mod_set_uint32, mod_get_uint32, 345 {8192, (1<<30), 1024*1024}, {1024*1024} }, 346 347 /* 348 * Question: What default value should I set for tcp_strong_iss? 349 */ 350 { "tcp_strong_iss", MOD_PROTO_TCP, 351 mod_set_uint32, mod_get_uint32, 352 {0, 2, 1}, {1} }, 353 354 { "tcp_rtt_updates", MOD_PROTO_TCP, 355 mod_set_uint32, mod_get_uint32, 356 {0, 65536, 20}, {20} }, 357 358 { "tcp_wscale_always", MOD_PROTO_TCP, 359 mod_set_boolean, mod_get_boolean, 360 {B_TRUE}, {B_TRUE} }, 361 362 { "tcp_tstamp_always", MOD_PROTO_TCP, 363 mod_set_boolean, mod_get_boolean, 364 {B_FALSE}, {B_FALSE} }, 365 366 { "tcp_tstamp_if_wscale", MOD_PROTO_TCP, 367 mod_set_boolean, mod_get_boolean, 368 {B_TRUE}, {B_TRUE} }, 369 370 /* tunable - 40 */ 371 { "tcp_rexmit_interval_extra", MOD_PROTO_TCP, 372 mod_set_uint32, mod_get_uint32, 373 {0*MS, 2*HOURS, 0*MS}, {0*MS} }, 374 375 { "tcp_deferred_acks_max", MOD_PROTO_TCP, 376 mod_set_uint32, mod_get_uint32, 377 {0, 16, 2}, {2} }, 378 379 { "tcp_slow_start_after_idle", MOD_PROTO_TCP, 380 mod_set_uint32, mod_get_uint32, 381 {1, 16384, 4}, {4} }, 382 383 { "tcp_slow_start_initial", MOD_PROTO_TCP, 384 mod_set_uint32, mod_get_uint32, 385 {1, 4, 4}, {4} }, 386 387 { "sack", MOD_PROTO_TCP, 388 mod_set_uint32, mod_get_uint32, 389 {0, 2, 2}, {2} }, 390 391 { "tcp_ipv6_hoplimit", MOD_PROTO_TCP, 392 mod_set_uint32, mod_get_uint32, 393 {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS}, 394 {IPV6_DEFAULT_HOPS} }, 395 396 { "tcp_mss_def_ipv6", MOD_PROTO_TCP, 397 mod_set_uint32, mod_get_uint32, 398 {1, TCP_MSS_MAX_IPV6, 1220}, {1220} }, 399 400 { "tcp_mss_max_ipv6", MOD_PROTO_TCP, 401 mod_set_uint32, mod_get_uint32, 402 {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6}, 403 {TCP_MSS_MAX_IPV6} }, 404 405 { "tcp_rev_src_routes", MOD_PROTO_TCP, 406 mod_set_boolean, mod_get_boolean, 407 {B_FALSE}, {B_FALSE} }, 408 409 { "tcp_local_dack_interval", MOD_PROTO_TCP, 410 mod_set_uint32, mod_get_uint32, 411 {10*MS, 500*MS, 50*MS}, {50*MS} }, 412 413 /* tunable - 50 */ 414 { "tcp_local_dacks_max", MOD_PROTO_TCP, 415 mod_set_uint32, mod_get_uint32, 416 {0, 16, 8}, {8} }, 417 418 { "ecn", MOD_PROTO_TCP, 419 mod_set_uint32, mod_get_uint32, 420 {0, 2, 1}, {1} }, 421 422 { "tcp_rst_sent_rate_enabled", MOD_PROTO_TCP, 423 mod_set_boolean, mod_get_boolean, 424 {B_TRUE}, {B_TRUE} }, 425 426 { "tcp_rst_sent_rate", MOD_PROTO_TCP, 427 mod_set_uint32, mod_get_uint32, 428 {0, UINT32_MAX, 40}, {40} }, 429 430 { "tcp_push_timer_interval", MOD_PROTO_TCP, 431 mod_set_uint32, mod_get_uint32, 432 {0, 100*MS, 50*MS}, {50*MS} }, 433 434 { "tcp_use_smss_as_mss_opt", MOD_PROTO_TCP, 435 mod_set_boolean, mod_get_boolean, 436 {B_FALSE}, {B_FALSE} }, 437 438 { "tcp_keepalive_abort_interval", MOD_PROTO_TCP, 439 mod_set_uint32, mod_get_uint32, 440 {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} }, 441 442 /* 443 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link 444 * layer header. It has to be a multiple of 8. 445 */ 446 { "tcp_wroff_xtra", MOD_PROTO_TCP, 447 mod_set_aligned, mod_get_uint32, 448 {0, 256, 32}, {32} }, 449 450 { "tcp_dev_flow_ctl", MOD_PROTO_TCP, 451 mod_set_boolean, mod_get_boolean, 452 {B_FALSE}, {B_FALSE} }, 453 454 { "tcp_reass_timeout", MOD_PROTO_TCP, 455 mod_set_uint32, mod_get_uint32, 456 {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} }, 457 458 /* tunable - 60 */ 459 { "extra_priv_ports", MOD_PROTO_TCP, 460 mod_set_extra_privports, mod_get_extra_privports, 461 {1, ULP_MAX_PORT, 0}, {0} }, 462 463 { "tcp_1948_phrase", MOD_PROTO_TCP, 464 tcp_set_1948phrase, NULL, {0}, {0} }, 465 466 { "tcp_listener_limit_conf", MOD_PROTO_TCP, 467 NULL, tcp_listener_conf_get, {0}, {0} }, 468 469 { "tcp_listener_limit_conf_add", MOD_PROTO_TCP, 470 tcp_listener_conf_add, NULL, {0}, {0} }, 471 472 { "tcp_listener_limit_conf_del", MOD_PROTO_TCP, 473 tcp_listener_conf_del, NULL, {0}, {0} }, 474 475 { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} }, 476 477 { NULL, 0, NULL, NULL, {0}, {0} } 478 }; 479 480 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl); 481