1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/types.h> 26 #include <sys/stream.h> 27 #define _SUN_TPI_VERSION 2 28 #include <sys/tihdr.h> 29 #include <sys/socket.h> 30 #include <sys/xti_xtiopt.h> 31 #include <sys/xti_inet.h> 32 #include <sys/policy.h> 33 34 #include <inet/common.h> 35 #include <netinet/ip6.h> 36 #include <inet/ip.h> 37 38 #include <netinet/in.h> 39 #include <netinet/tcp.h> 40 #include <inet/optcom.h> 41 #include <inet/proto_set.h> 42 #include <inet/tcp_impl.h> 43 44 /* 45 * Table of all known options handled on a TCP protocol stack. 46 * 47 * Note: This table contains options processed by both TCP and IP levels 48 * and is the superset of options that can be performed on a TCP over IP 49 * stack. 50 */ 51 opdes_t tcp_opt_arr[] = { 52 53 { SO_LINGER, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, 54 sizeof (struct linger), 0 }, 55 56 { SO_DEBUG, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 57 { SO_KEEPALIVE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 58 { SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 59 { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 60 }, 61 { SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 62 { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 63 { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 64 { SO_TYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 }, 65 { SO_SNDBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 66 { SO_RCVBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 67 { SO_SNDTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, 68 sizeof (struct timeval), 0 }, 69 { SO_RCVTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, 70 sizeof (struct timeval), 0 }, 71 { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 72 }, 73 { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 74 { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 75 0 }, 76 { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 77 0 }, 78 { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 79 0 }, 80 { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int), 81 0 }, 82 { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 83 84 { SO_DOMAIN, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 }, 85 86 { SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 }, 87 88 { TCP_NODELAY, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 89 }, 90 { TCP_MAXSEG, IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t), 91 536 }, 92 93 { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 94 OP_DEF_FN, sizeof (int), -1 /* not initialized */ }, 95 96 { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 97 OP_DEF_FN, sizeof (int), -1 /* not initialized */ }, 98 99 { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 100 OP_DEF_FN, sizeof (int), -1 /* not initialized */ }, 101 102 { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 103 OP_DEF_FN, sizeof (int), -1 /* not initialized */ }, 104 105 { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 106 0 }, 107 108 { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0, 109 sizeof (int), 0 }, 110 111 { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 112 }, 113 114 { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0, 115 sizeof (int), 0 }, 116 117 { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, 118 sizeof (int), 0 }, 119 120 { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, 121 sizeof (int), 0 }, 122 123 { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 124 125 { IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 126 (OP_VARLEN|OP_NODEFAULT), 127 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ }, 128 { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 129 (OP_VARLEN|OP_NODEFAULT), 130 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ }, 131 132 { IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 133 { T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 134 { IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN, 135 sizeof (int), -1 /* not initialized */ }, 136 137 { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT, 138 sizeof (ipsec_req_t), -1 /* not initialized */ }, 139 140 { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, 141 sizeof (int), 0 /* no ifindex */ }, 142 143 { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0, 144 sizeof (int), 0 }, 145 146 { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN, 147 sizeof (int), -1 /* not initialized */ }, 148 149 { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 150 sizeof (int), 0 /* no ifindex */ }, 151 152 { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 153 154 { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0, 155 sizeof (in_addr_t), -1 /* not initialized */ }, 156 157 { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0, 158 sizeof (int), 0 }, 159 160 { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 161 (OP_NODEFAULT|OP_VARLEN), 162 sizeof (struct in6_pktinfo), -1 /* not initialized */ }, 163 { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 164 OP_NODEFAULT, 165 sizeof (sin6_t), -1 /* not initialized */ }, 166 { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 167 (OP_VARLEN|OP_NODEFAULT), 255*8, 168 -1 /* not initialized */ }, 169 { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 170 (OP_VARLEN|OP_NODEFAULT), 255*8, 171 -1 /* not initialized */ }, 172 { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 173 (OP_VARLEN|OP_NODEFAULT), 255*8, 174 -1 /* not initialized */ }, 175 { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 176 (OP_VARLEN|OP_NODEFAULT), 255*8, 177 -1 /* not initialized */ }, 178 { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 179 OP_NODEFAULT, 180 sizeof (int), -1 /* not initialized */ }, 181 { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 182 OP_NODEFAULT, 183 sizeof (struct ip6_mtuinfo), -1 /* not initialized */ }, 184 { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 185 sizeof (int), 0 }, 186 { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 187 sizeof (int), 0 }, 188 { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 189 sizeof (int), 0 }, 190 191 /* Enable receipt of ancillary data */ 192 { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 193 sizeof (int), 0 }, 194 { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 195 sizeof (int), 0 }, 196 { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 197 sizeof (int), 0 }, 198 { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 199 sizeof (int), 0 }, 200 { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 201 sizeof (int), 0 }, 202 { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 203 sizeof (int), 0 }, 204 { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 205 sizeof (int), 0 }, 206 { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 207 sizeof (int), 0 }, 208 209 { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT, 210 sizeof (ipsec_req_t), -1 /* not initialized */ }, 211 { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 212 sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT }, 213 }; 214 215 /* 216 * Table of all supported levels 217 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have 218 * any supported options so we need this info separately. 219 * 220 * This is needed only for topmost tpi providers and is used only by 221 * XTI interfaces. 222 */ 223 optlevel_t tcp_valid_levels_arr[] = { 224 XTI_GENERIC, 225 SOL_SOCKET, 226 IPPROTO_TCP, 227 IPPROTO_IP, 228 IPPROTO_IPV6 229 }; 230 231 232 #define TCP_OPT_ARR_CNT A_CNT(tcp_opt_arr) 233 #define TCP_VALID_LEVELS_CNT A_CNT(tcp_valid_levels_arr) 234 235 uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */ 236 237 /* 238 * Initialize option database object for TCP 239 * 240 * This object represents database of options to search passed to 241 * {sock,tpi}optcom_req() interface routine to take care of option 242 * management and associated methods. 243 */ 244 245 optdb_obj_t tcp_opt_obj = { 246 tcp_opt_default, /* TCP default value function pointer */ 247 tcp_tpi_opt_get, /* TCP get function pointer */ 248 tcp_tpi_opt_set, /* TCP set function pointer */ 249 TCP_OPT_ARR_CNT, /* TCP option database count of entries */ 250 tcp_opt_arr, /* TCP option database */ 251 TCP_VALID_LEVELS_CNT, /* TCP valid level count of entries */ 252 tcp_valid_levels_arr /* TCP valid level array */ 253 }; 254 255 /* Maximum TCP initial cwin (start/restart). */ 256 #define TCP_MAX_INIT_CWND 16 257 258 static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND; 259 260 /* 261 * Some TCP options can be "set" by requesting them in the option 262 * buffer. This is needed for XTI feature test though we do not 263 * allow it in general. We interpret that this mechanism is more 264 * applicable to OSI protocols and need not be allowed in general. 265 * This routine filters out options for which it is not allowed (most) 266 * and lets through those (few) for which it is. [ The XTI interface 267 * test suite specifics will imply that any XTI_GENERIC level XTI_* if 268 * ever implemented will have to be allowed here ]. 269 */ 270 static boolean_t 271 tcp_allow_connopt_set(int level, int name) 272 { 273 274 switch (level) { 275 case IPPROTO_TCP: 276 switch (name) { 277 case TCP_NODELAY: 278 return (B_TRUE); 279 default: 280 return (B_FALSE); 281 } 282 /*NOTREACHED*/ 283 default: 284 return (B_FALSE); 285 } 286 /*NOTREACHED*/ 287 } 288 289 /* 290 * This routine gets default values of certain options whose default 291 * values are maintained by protocol specific code 292 */ 293 /* ARGSUSED */ 294 int 295 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 296 { 297 int32_t *i1 = (int32_t *)ptr; 298 tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; 299 300 switch (level) { 301 case IPPROTO_TCP: 302 switch (name) { 303 case TCP_NOTIFY_THRESHOLD: 304 *i1 = tcps->tcps_ip_notify_interval; 305 break; 306 case TCP_ABORT_THRESHOLD: 307 *i1 = tcps->tcps_ip_abort_interval; 308 break; 309 case TCP_CONN_NOTIFY_THRESHOLD: 310 *i1 = tcps->tcps_ip_notify_cinterval; 311 break; 312 case TCP_CONN_ABORT_THRESHOLD: 313 *i1 = tcps->tcps_ip_abort_cinterval; 314 break; 315 default: 316 return (-1); 317 } 318 break; 319 case IPPROTO_IP: 320 switch (name) { 321 case IP_TTL: 322 *i1 = tcps->tcps_ipv4_ttl; 323 break; 324 default: 325 return (-1); 326 } 327 break; 328 case IPPROTO_IPV6: 329 switch (name) { 330 case IPV6_UNICAST_HOPS: 331 *i1 = tcps->tcps_ipv6_hoplimit; 332 break; 333 default: 334 return (-1); 335 } 336 break; 337 default: 338 return (-1); 339 } 340 return (sizeof (int)); 341 } 342 343 /* 344 * TCP routine to get the values of options. 345 */ 346 int 347 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 348 { 349 int *i1 = (int *)ptr; 350 tcp_t *tcp = connp->conn_tcp; 351 conn_opt_arg_t coas; 352 int retval; 353 354 coas.coa_connp = connp; 355 coas.coa_ixa = connp->conn_ixa; 356 coas.coa_ipp = &connp->conn_xmit_ipp; 357 coas.coa_ancillary = B_FALSE; 358 coas.coa_changed = 0; 359 360 switch (level) { 361 case SOL_SOCKET: 362 switch (name) { 363 case SO_SND_COPYAVOID: 364 *i1 = tcp->tcp_snd_zcopy_on ? 365 SO_SND_COPYAVOID : 0; 366 return (sizeof (int)); 367 case SO_ACCEPTCONN: 368 *i1 = (tcp->tcp_state == TCPS_LISTEN); 369 return (sizeof (int)); 370 } 371 break; 372 case IPPROTO_TCP: 373 switch (name) { 374 case TCP_NODELAY: 375 *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0; 376 return (sizeof (int)); 377 case TCP_MAXSEG: 378 *i1 = tcp->tcp_mss; 379 return (sizeof (int)); 380 case TCP_NOTIFY_THRESHOLD: 381 *i1 = (int)tcp->tcp_first_timer_threshold; 382 return (sizeof (int)); 383 case TCP_ABORT_THRESHOLD: 384 *i1 = tcp->tcp_second_timer_threshold; 385 return (sizeof (int)); 386 case TCP_CONN_NOTIFY_THRESHOLD: 387 *i1 = tcp->tcp_first_ctimer_threshold; 388 return (sizeof (int)); 389 case TCP_CONN_ABORT_THRESHOLD: 390 *i1 = tcp->tcp_second_ctimer_threshold; 391 return (sizeof (int)); 392 case TCP_INIT_CWND: 393 *i1 = tcp->tcp_init_cwnd; 394 return (sizeof (int)); 395 case TCP_KEEPALIVE_THRESHOLD: 396 *i1 = tcp->tcp_ka_interval; 397 return (sizeof (int)); 398 case TCP_KEEPALIVE_ABORT_THRESHOLD: 399 *i1 = tcp->tcp_ka_abort_thres; 400 return (sizeof (int)); 401 case TCP_CORK: 402 *i1 = tcp->tcp_cork; 403 return (sizeof (int)); 404 } 405 break; 406 case IPPROTO_IP: 407 if (connp->conn_family != AF_INET) 408 return (-1); 409 switch (name) { 410 case IP_OPTIONS: 411 case T_IP_OPTIONS: 412 /* Caller ensures enough space */ 413 return (ip_opt_get_user(connp, ptr)); 414 default: 415 break; 416 } 417 break; 418 419 case IPPROTO_IPV6: 420 /* 421 * IPPROTO_IPV6 options are only supported for sockets 422 * that are using IPv6 on the wire. 423 */ 424 if (connp->conn_ipversion != IPV6_VERSION) { 425 return (-1); 426 } 427 switch (name) { 428 case IPV6_PATHMTU: 429 if (tcp->tcp_state < TCPS_ESTABLISHED) 430 return (-1); 431 break; 432 } 433 break; 434 } 435 mutex_enter(&connp->conn_lock); 436 retval = conn_opt_get(&coas, level, name, ptr); 437 mutex_exit(&connp->conn_lock); 438 return (retval); 439 } 440 441 /* 442 * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements. 443 * Parameters are assumed to be verified by the caller. 444 */ 445 /* ARGSUSED */ 446 int 447 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 448 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 449 void *thisdg_attrs, cred_t *cr) 450 { 451 tcp_t *tcp = connp->conn_tcp; 452 int *i1 = (int *)invalp; 453 boolean_t onoff = (*i1 == 0) ? 0 : 1; 454 boolean_t checkonly; 455 int reterr; 456 tcp_stack_t *tcps = tcp->tcp_tcps; 457 conn_opt_arg_t coas; 458 459 coas.coa_connp = connp; 460 coas.coa_ixa = connp->conn_ixa; 461 coas.coa_ipp = &connp->conn_xmit_ipp; 462 coas.coa_ancillary = B_FALSE; 463 coas.coa_changed = 0; 464 465 switch (optset_context) { 466 case SETFN_OPTCOM_CHECKONLY: 467 checkonly = B_TRUE; 468 /* 469 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 470 * inlen != 0 implies value supplied and 471 * we have to "pretend" to set it. 472 * inlen == 0 implies that there is no 473 * value part in T_CHECK request and just validation 474 * done elsewhere should be enough, we just return here. 475 */ 476 if (inlen == 0) { 477 *outlenp = 0; 478 return (0); 479 } 480 break; 481 case SETFN_OPTCOM_NEGOTIATE: 482 checkonly = B_FALSE; 483 break; 484 case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */ 485 case SETFN_CONN_NEGOTIATE: 486 checkonly = B_FALSE; 487 /* 488 * Negotiating local and "association-related" options 489 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ) 490 * primitives is allowed by XTI, but we choose 491 * to not implement this style negotiation for Internet 492 * protocols (We interpret it is a must for OSI world but 493 * optional for Internet protocols) for all options. 494 * [ Will do only for the few options that enable test 495 * suites that our XTI implementation of this feature 496 * works for transports that do allow it ] 497 */ 498 if (!tcp_allow_connopt_set(level, name)) { 499 *outlenp = 0; 500 return (EINVAL); 501 } 502 break; 503 default: 504 /* 505 * We should never get here 506 */ 507 *outlenp = 0; 508 return (EINVAL); 509 } 510 511 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 512 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 513 514 /* 515 * For TCP, we should have no ancillary data sent down 516 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs 517 * has to be zero. 518 */ 519 ASSERT(thisdg_attrs == NULL); 520 521 /* 522 * For fixed length options, no sanity check 523 * of passed in length is done. It is assumed *_optcom_req() 524 * routines do the right thing. 525 */ 526 switch (level) { 527 case SOL_SOCKET: 528 switch (name) { 529 case SO_KEEPALIVE: 530 if (checkonly) { 531 /* check only case */ 532 break; 533 } 534 535 if (!onoff) { 536 if (connp->conn_keepalive) { 537 if (tcp->tcp_ka_tid != 0) { 538 (void) TCP_TIMER_CANCEL(tcp, 539 tcp->tcp_ka_tid); 540 tcp->tcp_ka_tid = 0; 541 } 542 connp->conn_keepalive = 0; 543 } 544 break; 545 } 546 if (!connp->conn_keepalive) { 547 /* Crank up the keepalive timer */ 548 tcp->tcp_ka_last_intrvl = 0; 549 tcp->tcp_ka_tid = TCP_TIMER(tcp, 550 tcp_keepalive_timer, tcp->tcp_ka_interval); 551 connp->conn_keepalive = 1; 552 } 553 break; 554 case SO_SNDBUF: { 555 if (*i1 > tcps->tcps_max_buf) { 556 *outlenp = 0; 557 return (ENOBUFS); 558 } 559 if (checkonly) 560 break; 561 562 connp->conn_sndbuf = *i1; 563 if (tcps->tcps_snd_lowat_fraction != 0) { 564 connp->conn_sndlowat = connp->conn_sndbuf / 565 tcps->tcps_snd_lowat_fraction; 566 } 567 (void) tcp_maxpsz_set(tcp, B_TRUE); 568 /* 569 * If we are flow-controlled, recheck the condition. 570 * There are apps that increase SO_SNDBUF size when 571 * flow-controlled (EWOULDBLOCK), and expect the flow 572 * control condition to be lifted right away. 573 */ 574 mutex_enter(&tcp->tcp_non_sq_lock); 575 if (tcp->tcp_flow_stopped && 576 TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) { 577 tcp_clrqfull(tcp); 578 } 579 mutex_exit(&tcp->tcp_non_sq_lock); 580 *outlenp = inlen; 581 return (0); 582 } 583 case SO_RCVBUF: 584 if (*i1 > tcps->tcps_max_buf) { 585 *outlenp = 0; 586 return (ENOBUFS); 587 } 588 /* Silently ignore zero */ 589 if (!checkonly && *i1 != 0) { 590 *i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss); 591 (void) tcp_rwnd_set(tcp, *i1); 592 } 593 /* 594 * XXX should we return the rwnd here 595 * and tcp_opt_get ? 596 */ 597 *outlenp = inlen; 598 return (0); 599 case SO_SND_COPYAVOID: 600 if (!checkonly) { 601 if (tcp->tcp_loopback || 602 (tcp->tcp_kssl_ctx != NULL) || 603 (onoff != 1) || !tcp_zcopy_check(tcp)) { 604 *outlenp = 0; 605 return (EOPNOTSUPP); 606 } 607 tcp->tcp_snd_zcopy_aware = 1; 608 } 609 *outlenp = inlen; 610 return (0); 611 } 612 break; 613 case IPPROTO_TCP: 614 switch (name) { 615 case TCP_NODELAY: 616 if (!checkonly) 617 tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss; 618 break; 619 case TCP_NOTIFY_THRESHOLD: 620 if (!checkonly) 621 tcp->tcp_first_timer_threshold = *i1; 622 break; 623 case TCP_ABORT_THRESHOLD: 624 if (!checkonly) 625 tcp->tcp_second_timer_threshold = *i1; 626 break; 627 case TCP_CONN_NOTIFY_THRESHOLD: 628 if (!checkonly) 629 tcp->tcp_first_ctimer_threshold = *i1; 630 break; 631 case TCP_CONN_ABORT_THRESHOLD: 632 if (!checkonly) 633 tcp->tcp_second_ctimer_threshold = *i1; 634 break; 635 case TCP_RECVDSTADDR: 636 if (tcp->tcp_state > TCPS_LISTEN) { 637 *outlenp = 0; 638 return (EOPNOTSUPP); 639 } 640 /* Setting done in conn_opt_set */ 641 break; 642 case TCP_INIT_CWND: { 643 uint32_t init_cwnd = *((uint32_t *)invalp); 644 645 if (checkonly) 646 break; 647 648 /* 649 * Only allow socket with network configuration 650 * privilege to set the initial cwnd to be larger 651 * than allowed by RFC 3390. 652 */ 653 if (init_cwnd <= MIN(4, MAX(2, 4380 / tcp->tcp_mss))) { 654 tcp->tcp_init_cwnd = init_cwnd; 655 break; 656 } 657 if ((reterr = secpolicy_ip_config(cr, B_TRUE)) != 0) { 658 *outlenp = 0; 659 return (reterr); 660 } 661 if (init_cwnd > tcp_max_init_cwnd) { 662 *outlenp = 0; 663 return (EINVAL); 664 } 665 tcp->tcp_init_cwnd = init_cwnd; 666 break; 667 } 668 case TCP_KEEPALIVE_THRESHOLD: 669 if (checkonly) 670 break; 671 672 if (*i1 < tcps->tcps_keepalive_interval_low || 673 *i1 > tcps->tcps_keepalive_interval_high) { 674 *outlenp = 0; 675 return (EINVAL); 676 } 677 if (*i1 != tcp->tcp_ka_interval) { 678 tcp->tcp_ka_interval = *i1; 679 /* 680 * Check if we need to restart the 681 * keepalive timer. 682 */ 683 if (tcp->tcp_ka_tid != 0) { 684 ASSERT(connp->conn_keepalive); 685 (void) TCP_TIMER_CANCEL(tcp, 686 tcp->tcp_ka_tid); 687 tcp->tcp_ka_last_intrvl = 0; 688 tcp->tcp_ka_tid = TCP_TIMER(tcp, 689 tcp_keepalive_timer, 690 tcp->tcp_ka_interval); 691 } 692 } 693 break; 694 case TCP_KEEPALIVE_ABORT_THRESHOLD: 695 if (!checkonly) { 696 if (*i1 < 697 tcps->tcps_keepalive_abort_interval_low || 698 *i1 > 699 tcps->tcps_keepalive_abort_interval_high) { 700 *outlenp = 0; 701 return (EINVAL); 702 } 703 tcp->tcp_ka_abort_thres = *i1; 704 } 705 break; 706 case TCP_CORK: 707 if (!checkonly) { 708 /* 709 * if tcp->tcp_cork was set and is now 710 * being unset, we have to make sure that 711 * the remaining data gets sent out. Also 712 * unset tcp->tcp_cork so that tcp_wput_data() 713 * can send data even if it is less than mss 714 */ 715 if (tcp->tcp_cork && onoff == 0 && 716 tcp->tcp_unsent > 0) { 717 tcp->tcp_cork = B_FALSE; 718 tcp_wput_data(tcp, NULL, B_FALSE); 719 } 720 tcp->tcp_cork = onoff; 721 } 722 break; 723 default: 724 break; 725 } 726 break; 727 case IPPROTO_IP: 728 if (connp->conn_family != AF_INET) { 729 *outlenp = 0; 730 return (EINVAL); 731 } 732 switch (name) { 733 case IP_SEC_OPT: 734 /* 735 * We should not allow policy setting after 736 * we start listening for connections. 737 */ 738 if (tcp->tcp_state == TCPS_LISTEN) { 739 return (EINVAL); 740 } 741 break; 742 } 743 break; 744 case IPPROTO_IPV6: 745 /* 746 * IPPROTO_IPV6 options are only supported for sockets 747 * that are using IPv6 on the wire. 748 */ 749 if (connp->conn_ipversion != IPV6_VERSION) { 750 *outlenp = 0; 751 return (EINVAL); 752 } 753 754 switch (name) { 755 case IPV6_RECVPKTINFO: 756 if (!checkonly) { 757 /* Force it to be sent up with the next msg */ 758 tcp->tcp_recvifindex = 0; 759 } 760 break; 761 case IPV6_RECVTCLASS: 762 if (!checkonly) { 763 /* Force it to be sent up with the next msg */ 764 tcp->tcp_recvtclass = 0xffffffffU; 765 } 766 break; 767 case IPV6_RECVHOPLIMIT: 768 if (!checkonly) { 769 /* Force it to be sent up with the next msg */ 770 tcp->tcp_recvhops = 0xffffffffU; 771 } 772 break; 773 case IPV6_PKTINFO: 774 /* This is an extra check for TCP */ 775 if (inlen == sizeof (struct in6_pktinfo)) { 776 struct in6_pktinfo *pkti; 777 778 pkti = (struct in6_pktinfo *)invalp; 779 /* 780 * RFC 3542 states that ipi6_addr must be 781 * the unspecified address when setting the 782 * IPV6_PKTINFO sticky socket option on a 783 * TCP socket. 784 */ 785 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) 786 return (EINVAL); 787 } 788 break; 789 case IPV6_SEC_OPT: 790 /* 791 * We should not allow policy setting after 792 * we start listening for connections. 793 */ 794 if (tcp->tcp_state == TCPS_LISTEN) { 795 return (EINVAL); 796 } 797 break; 798 } 799 break; 800 } 801 reterr = conn_opt_set(&coas, level, name, inlen, invalp, 802 checkonly, cr); 803 if (reterr != 0) { 804 *outlenp = 0; 805 return (reterr); 806 } 807 808 /* 809 * Common case of OK return with outval same as inval 810 */ 811 if (invalp != outvalp) { 812 /* don't trust bcopy for identical src/dst */ 813 (void) bcopy(invalp, outvalp, inlen); 814 } 815 *outlenp = inlen; 816 817 if (coas.coa_changed & COA_HEADER_CHANGED) { 818 /* If we are connected we rebuilt the headers */ 819 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 820 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 821 reterr = tcp_build_hdrs(tcp); 822 if (reterr != 0) 823 return (reterr); 824 } 825 } 826 if (coas.coa_changed & COA_ROUTE_CHANGED) { 827 in6_addr_t nexthop; 828 829 /* 830 * If we are connected we re-cache the information. 831 * We ignore errors to preserve BSD behavior. 832 * Note that we don't redo IPsec policy lookup here 833 * since the final destination (or source) didn't change. 834 */ 835 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa, 836 &connp->conn_faddr_v6, &nexthop); 837 838 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 839 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 840 (void) ip_attr_connect(connp, connp->conn_ixa, 841 &connp->conn_laddr_v6, &connp->conn_faddr_v6, 842 &nexthop, connp->conn_fport, NULL, NULL, 843 IPDF_VERIFY_DST); 844 } 845 } 846 if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { 847 connp->conn_wq->q_hiwat = connp->conn_sndbuf; 848 } 849 if (coas.coa_changed & COA_WROFF_CHANGED) { 850 connp->conn_wroff = connp->conn_ht_iphc_allocated + 851 tcps->tcps_wroff_xtra; 852 (void) proto_set_tx_wroff(connp->conn_rq, connp, 853 connp->conn_wroff); 854 } 855 if (coas.coa_changed & COA_OOBINLINE_CHANGED) { 856 if (IPCL_IS_NONSTR(connp)) 857 proto_set_rx_oob_opt(connp, onoff); 858 } 859 return (0); 860 } 861