1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #define _SUN_TPI_VERSION 2 29 #include <sys/tihdr.h> 30 #include <sys/socket.h> 31 #include <sys/xti_xtiopt.h> 32 #include <sys/xti_inet.h> 33 #include <sys/policy.h> 34 35 #include <inet/common.h> 36 #include <netinet/ip6.h> 37 #include <inet/ip.h> 38 39 #include <netinet/in.h> 40 #include <netinet/tcp.h> 41 #include <inet/optcom.h> 42 #include <inet/proto_set.h> 43 #include <inet/tcp_impl.h> 44 45 /* 46 * Table of all known options handled on a TCP protocol stack. 47 * 48 * Note: This table contains options processed by both TCP and IP levels 49 * and is the superset of options that can be performed on a TCP over IP 50 * stack. 51 */ 52 opdes_t tcp_opt_arr[] = { 53 54 { SO_LINGER, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, 55 sizeof (struct linger), 0 }, 56 57 { SO_DEBUG, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 58 { SO_KEEPALIVE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 59 { SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 60 { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 61 }, 62 { SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 63 { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 64 { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 65 { SO_TYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 }, 66 { SO_SNDBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 67 { SO_RCVBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 68 { SO_SNDTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, 69 sizeof (struct timeval), 0 }, 70 { SO_RCVTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, 71 sizeof (struct timeval), 0 }, 72 { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 73 }, 74 { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 75 { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 76 0 }, 77 { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 78 0 }, 79 { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 80 0 }, 81 { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int), 82 0 }, 83 { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 84 85 { SO_DOMAIN, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 }, 86 87 { SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 }, 88 89 { TCP_NODELAY, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 90 }, 91 { TCP_MAXSEG, IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t), 92 536 }, 93 94 { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 95 OP_DEF_FN, sizeof (int), -1 /* not initialized */ }, 96 97 { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 98 OP_DEF_FN, sizeof (int), -1 /* not initialized */ }, 99 100 { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 101 OP_DEF_FN, sizeof (int), -1 /* not initialized */ }, 102 103 { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 104 OP_DEF_FN, sizeof (int), -1 /* not initialized */ }, 105 106 { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 107 0 }, 108 109 { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0, 110 sizeof (int), 0 }, 111 112 { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 113 }, 114 115 { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0, 116 sizeof (int), 0 }, 117 118 { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, 119 sizeof (int), 0 }, 120 121 { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, 122 sizeof (int), 0 }, 123 124 { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 125 126 { IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 127 (OP_VARLEN|OP_NODEFAULT), 128 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ }, 129 { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 130 (OP_VARLEN|OP_NODEFAULT), 131 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ }, 132 133 { IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 134 { T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 135 { IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN, 136 sizeof (int), -1 /* not initialized */ }, 137 138 { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT, 139 sizeof (ipsec_req_t), -1 /* not initialized */ }, 140 141 { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, 142 sizeof (int), 0 /* no ifindex */ }, 143 144 { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0, 145 sizeof (int), 0 }, 146 147 { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN, 148 sizeof (int), -1 /* not initialized */ }, 149 150 { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 151 sizeof (int), 0 /* no ifindex */ }, 152 153 { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, 154 155 { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0, 156 sizeof (in_addr_t), -1 /* not initialized */ }, 157 158 { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0, 159 sizeof (int), 0 }, 160 161 { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 162 (OP_NODEFAULT|OP_VARLEN), 163 sizeof (struct in6_pktinfo), -1 /* not initialized */ }, 164 { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 165 OP_NODEFAULT, 166 sizeof (sin6_t), -1 /* not initialized */ }, 167 { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 168 (OP_VARLEN|OP_NODEFAULT), 255*8, 169 -1 /* not initialized */ }, 170 { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 171 (OP_VARLEN|OP_NODEFAULT), 255*8, 172 -1 /* not initialized */ }, 173 { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 174 (OP_VARLEN|OP_NODEFAULT), 255*8, 175 -1 /* not initialized */ }, 176 { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 177 (OP_VARLEN|OP_NODEFAULT), 255*8, 178 -1 /* not initialized */ }, 179 { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 180 OP_NODEFAULT, 181 sizeof (int), -1 /* not initialized */ }, 182 { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 183 OP_NODEFAULT, 184 sizeof (struct ip6_mtuinfo), -1 /* not initialized */ }, 185 { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 186 sizeof (int), 0 }, 187 { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 188 sizeof (int), 0 }, 189 { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 190 sizeof (int), 0 }, 191 192 /* Enable receipt of ancillary data */ 193 { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 194 sizeof (int), 0 }, 195 { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 196 sizeof (int), 0 }, 197 { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 198 sizeof (int), 0 }, 199 { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 200 sizeof (int), 0 }, 201 { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 202 sizeof (int), 0 }, 203 { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 204 sizeof (int), 0 }, 205 { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 206 sizeof (int), 0 }, 207 { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 208 sizeof (int), 0 }, 209 210 { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT, 211 sizeof (ipsec_req_t), -1 /* not initialized */ }, 212 { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0, 213 sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT }, 214 }; 215 216 /* 217 * Table of all supported levels 218 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have 219 * any supported options so we need this info separately. 220 * 221 * This is needed only for topmost tpi providers and is used only by 222 * XTI interfaces. 223 */ 224 optlevel_t tcp_valid_levels_arr[] = { 225 XTI_GENERIC, 226 SOL_SOCKET, 227 IPPROTO_TCP, 228 IPPROTO_IP, 229 IPPROTO_IPV6 230 }; 231 232 233 #define TCP_OPT_ARR_CNT A_CNT(tcp_opt_arr) 234 #define TCP_VALID_LEVELS_CNT A_CNT(tcp_valid_levels_arr) 235 236 uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */ 237 238 /* 239 * Initialize option database object for TCP 240 * 241 * This object represents database of options to search passed to 242 * {sock,tpi}optcom_req() interface routine to take care of option 243 * management and associated methods. 244 */ 245 246 optdb_obj_t tcp_opt_obj = { 247 tcp_opt_default, /* TCP default value function pointer */ 248 tcp_tpi_opt_get, /* TCP get function pointer */ 249 tcp_tpi_opt_set, /* TCP set function pointer */ 250 TCP_OPT_ARR_CNT, /* TCP option database count of entries */ 251 tcp_opt_arr, /* TCP option database */ 252 TCP_VALID_LEVELS_CNT, /* TCP valid level count of entries */ 253 tcp_valid_levels_arr /* TCP valid level array */ 254 }; 255 256 /* Maximum TCP initial cwin (start/restart). */ 257 #define TCP_MAX_INIT_CWND 16 258 259 static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND; 260 261 /* 262 * Some TCP options can be "set" by requesting them in the option 263 * buffer. This is needed for XTI feature test though we do not 264 * allow it in general. We interpret that this mechanism is more 265 * applicable to OSI protocols and need not be allowed in general. 266 * This routine filters out options for which it is not allowed (most) 267 * and lets through those (few) for which it is. [ The XTI interface 268 * test suite specifics will imply that any XTI_GENERIC level XTI_* if 269 * ever implemented will have to be allowed here ]. 270 */ 271 static boolean_t 272 tcp_allow_connopt_set(int level, int name) 273 { 274 275 switch (level) { 276 case IPPROTO_TCP: 277 switch (name) { 278 case TCP_NODELAY: 279 return (B_TRUE); 280 default: 281 return (B_FALSE); 282 } 283 /*NOTREACHED*/ 284 default: 285 return (B_FALSE); 286 } 287 /*NOTREACHED*/ 288 } 289 290 /* 291 * This routine gets default values of certain options whose default 292 * values are maintained by protocol specific code 293 */ 294 /* ARGSUSED */ 295 int 296 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 297 { 298 int32_t *i1 = (int32_t *)ptr; 299 tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps; 300 301 switch (level) { 302 case IPPROTO_TCP: 303 switch (name) { 304 case TCP_NOTIFY_THRESHOLD: 305 *i1 = tcps->tcps_ip_notify_interval; 306 break; 307 case TCP_ABORT_THRESHOLD: 308 *i1 = tcps->tcps_ip_abort_interval; 309 break; 310 case TCP_CONN_NOTIFY_THRESHOLD: 311 *i1 = tcps->tcps_ip_notify_cinterval; 312 break; 313 case TCP_CONN_ABORT_THRESHOLD: 314 *i1 = tcps->tcps_ip_abort_cinterval; 315 break; 316 default: 317 return (-1); 318 } 319 break; 320 case IPPROTO_IP: 321 switch (name) { 322 case IP_TTL: 323 *i1 = tcps->tcps_ipv4_ttl; 324 break; 325 default: 326 return (-1); 327 } 328 break; 329 case IPPROTO_IPV6: 330 switch (name) { 331 case IPV6_UNICAST_HOPS: 332 *i1 = tcps->tcps_ipv6_hoplimit; 333 break; 334 default: 335 return (-1); 336 } 337 break; 338 default: 339 return (-1); 340 } 341 return (sizeof (int)); 342 } 343 344 /* 345 * TCP routine to get the values of options. 346 */ 347 int 348 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 349 { 350 int *i1 = (int *)ptr; 351 tcp_t *tcp = connp->conn_tcp; 352 conn_opt_arg_t coas; 353 int retval; 354 355 coas.coa_connp = connp; 356 coas.coa_ixa = connp->conn_ixa; 357 coas.coa_ipp = &connp->conn_xmit_ipp; 358 coas.coa_ancillary = B_FALSE; 359 coas.coa_changed = 0; 360 361 switch (level) { 362 case SOL_SOCKET: 363 switch (name) { 364 case SO_SND_COPYAVOID: 365 *i1 = tcp->tcp_snd_zcopy_on ? 366 SO_SND_COPYAVOID : 0; 367 return (sizeof (int)); 368 case SO_ACCEPTCONN: 369 *i1 = (tcp->tcp_state == TCPS_LISTEN); 370 return (sizeof (int)); 371 } 372 break; 373 case IPPROTO_TCP: 374 switch (name) { 375 case TCP_NODELAY: 376 *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0; 377 return (sizeof (int)); 378 case TCP_MAXSEG: 379 *i1 = tcp->tcp_mss; 380 return (sizeof (int)); 381 case TCP_NOTIFY_THRESHOLD: 382 *i1 = (int)tcp->tcp_first_timer_threshold; 383 return (sizeof (int)); 384 case TCP_ABORT_THRESHOLD: 385 *i1 = tcp->tcp_second_timer_threshold; 386 return (sizeof (int)); 387 case TCP_CONN_NOTIFY_THRESHOLD: 388 *i1 = tcp->tcp_first_ctimer_threshold; 389 return (sizeof (int)); 390 case TCP_CONN_ABORT_THRESHOLD: 391 *i1 = tcp->tcp_second_ctimer_threshold; 392 return (sizeof (int)); 393 case TCP_INIT_CWND: 394 *i1 = tcp->tcp_init_cwnd; 395 return (sizeof (int)); 396 case TCP_KEEPALIVE_THRESHOLD: 397 *i1 = tcp->tcp_ka_interval; 398 return (sizeof (int)); 399 case TCP_KEEPALIVE_ABORT_THRESHOLD: 400 *i1 = tcp->tcp_ka_abort_thres; 401 return (sizeof (int)); 402 case TCP_CORK: 403 *i1 = tcp->tcp_cork; 404 return (sizeof (int)); 405 } 406 break; 407 case IPPROTO_IP: 408 if (connp->conn_family != AF_INET) 409 return (-1); 410 switch (name) { 411 case IP_OPTIONS: 412 case T_IP_OPTIONS: 413 /* Caller ensures enough space */ 414 return (ip_opt_get_user(connp, ptr)); 415 default: 416 break; 417 } 418 break; 419 420 case IPPROTO_IPV6: 421 /* 422 * IPPROTO_IPV6 options are only supported for sockets 423 * that are using IPv6 on the wire. 424 */ 425 if (connp->conn_ipversion != IPV6_VERSION) { 426 return (-1); 427 } 428 switch (name) { 429 case IPV6_PATHMTU: 430 if (tcp->tcp_state < TCPS_ESTABLISHED) 431 return (-1); 432 break; 433 } 434 break; 435 } 436 mutex_enter(&connp->conn_lock); 437 retval = conn_opt_get(&coas, level, name, ptr); 438 mutex_exit(&connp->conn_lock); 439 return (retval); 440 } 441 442 /* 443 * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements. 444 * Parameters are assumed to be verified by the caller. 445 */ 446 /* ARGSUSED */ 447 int 448 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 449 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 450 void *thisdg_attrs, cred_t *cr) 451 { 452 tcp_t *tcp = connp->conn_tcp; 453 int *i1 = (int *)invalp; 454 boolean_t onoff = (*i1 == 0) ? 0 : 1; 455 boolean_t checkonly; 456 int reterr; 457 tcp_stack_t *tcps = tcp->tcp_tcps; 458 conn_opt_arg_t coas; 459 460 coas.coa_connp = connp; 461 coas.coa_ixa = connp->conn_ixa; 462 coas.coa_ipp = &connp->conn_xmit_ipp; 463 coas.coa_ancillary = B_FALSE; 464 coas.coa_changed = 0; 465 466 switch (optset_context) { 467 case SETFN_OPTCOM_CHECKONLY: 468 checkonly = B_TRUE; 469 /* 470 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 471 * inlen != 0 implies value supplied and 472 * we have to "pretend" to set it. 473 * inlen == 0 implies that there is no 474 * value part in T_CHECK request and just validation 475 * done elsewhere should be enough, we just return here. 476 */ 477 if (inlen == 0) { 478 *outlenp = 0; 479 return (0); 480 } 481 break; 482 case SETFN_OPTCOM_NEGOTIATE: 483 checkonly = B_FALSE; 484 break; 485 case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */ 486 case SETFN_CONN_NEGOTIATE: 487 checkonly = B_FALSE; 488 /* 489 * Negotiating local and "association-related" options 490 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ) 491 * primitives is allowed by XTI, but we choose 492 * to not implement this style negotiation for Internet 493 * protocols (We interpret it is a must for OSI world but 494 * optional for Internet protocols) for all options. 495 * [ Will do only for the few options that enable test 496 * suites that our XTI implementation of this feature 497 * works for transports that do allow it ] 498 */ 499 if (!tcp_allow_connopt_set(level, name)) { 500 *outlenp = 0; 501 return (EINVAL); 502 } 503 break; 504 default: 505 /* 506 * We should never get here 507 */ 508 *outlenp = 0; 509 return (EINVAL); 510 } 511 512 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 513 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 514 515 /* 516 * For TCP, we should have no ancillary data sent down 517 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs 518 * has to be zero. 519 */ 520 ASSERT(thisdg_attrs == NULL); 521 522 /* 523 * For fixed length options, no sanity check 524 * of passed in length is done. It is assumed *_optcom_req() 525 * routines do the right thing. 526 */ 527 switch (level) { 528 case SOL_SOCKET: 529 switch (name) { 530 case SO_KEEPALIVE: 531 if (checkonly) { 532 /* check only case */ 533 break; 534 } 535 536 if (!onoff) { 537 if (connp->conn_keepalive) { 538 if (tcp->tcp_ka_tid != 0) { 539 (void) TCP_TIMER_CANCEL(tcp, 540 tcp->tcp_ka_tid); 541 tcp->tcp_ka_tid = 0; 542 } 543 connp->conn_keepalive = 0; 544 } 545 break; 546 } 547 if (!connp->conn_keepalive) { 548 /* Crank up the keepalive timer */ 549 tcp->tcp_ka_last_intrvl = 0; 550 tcp->tcp_ka_tid = TCP_TIMER(tcp, 551 tcp_keepalive_timer, 552 MSEC_TO_TICK(tcp->tcp_ka_interval)); 553 connp->conn_keepalive = 1; 554 } 555 break; 556 case SO_SNDBUF: { 557 if (*i1 > tcps->tcps_max_buf) { 558 *outlenp = 0; 559 return (ENOBUFS); 560 } 561 if (checkonly) 562 break; 563 564 connp->conn_sndbuf = *i1; 565 if (tcps->tcps_snd_lowat_fraction != 0) { 566 connp->conn_sndlowat = connp->conn_sndbuf / 567 tcps->tcps_snd_lowat_fraction; 568 } 569 (void) tcp_maxpsz_set(tcp, B_TRUE); 570 /* 571 * If we are flow-controlled, recheck the condition. 572 * There are apps that increase SO_SNDBUF size when 573 * flow-controlled (EWOULDBLOCK), and expect the flow 574 * control condition to be lifted right away. 575 */ 576 mutex_enter(&tcp->tcp_non_sq_lock); 577 if (tcp->tcp_flow_stopped && 578 TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) { 579 tcp_clrqfull(tcp); 580 } 581 mutex_exit(&tcp->tcp_non_sq_lock); 582 *outlenp = inlen; 583 return (0); 584 } 585 case SO_RCVBUF: 586 if (*i1 > tcps->tcps_max_buf) { 587 *outlenp = 0; 588 return (ENOBUFS); 589 } 590 /* Silently ignore zero */ 591 if (!checkonly && *i1 != 0) { 592 *i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss); 593 (void) tcp_rwnd_set(tcp, *i1); 594 } 595 /* 596 * XXX should we return the rwnd here 597 * and tcp_opt_get ? 598 */ 599 *outlenp = inlen; 600 return (0); 601 case SO_SND_COPYAVOID: 602 if (!checkonly) { 603 if (tcp->tcp_loopback || 604 (tcp->tcp_kssl_ctx != NULL) || 605 (onoff != 1) || !tcp_zcopy_check(tcp)) { 606 *outlenp = 0; 607 return (EOPNOTSUPP); 608 } 609 tcp->tcp_snd_zcopy_aware = 1; 610 } 611 *outlenp = inlen; 612 return (0); 613 } 614 break; 615 case IPPROTO_TCP: 616 switch (name) { 617 case TCP_NODELAY: 618 if (!checkonly) 619 tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss; 620 break; 621 case TCP_NOTIFY_THRESHOLD: 622 if (!checkonly) 623 tcp->tcp_first_timer_threshold = *i1; 624 break; 625 case TCP_ABORT_THRESHOLD: 626 if (!checkonly) 627 tcp->tcp_second_timer_threshold = *i1; 628 break; 629 case TCP_CONN_NOTIFY_THRESHOLD: 630 if (!checkonly) 631 tcp->tcp_first_ctimer_threshold = *i1; 632 break; 633 case TCP_CONN_ABORT_THRESHOLD: 634 if (!checkonly) 635 tcp->tcp_second_ctimer_threshold = *i1; 636 break; 637 case TCP_RECVDSTADDR: 638 if (tcp->tcp_state > TCPS_LISTEN) { 639 *outlenp = 0; 640 return (EOPNOTSUPP); 641 } 642 /* Setting done in conn_opt_set */ 643 break; 644 case TCP_INIT_CWND: { 645 uint32_t init_cwnd = *((uint32_t *)invalp); 646 647 if (checkonly) 648 break; 649 650 /* 651 * Only allow socket with network configuration 652 * privilege to set the initial cwnd to be larger 653 * than allowed by RFC 3390. 654 */ 655 if (init_cwnd <= MIN(4, MAX(2, 4380 / tcp->tcp_mss))) { 656 tcp->tcp_init_cwnd = init_cwnd; 657 break; 658 } 659 if ((reterr = secpolicy_ip_config(cr, B_TRUE)) != 0) { 660 *outlenp = 0; 661 return (reterr); 662 } 663 if (init_cwnd > tcp_max_init_cwnd) { 664 *outlenp = 0; 665 return (EINVAL); 666 } 667 tcp->tcp_init_cwnd = init_cwnd; 668 break; 669 } 670 case TCP_KEEPALIVE_THRESHOLD: 671 if (checkonly) 672 break; 673 674 if (*i1 < tcps->tcps_keepalive_interval_low || 675 *i1 > tcps->tcps_keepalive_interval_high) { 676 *outlenp = 0; 677 return (EINVAL); 678 } 679 if (*i1 != tcp->tcp_ka_interval) { 680 tcp->tcp_ka_interval = *i1; 681 /* 682 * Check if we need to restart the 683 * keepalive timer. 684 */ 685 if (tcp->tcp_ka_tid != 0) { 686 ASSERT(connp->conn_keepalive); 687 (void) TCP_TIMER_CANCEL(tcp, 688 tcp->tcp_ka_tid); 689 tcp->tcp_ka_last_intrvl = 0; 690 tcp->tcp_ka_tid = TCP_TIMER(tcp, 691 tcp_keepalive_timer, 692 MSEC_TO_TICK(tcp->tcp_ka_interval)); 693 } 694 } 695 break; 696 case TCP_KEEPALIVE_ABORT_THRESHOLD: 697 if (!checkonly) { 698 if (*i1 < 699 tcps->tcps_keepalive_abort_interval_low || 700 *i1 > 701 tcps->tcps_keepalive_abort_interval_high) { 702 *outlenp = 0; 703 return (EINVAL); 704 } 705 tcp->tcp_ka_abort_thres = *i1; 706 } 707 break; 708 case TCP_CORK: 709 if (!checkonly) { 710 /* 711 * if tcp->tcp_cork was set and is now 712 * being unset, we have to make sure that 713 * the remaining data gets sent out. Also 714 * unset tcp->tcp_cork so that tcp_wput_data() 715 * can send data even if it is less than mss 716 */ 717 if (tcp->tcp_cork && onoff == 0 && 718 tcp->tcp_unsent > 0) { 719 tcp->tcp_cork = B_FALSE; 720 tcp_wput_data(tcp, NULL, B_FALSE); 721 } 722 tcp->tcp_cork = onoff; 723 } 724 break; 725 default: 726 break; 727 } 728 break; 729 case IPPROTO_IP: 730 if (connp->conn_family != AF_INET) { 731 *outlenp = 0; 732 return (EINVAL); 733 } 734 switch (name) { 735 case IP_SEC_OPT: 736 /* 737 * We should not allow policy setting after 738 * we start listening for connections. 739 */ 740 if (tcp->tcp_state == TCPS_LISTEN) { 741 return (EINVAL); 742 } 743 break; 744 } 745 break; 746 case IPPROTO_IPV6: 747 /* 748 * IPPROTO_IPV6 options are only supported for sockets 749 * that are using IPv6 on the wire. 750 */ 751 if (connp->conn_ipversion != IPV6_VERSION) { 752 *outlenp = 0; 753 return (EINVAL); 754 } 755 756 switch (name) { 757 case IPV6_RECVPKTINFO: 758 if (!checkonly) { 759 /* Force it to be sent up with the next msg */ 760 tcp->tcp_recvifindex = 0; 761 } 762 break; 763 case IPV6_RECVTCLASS: 764 if (!checkonly) { 765 /* Force it to be sent up with the next msg */ 766 tcp->tcp_recvtclass = 0xffffffffU; 767 } 768 break; 769 case IPV6_RECVHOPLIMIT: 770 if (!checkonly) { 771 /* Force it to be sent up with the next msg */ 772 tcp->tcp_recvhops = 0xffffffffU; 773 } 774 break; 775 case IPV6_PKTINFO: 776 /* This is an extra check for TCP */ 777 if (inlen == sizeof (struct in6_pktinfo)) { 778 struct in6_pktinfo *pkti; 779 780 pkti = (struct in6_pktinfo *)invalp; 781 /* 782 * RFC 3542 states that ipi6_addr must be 783 * the unspecified address when setting the 784 * IPV6_PKTINFO sticky socket option on a 785 * TCP socket. 786 */ 787 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) 788 return (EINVAL); 789 } 790 break; 791 case IPV6_SEC_OPT: 792 /* 793 * We should not allow policy setting after 794 * we start listening for connections. 795 */ 796 if (tcp->tcp_state == TCPS_LISTEN) { 797 return (EINVAL); 798 } 799 break; 800 } 801 break; 802 } 803 reterr = conn_opt_set(&coas, level, name, inlen, invalp, 804 checkonly, cr); 805 if (reterr != 0) { 806 *outlenp = 0; 807 return (reterr); 808 } 809 810 /* 811 * Common case of OK return with outval same as inval 812 */ 813 if (invalp != outvalp) { 814 /* don't trust bcopy for identical src/dst */ 815 (void) bcopy(invalp, outvalp, inlen); 816 } 817 *outlenp = inlen; 818 819 if (coas.coa_changed & COA_HEADER_CHANGED) { 820 /* If we are connected we rebuilt the headers */ 821 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 822 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 823 reterr = tcp_build_hdrs(tcp); 824 if (reterr != 0) 825 return (reterr); 826 } 827 } 828 if (coas.coa_changed & COA_ROUTE_CHANGED) { 829 in6_addr_t nexthop; 830 831 /* 832 * If we are connected we re-cache the information. 833 * We ignore errors to preserve BSD behavior. 834 * Note that we don't redo IPsec policy lookup here 835 * since the final destination (or source) didn't change. 836 */ 837 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa, 838 &connp->conn_faddr_v6, &nexthop); 839 840 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 841 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 842 (void) ip_attr_connect(connp, connp->conn_ixa, 843 &connp->conn_laddr_v6, &connp->conn_faddr_v6, 844 &nexthop, connp->conn_fport, NULL, NULL, 845 IPDF_VERIFY_DST); 846 } 847 } 848 if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { 849 connp->conn_wq->q_hiwat = connp->conn_sndbuf; 850 } 851 if (coas.coa_changed & COA_WROFF_CHANGED) { 852 connp->conn_wroff = connp->conn_ht_iphc_allocated + 853 tcps->tcps_wroff_xtra; 854 (void) proto_set_tx_wroff(connp->conn_rq, connp, 855 connp->conn_wroff); 856 } 857 if (coas.coa_changed & COA_OOBINLINE_CHANGED) { 858 if (IPCL_IS_NONSTR(connp)) 859 proto_set_rx_oob_opt(connp, onoff); 860 } 861 return (0); 862 } 863