xref: /illumos-gate/usr/src/uts/common/inet/tcp/tcp_opt_data.c (revision 95faac55ed9158a0f593df1059de9fffbe33c5b4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
24  * Copyright 2019 Joyent, Inc.
25  * Copyright (c) 2016 by Delphix. All rights reserved.
26  */
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #define	_SUN_TPI_VERSION 2
31 #include <sys/tihdr.h>
32 #include <sys/socket.h>
33 #include <sys/xti_xtiopt.h>
34 #include <sys/xti_inet.h>
35 #include <sys/policy.h>
36 
37 #include <inet/cc.h>
38 #include <inet/common.h>
39 #include <netinet/ip6.h>
40 #include <inet/ip.h>
41 
42 #include <netinet/in.h>
43 #include <netinet/tcp.h>
44 #include <inet/optcom.h>
45 #include <inet/proto_set.h>
46 #include <inet/tcp_impl.h>
47 
/*
 * Forward declaration: tcp_opt_default() is referenced by the tcp_opt_obj
 * initializer before its definition further down in this file.
 */
static int	tcp_opt_default(queue_t *, int, int, uchar_t *);
49 
/*
 * Table of all known options handled on a TCP protocol stack.
 *
 * Note: This table contains options processed at both the TCP and IP levels
 *       and is the superset of options that can be performed on a TCP over IP
 *       stack.
 *
 * Each initializer lists, in order:
 *	- the option name;
 *	- the option level (SOL_SOCKET, IPPROTO_TCP, IPPROTO_IP, IPPROTO_IPV6);
 *	- two access values (OA_R or OA_RW);
 *	- a privilege selector (OP_NP, OP_CONFIG, OP_PRIVPORT or OP_RAW);
 *	- property flags (0, OP_DEF_FN, OP_VARLEN, OP_NODEFAULT);
 *	- the option value size in bytes;
 *	- a default value (-1 where marked "not initialized").
 *
 * NOTE(review): the field meanings above are inferred from the values used
 * in this table; confirm them against the opdes_t definition (presumably in
 * <inet/optcom.h>).  In particular the two access values presumably apply
 * to unprivileged and privileged callers respectively -- TODO confirm.
 *
 * The entries flagged OP_DEF_FN are exactly the options for which
 * tcp_opt_default() in this file supplies a value.
 */
opdes_t	tcp_opt_arr[] = {

/* SOL_SOCKET level options */
{ SO_LINGER,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
	sizeof (struct linger), 0 },

{ SO_DEBUG,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_KEEPALIVE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_DONTROUTE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
	},
{ SO_BROADCAST,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_TYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
{ SO_SNDBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_RCVBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_SNDTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
	sizeof (struct timeval), 0 },
{ SO_RCVTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
	sizeof (struct timeval), 0 },
{ SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
	},
{ SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
	0 },
{ SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
	0 },
{ SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
	0 },
{ SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
	0 },
{ SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ SO_DOMAIN,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },

{ SO_PROTOTYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },

/* IPPROTO_TCP level options */
{ TCP_NODELAY,	IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
	},
{ TCP_MAXSEG,	IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
	536 },

{ TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },

{ TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },

{ TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },

{ TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },

{ TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
	0 },

{ TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
	sizeof (int), 0 },

{ TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
	},

{ TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
	sizeof (int), 0 },

{ TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0	},

{ TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0	},

{ TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },

{ TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },

{ TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },

{ TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

/* Variable length: the value is an algorithm name string */
{ TCP_CONGESTION, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
	OP_VARLEN, CC_ALGO_NAME_MAX, 0 },

/* IPPROTO_IP and IPPROTO_IPV6 level options */
{ IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
	(OP_VARLEN|OP_NODEFAULT),
	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
{ T_IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
	(OP_VARLEN|OP_NODEFAULT),
	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },

{ IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ T_IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ IP_TTL,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
	sizeof (int), -1 /* not initialized */ },

{ IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
	sizeof (ipsec_req_t), -1 /* not initialized */ },

{ IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int),	0 /* no ifindex */ },

{ IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
	sizeof (int), 0 },

{ IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
	sizeof (int), -1 /* not initialized */ },

{ IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int),	0 /* no ifindex */ },

{ IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
	sizeof (in_addr_t),	-1 /* not initialized  */ },

{ IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
	sizeof (int), 0 },

{ IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
	(OP_NODEFAULT|OP_VARLEN),
	sizeof (struct in6_pktinfo), -1 /* not initialized */ },
{ IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
	OP_NODEFAULT,
	sizeof (sin6_t), -1 /* not initialized */ },
{ IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
	(OP_VARLEN|OP_NODEFAULT), 255*8,
	-1 /* not initialized */ },
{ IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
	(OP_VARLEN|OP_NODEFAULT), 255*8,
	-1 /* not initialized */ },
{ IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
	(OP_VARLEN|OP_NODEFAULT), 255*8,
	-1 /* not initialized */ },
{ IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
	(OP_VARLEN|OP_NODEFAULT), 255*8,
	-1 /* not initialized */ },
{ IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
	OP_NODEFAULT,
	sizeof (int), -1 /* not initialized */ },
{ IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
	OP_NODEFAULT,
	sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
{ IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0 },
{ IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0 },
{ IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0 },

/* Enable receipt of ancillary data */
{ IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0 },
{ IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0 },
{ IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0 },
{ _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0 },
{ IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0 },
{ IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0 },
{ IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0 },
{ IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (int), 0 },

{ IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
	sizeof (ipsec_req_t), -1 /* not initialized */ },
{ IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
	sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
};
237 
/*
 * Table of all supported option levels.
 *
 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 * any supported options in tcp_opt_arr, so the set of valid levels has to
 * be recorded separately.
 *
 * This is needed only for topmost TPI providers and is used only by the
 * XTI interfaces.
 */
optlevel_t	tcp_valid_levels_arr[] = {
	XTI_GENERIC,
	SOL_SOCKET,
	IPPROTO_TCP,
	IPPROTO_IP,
	IPPROTO_IPV6
};
253 
254 
/*
 * Entry counts of tcp_opt_arr and tcp_valid_levels_arr; both counts are
 * stored in tcp_opt_obj alongside the tables themselves.
 */
#define	TCP_OPT_ARR_CNT		A_CNT(tcp_opt_arr)
#define	TCP_VALID_LEVELS_CNT	A_CNT(tcp_valid_levels_arr)

uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
259 
/*
 * Option database object for TCP.
 *
 * This object bundles the TCP default/get/set handlers with the option
 * table and the table of valid levels.  It is the database handed to the
 * {sock,tpi}optcom_req() interface routines, which take care of option
 * management and the associated methods.
 */

optdb_obj_t tcp_opt_obj = {
	tcp_opt_default,	/* TCP default value function pointer */
	tcp_tpi_opt_get,	/* TCP get function pointer */
	tcp_tpi_opt_set,	/* TCP set function pointer */
	TCP_OPT_ARR_CNT,	/* TCP option database count of entries */
	tcp_opt_arr,		/* TCP option database */
	TCP_VALID_LEVELS_CNT,	/* TCP valid level count of entries */
	tcp_valid_levels_arr	/* TCP valid level array */
};
277 
/*
 * Ceiling for the TCP_INIT_CWND option: tcp_opt_set() rejects with EINVAL a
 * request above this value (the check is made for requests that exceed the
 * RFC 3390 limit and have passed the network configuration privilege check).
 */
static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
279 
280 /*
281  * Some TCP options can be "set" by requesting them in the option
282  * buffer. This is needed for XTI feature test though we do not
283  * allow it in general. We interpret that this mechanism is more
284  * applicable to OSI protocols and need not be allowed in general.
285  * This routine filters out options for which it is not allowed (most)
286  * and lets through those (few) for which it is. [ The XTI interface
287  * test suite specifics will imply that any XTI_GENERIC level XTI_* if
288  * ever implemented will have to be allowed here ].
289  */
290 static boolean_t
291 tcp_allow_connopt_set(int level, int name)
292 {
293 
294 	switch (level) {
295 	case IPPROTO_TCP:
296 		switch (name) {
297 		case TCP_NODELAY:
298 			return (B_TRUE);
299 		default:
300 			return (B_FALSE);
301 		}
302 		/*NOTREACHED*/
303 	default:
304 		return (B_FALSE);
305 	}
306 	/*NOTREACHED*/
307 }
308 
309 /*
310  * This routine gets default values of certain options whose default
311  * values are maintained by protocol specific code
312  */
313 /* ARGSUSED */
314 static int
315 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
316 {
317 	int32_t	*i1 = (int32_t *)ptr;
318 	tcp_stack_t	*tcps = Q_TO_TCP(q)->tcp_tcps;
319 
320 	switch (level) {
321 	case IPPROTO_TCP:
322 		switch (name) {
323 		case TCP_NOTIFY_THRESHOLD:
324 			*i1 = tcps->tcps_ip_notify_interval;
325 			break;
326 		case TCP_ABORT_THRESHOLD:
327 			*i1 = tcps->tcps_ip_abort_interval;
328 			break;
329 		case TCP_CONN_NOTIFY_THRESHOLD:
330 			*i1 = tcps->tcps_ip_notify_cinterval;
331 			break;
332 		case TCP_CONN_ABORT_THRESHOLD:
333 			*i1 = tcps->tcps_ip_abort_cinterval;
334 			break;
335 		default:
336 			return (-1);
337 		}
338 		break;
339 	case IPPROTO_IP:
340 		switch (name) {
341 		case IP_TTL:
342 			*i1 = tcps->tcps_ipv4_ttl;
343 			break;
344 		default:
345 			return (-1);
346 		}
347 		break;
348 	case IPPROTO_IPV6:
349 		switch (name) {
350 		case IPV6_UNICAST_HOPS:
351 			*i1 = tcps->tcps_ipv6_hoplimit;
352 			break;
353 		default:
354 			return (-1);
355 		}
356 		break;
357 	default:
358 		return (-1);
359 	}
360 	return (sizeof (int));
361 }
362 
363 /*
364  * TCP routine to get the values of options.
365  */
366 int
367 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
368 {
369 	int		*i1 = (int *)ptr;
370 	tcp_t		*tcp = connp->conn_tcp;
371 	conn_opt_arg_t	coas;
372 	int		retval;
373 
374 	coas.coa_connp = connp;
375 	coas.coa_ixa = connp->conn_ixa;
376 	coas.coa_ipp = &connp->conn_xmit_ipp;
377 	coas.coa_ancillary = B_FALSE;
378 	coas.coa_changed = 0;
379 
380 	switch (level) {
381 	case SOL_SOCKET:
382 		switch (name) {
383 		case SO_SND_COPYAVOID:
384 			*i1 = tcp->tcp_snd_zcopy_on ?
385 			    SO_SND_COPYAVOID : 0;
386 			return (sizeof (int));
387 		case SO_ACCEPTCONN:
388 			*i1 = (tcp->tcp_state == TCPS_LISTEN);
389 			return (sizeof (int));
390 		}
391 		break;
392 	case IPPROTO_TCP:
393 		switch (name) {
394 		case TCP_NODELAY:
395 			*i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
396 			return (sizeof (int));
397 		case TCP_MAXSEG:
398 			*i1 = tcp->tcp_mss;
399 			return (sizeof (int));
400 		case TCP_NOTIFY_THRESHOLD:
401 			*i1 = (int)tcp->tcp_first_timer_threshold;
402 			return (sizeof (int));
403 		case TCP_ABORT_THRESHOLD:
404 			*i1 = tcp->tcp_second_timer_threshold;
405 			return (sizeof (int));
406 		case TCP_CONN_NOTIFY_THRESHOLD:
407 			*i1 = tcp->tcp_first_ctimer_threshold;
408 			return (sizeof (int));
409 		case TCP_CONN_ABORT_THRESHOLD:
410 			*i1 = tcp->tcp_second_ctimer_threshold;
411 			return (sizeof (int));
412 		case TCP_INIT_CWND:
413 			*i1 = tcp->tcp_init_cwnd;
414 			return (sizeof (int));
415 		case TCP_KEEPALIVE_THRESHOLD:
416 			*i1 = tcp->tcp_ka_interval;
417 			return (sizeof (int));
418 
419 		/*
420 		 * TCP_KEEPIDLE expects value in seconds, but
421 		 * tcp_ka_interval is in milliseconds.
422 		 */
423 		case TCP_KEEPIDLE:
424 			*i1 = tcp->tcp_ka_interval / 1000;
425 			return (sizeof (int));
426 		case TCP_KEEPCNT:
427 			*i1 = tcp->tcp_ka_cnt;
428 			return (sizeof (int));
429 
430 		/*
431 		 * TCP_KEEPINTVL expects value in seconds, but
432 		 * tcp_ka_rinterval is in milliseconds.
433 		 */
434 		case TCP_KEEPINTVL:
435 			*i1 = tcp->tcp_ka_rinterval / 1000;
436 			return (sizeof (int));
437 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
438 			*i1 = tcp->tcp_ka_abort_thres;
439 			return (sizeof (int));
440 		case TCP_CONGESTION: {
441 			size_t len = strlcpy((char *)ptr, CC_ALGO(tcp)->name,
442 			    CC_ALGO_NAME_MAX);
443 			if (len >= CC_ALGO_NAME_MAX)
444 				return (-1);
445 			return (len + 1);
446 		}
447 		case TCP_CORK:
448 			*i1 = tcp->tcp_cork;
449 			return (sizeof (int));
450 		case TCP_RTO_INITIAL:
451 			*i1 = tcp->tcp_rto_initial;
452 			return (sizeof (uint32_t));
453 		case TCP_RTO_MIN:
454 			*i1 = tcp->tcp_rto_min;
455 			return (sizeof (uint32_t));
456 		case TCP_RTO_MAX:
457 			*i1 = tcp->tcp_rto_max;
458 			return (sizeof (uint32_t));
459 		case TCP_LINGER2:
460 			*i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
461 			return (sizeof (int));
462 		}
463 		break;
464 	case IPPROTO_IP:
465 		if (connp->conn_family != AF_INET)
466 			return (-1);
467 		switch (name) {
468 		case IP_OPTIONS:
469 		case T_IP_OPTIONS:
470 			/* Caller ensures enough space */
471 			return (ip_opt_get_user(connp, ptr));
472 		default:
473 			break;
474 		}
475 		break;
476 
477 	case IPPROTO_IPV6:
478 		/*
479 		 * IPPROTO_IPV6 options are only supported for sockets
480 		 * that are using IPv6 on the wire.
481 		 */
482 		if (connp->conn_ipversion != IPV6_VERSION) {
483 			return (-1);
484 		}
485 		switch (name) {
486 		case IPV6_PATHMTU:
487 			if (tcp->tcp_state < TCPS_ESTABLISHED)
488 				return (-1);
489 			break;
490 		}
491 		break;
492 	}
493 	mutex_enter(&connp->conn_lock);
494 	retval = conn_opt_get(&coas, level, name, ptr);
495 	mutex_exit(&connp->conn_lock);
496 	return (retval);
497 }
498 
499 /*
500  * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
501  * Parameters are assumed to be verified by the caller.
502  */
503 /* ARGSUSED */
504 int
505 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
506     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
507     void *thisdg_attrs, cred_t *cr)
508 {
509 	tcp_t	*tcp = connp->conn_tcp;
510 	int	*i1 = (int *)invalp;
511 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
512 	boolean_t checkonly;
513 	int	reterr;
514 	tcp_stack_t	*tcps = tcp->tcp_tcps;
515 	conn_opt_arg_t	coas;
516 	uint32_t	val = *((uint32_t *)invalp);
517 
518 	coas.coa_connp = connp;
519 	coas.coa_ixa = connp->conn_ixa;
520 	coas.coa_ipp = &connp->conn_xmit_ipp;
521 	coas.coa_ancillary = B_FALSE;
522 	coas.coa_changed = 0;
523 
524 	switch (optset_context) {
525 	case SETFN_OPTCOM_CHECKONLY:
526 		checkonly = B_TRUE;
527 		/*
528 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
529 		 * inlen != 0 implies value supplied and
530 		 * 	we have to "pretend" to set it.
531 		 * inlen == 0 implies that there is no
532 		 * 	value part in T_CHECK request and just validation
533 		 * done elsewhere should be enough, we just return here.
534 		 */
535 		if (inlen == 0) {
536 			*outlenp = 0;
537 			return (0);
538 		}
539 		break;
540 	case SETFN_OPTCOM_NEGOTIATE:
541 		checkonly = B_FALSE;
542 		break;
543 	case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
544 	case SETFN_CONN_NEGOTIATE:
545 		checkonly = B_FALSE;
546 		/*
547 		 * Negotiating local and "association-related" options
548 		 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
549 		 * primitives is allowed by XTI, but we choose
550 		 * to not implement this style negotiation for Internet
551 		 * protocols (We interpret it is a must for OSI world but
552 		 * optional for Internet protocols) for all options.
553 		 * [ Will do only for the few options that enable test
554 		 * suites that our XTI implementation of this feature
555 		 * works for transports that do allow it ]
556 		 */
557 		if (!tcp_allow_connopt_set(level, name)) {
558 			*outlenp = 0;
559 			return (EINVAL);
560 		}
561 		break;
562 	default:
563 		/*
564 		 * We should never get here
565 		 */
566 		*outlenp = 0;
567 		return (EINVAL);
568 	}
569 
570 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
571 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
572 
573 	/*
574 	 * For TCP, we should have no ancillary data sent down
575 	 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
576 	 * has to be zero.
577 	 */
578 	ASSERT(thisdg_attrs == NULL);
579 
580 	/*
581 	 * For fixed length options, no sanity check
582 	 * of passed in length is done. It is assumed *_optcom_req()
583 	 * routines do the right thing.
584 	 */
585 	switch (level) {
586 	case SOL_SOCKET:
587 		switch (name) {
588 		case SO_KEEPALIVE:
589 			if (checkonly) {
590 				/* check only case */
591 				break;
592 			}
593 
594 			if (!onoff) {
595 				if (connp->conn_keepalive) {
596 					if (tcp->tcp_ka_tid != 0) {
597 						(void) TCP_TIMER_CANCEL(tcp,
598 						    tcp->tcp_ka_tid);
599 						tcp->tcp_ka_tid = 0;
600 					}
601 					connp->conn_keepalive = 0;
602 				}
603 				break;
604 			}
605 			if (!connp->conn_keepalive) {
606 				/* Crank up the keepalive timer */
607 				tcp->tcp_ka_last_intrvl = 0;
608 				tcp->tcp_ka_tid = TCP_TIMER(tcp,
609 				    tcp_keepalive_timer, tcp->tcp_ka_interval);
610 				connp->conn_keepalive = 1;
611 			}
612 			break;
613 		case SO_SNDBUF: {
614 			if (*i1 > tcps->tcps_max_buf) {
615 				*outlenp = 0;
616 				return (ENOBUFS);
617 			}
618 			if (checkonly)
619 				break;
620 
621 			connp->conn_sndbuf = *i1;
622 			if (tcps->tcps_snd_lowat_fraction != 0) {
623 				connp->conn_sndlowat = connp->conn_sndbuf /
624 				    tcps->tcps_snd_lowat_fraction;
625 			}
626 			(void) tcp_maxpsz_set(tcp, B_TRUE);
627 			/*
628 			 * If we are flow-controlled, recheck the condition.
629 			 * There are apps that increase SO_SNDBUF size when
630 			 * flow-controlled (EWOULDBLOCK), and expect the flow
631 			 * control condition to be lifted right away.
632 			 */
633 			mutex_enter(&tcp->tcp_non_sq_lock);
634 			if (tcp->tcp_flow_stopped &&
635 			    TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
636 				tcp_clrqfull(tcp);
637 			}
638 			mutex_exit(&tcp->tcp_non_sq_lock);
639 			*outlenp = inlen;
640 			return (0);
641 		}
642 		case SO_RCVBUF:
643 			if (*i1 > tcps->tcps_max_buf) {
644 				*outlenp = 0;
645 				return (ENOBUFS);
646 			}
647 			/* Silently ignore zero */
648 			if (!checkonly && *i1 != 0) {
649 				*i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
650 				(void) tcp_rwnd_set(tcp, *i1);
651 			}
652 			/*
653 			 * XXX should we return the rwnd here
654 			 * and tcp_opt_get ?
655 			 */
656 			*outlenp = inlen;
657 			return (0);
658 		case SO_SND_COPYAVOID:
659 			if (!checkonly) {
660 				if (tcp->tcp_loopback ||
661 				    (onoff != 1) || !tcp_zcopy_check(tcp)) {
662 					*outlenp = 0;
663 					return (EOPNOTSUPP);
664 				}
665 				tcp->tcp_snd_zcopy_aware = 1;
666 			}
667 			*outlenp = inlen;
668 			return (0);
669 		}
670 		break;
671 	case IPPROTO_TCP:
672 		switch (name) {
673 		case TCP_NODELAY:
674 			if (!checkonly)
675 				tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
676 			break;
677 		case TCP_NOTIFY_THRESHOLD:
678 			if (!checkonly)
679 				tcp->tcp_first_timer_threshold = *i1;
680 			break;
681 		case TCP_ABORT_THRESHOLD:
682 			if (!checkonly)
683 				tcp->tcp_second_timer_threshold = *i1;
684 			break;
685 		case TCP_CONN_NOTIFY_THRESHOLD:
686 			if (!checkonly)
687 				tcp->tcp_first_ctimer_threshold = *i1;
688 			break;
689 		case TCP_CONN_ABORT_THRESHOLD:
690 			if (!checkonly)
691 				tcp->tcp_second_ctimer_threshold = *i1;
692 			break;
693 		case TCP_RECVDSTADDR:
694 			if (tcp->tcp_state > TCPS_LISTEN) {
695 				*outlenp = 0;
696 				return (EOPNOTSUPP);
697 			}
698 			/* Setting done in conn_opt_set */
699 			break;
700 		case TCP_INIT_CWND:
701 			if (checkonly)
702 				break;
703 
704 			/*
705 			 * Only allow socket with network configuration
706 			 * privilege to set the initial cwnd to be larger
707 			 * than allowed by RFC 3390.
708 			 */
709 			if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
710 				if ((reterr = secpolicy_ip_config(cr, B_TRUE))
711 				    != 0) {
712 					*outlenp = 0;
713 					return (reterr);
714 				}
715 				if (val > tcp_max_init_cwnd) {
716 					*outlenp = 0;
717 					return (EINVAL);
718 				}
719 			}
720 
721 			tcp->tcp_init_cwnd = val;
722 
723 			/*
724 			 * If the socket is connected, AND no outbound data
725 			 * has been sent, reset the actual cwnd values.
726 			 */
727 			if (tcp->tcp_state == TCPS_ESTABLISHED &&
728 			    tcp->tcp_iss == tcp->tcp_snxt - 1) {
729 				tcp->tcp_cwnd =
730 				    MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
731 			}
732 			break;
733 
734 		/*
735 		 * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
736 		 * is in milliseconds. TCP_KEEPIDLE is introduced for
737 		 * compatibility with other Unix flavors.
738 		 * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
739 		 * converting the input to milliseconds.
740 		 */
741 		case TCP_KEEPIDLE:
742 			*i1 *= 1000;
743 			/* FALLTHRU */
744 
745 		case TCP_KEEPALIVE_THRESHOLD:
746 			if (checkonly)
747 				break;
748 
749 			if (*i1 < tcps->tcps_keepalive_interval_low ||
750 			    *i1 > tcps->tcps_keepalive_interval_high) {
751 				*outlenp = 0;
752 				return (EINVAL);
753 			}
754 			if (*i1 != tcp->tcp_ka_interval) {
755 				tcp->tcp_ka_interval = *i1;
756 				/*
757 				 * Check if we need to restart the
758 				 * keepalive timer.
759 				 */
760 				if (tcp->tcp_ka_tid != 0) {
761 					ASSERT(connp->conn_keepalive);
762 					(void) TCP_TIMER_CANCEL(tcp,
763 					    tcp->tcp_ka_tid);
764 					tcp->tcp_ka_last_intrvl = 0;
765 					tcp->tcp_ka_tid = TCP_TIMER(tcp,
766 					    tcp_keepalive_timer,
767 					    tcp->tcp_ka_interval);
768 				}
769 			}
770 			break;
771 
772 		/*
773 		 * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
774 		 * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
775 		 * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
776 		 * tcp_ka_cnt.
777 		 */
778 		case TCP_KEEPCNT:
779 			if (checkonly)
780 				break;
781 
782 			if (*i1 == 0) {
783 				return (EINVAL);
784 			} else if (tcp->tcp_ka_rinterval == 0) {
785 				/*
786 				 * When TCP_KEEPCNT is specified without first
787 				 * specifying a TCP_KEEPINTVL, we infer an
788 				 * interval based on a tunable specific to our
789 				 * stack: the tcp_keepalive_abort_interval.
790 				 * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
791 				 * the unlikely event that that has been set.)
792 				 * Given the abort interval's default value of
793 				 * 480 seconds, low TCP_KEEPCNT values can
794 				 * result in intervals that exceed the default
795 				 * maximum RTO of 60 seconds.  Rather than
796 				 * fail in these cases, we (implicitly) clamp
797 				 * the interval at the maximum RTO; if the
798 				 * TCP_KEEPCNT is shortly followed by a
799 				 * TCP_KEEPINTVL (as we expect), the abort
800 				 * threshold will be recalculated correctly --
801 				 * and if a TCP_KEEPINTVL is not forthcoming,
802 				 * keep-alive will at least operate reasonably
803 				 * given the underconfigured state.
804 				 */
805 				uint32_t interval;
806 
807 				interval = tcp->tcp_ka_abort_thres / *i1;
808 
809 				if (interval < tcp->tcp_rto_min)
810 					interval = tcp->tcp_rto_min;
811 
812 				if (interval > tcp->tcp_rto_max)
813 					interval = tcp->tcp_rto_max;
814 
815 				tcp->tcp_ka_rinterval = interval;
816 			} else {
817 				if ((*i1 * tcp->tcp_ka_rinterval) <
818 				    tcps->tcps_keepalive_abort_interval_low ||
819 				    (*i1 * tcp->tcp_ka_rinterval) >
820 				    tcps->tcps_keepalive_abort_interval_high)
821 					return (EINVAL);
822 				tcp->tcp_ka_abort_thres =
823 				    (*i1 * tcp->tcp_ka_rinterval);
824 			}
825 			tcp->tcp_ka_cnt = *i1;
826 			break;
827 		case TCP_KEEPINTVL:
828 			/*
829 			 * TCP_KEEPINTVL is specified in seconds, but
830 			 * tcp_ka_rinterval is in milliseconds.
831 			 */
832 
833 			if (checkonly)
834 				break;
835 
836 			if ((*i1 * 1000) < tcp->tcp_rto_min ||
837 			    (*i1 * 1000) > tcp->tcp_rto_max)
838 				return (EINVAL);
839 
840 			if (tcp->tcp_ka_cnt == 0) {
841 				tcp->tcp_ka_cnt =
842 				    tcp->tcp_ka_abort_thres / (*i1 * 1000);
843 			} else {
844 				if ((*i1 * tcp->tcp_ka_cnt * 1000) <
845 				    tcps->tcps_keepalive_abort_interval_low ||
846 				    (*i1 * tcp->tcp_ka_cnt * 1000) >
847 				    tcps->tcps_keepalive_abort_interval_high)
848 					return (EINVAL);
849 				tcp->tcp_ka_abort_thres =
850 				    (*i1 * tcp->tcp_ka_cnt * 1000);
851 			}
852 			tcp->tcp_ka_rinterval = *i1 * 1000;
853 			break;
854 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
855 			if (!checkonly) {
856 				if (*i1 <
857 				    tcps->tcps_keepalive_abort_interval_low ||
858 				    *i1 >
859 				    tcps->tcps_keepalive_abort_interval_high) {
860 					*outlenp = 0;
861 					return (EINVAL);
862 				}
863 				tcp->tcp_ka_abort_thres = *i1;
864 				tcp->tcp_ka_cnt = 0;
865 				tcp->tcp_ka_rinterval = 0;
866 			}
867 			break;
868 		case TCP_CONGESTION: {
869 			struct cc_algo *algo;
870 
871 			if (checkonly) {
872 				break;
873 			}
874 
875 			/*
876 			 * Make sure the string is NUL-terminated. Some
877 			 * consumers pass only the number of characters
878 			 * in the string, and don't include the NUL
879 			 * terminator, so we set it for them.
880 			 */
881 			if (inlen < CC_ALGO_NAME_MAX) {
882 				invalp[inlen] = '\0';
883 			}
884 			invalp[CC_ALGO_NAME_MAX - 1] = '\0';
885 
886 			if ((algo = cc_load_algo((char *)invalp)) == NULL) {
887 				return (ENOENT);
888 			}
889 
890 			if (CC_ALGO(tcp)->cb_destroy != NULL) {
891 				CC_ALGO(tcp)->cb_destroy(&tcp->tcp_ccv);
892 			}
893 
894 			CC_DATA(tcp) = NULL;
895 			CC_ALGO(tcp) = algo;
896 
897 			if (CC_ALGO(tcp)->cb_init != NULL) {
898 				VERIFY0(CC_ALGO(tcp)->cb_init(&tcp->tcp_ccv));
899 			}
900 
901 			break;
902 		}
903 		case TCP_CORK:
904 			if (!checkonly) {
905 				/*
906 				 * if tcp->tcp_cork was set and is now
907 				 * being unset, we have to make sure that
908 				 * the remaining data gets sent out. Also
909 				 * unset tcp->tcp_cork so that tcp_wput_data()
910 				 * can send data even if it is less than mss
911 				 */
912 				if (tcp->tcp_cork && onoff == 0 &&
913 				    tcp->tcp_unsent > 0) {
914 					tcp->tcp_cork = B_FALSE;
915 					tcp_wput_data(tcp, NULL, B_FALSE);
916 				}
917 				tcp->tcp_cork = onoff;
918 			}
919 			break;
920 		case TCP_RTO_INITIAL:
921 			if (checkonly || val == 0)
922 				break;
923 
924 			/*
925 			 * Sanity checks
926 			 *
927 			 * The initial RTO should be bounded by the minimum
928 			 * and maximum RTO.  And it should also be smaller
929 			 * than the connect attempt abort timeout.  Otherwise,
930 			 * the connection won't be aborted in a period
931 			 * reasonably close to that timeout.
932 			 */
933 			if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
934 			    val > tcp->tcp_second_ctimer_threshold ||
935 			    val < tcps->tcps_rexmit_interval_initial_low ||
936 			    val > tcps->tcps_rexmit_interval_initial_high) {
937 				*outlenp = 0;
938 				return (EINVAL);
939 			}
940 			tcp->tcp_rto_initial = val;
941 
942 			/*
943 			 * If TCP has not sent anything, need to re-calculate
944 			 * tcp_rto.  Otherwise, this option change does not
945 			 * really affect anything.
946 			 */
947 			if (tcp->tcp_state >= TCPS_SYN_SENT)
948 				break;
949 
950 			tcp->tcp_rtt_sa = MSEC2NSEC(tcp->tcp_rto_initial) << 2;
951 			tcp->tcp_rtt_sd = MSEC2NSEC(tcp->tcp_rto_initial) >> 1;
952 			tcp->tcp_rto = tcp_calculate_rto(tcp, tcps,
953 			    tcps->tcps_conn_grace_period);
954 			break;
955 		case TCP_RTO_MIN:
956 			if (checkonly || val == 0)
957 				break;
958 
959 			if (val < tcps->tcps_rexmit_interval_min_low ||
960 			    val > tcps->tcps_rexmit_interval_min_high ||
961 			    val > tcp->tcp_rto_max) {
962 				*outlenp = 0;
963 				return (EINVAL);
964 			}
965 			tcp->tcp_rto_min = val;
966 			if (tcp->tcp_rto < val)
967 				tcp->tcp_rto = val;
968 			break;
969 		case TCP_RTO_MAX:
970 			if (checkonly || val == 0)
971 				break;
972 
973 			/*
974 			 * Sanity checks
975 			 *
976 			 * The maximum RTO should not be larger than the
977 			 * connection abort timeout.  Otherwise, the
978 			 * connection won't be aborted in a period reasonably
979 			 * close to that timeout.
980 			 */
981 			if (val < tcps->tcps_rexmit_interval_max_low ||
982 			    val > tcps->tcps_rexmit_interval_max_high ||
983 			    val < tcp->tcp_rto_min ||
984 			    val > tcp->tcp_second_timer_threshold) {
985 				*outlenp = 0;
986 				return (EINVAL);
987 			}
988 			tcp->tcp_rto_max = val;
989 			if (tcp->tcp_rto > val)
990 				tcp->tcp_rto = val;
991 			break;
992 		case TCP_LINGER2:
993 			if (checkonly || *i1 == 0)
994 				break;
995 
996 			/*
997 			 * Note that the option value's unit is second.  And
998 			 * the value should be bigger than the private
999 			 * parameter tcp_fin_wait_2_flush_interval's lower
1000 			 * bound and smaller than the current value of that
1001 			 * parameter.  It should be smaller than the current
1002 			 * value to avoid an app setting TCP_LINGER2 to a big
1003 			 * value, causing resources to be held up too long in
1004 			 * FIN-WAIT-2 state.
1005 			 */
1006 			if (*i1 < 0 ||
1007 			    tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
1008 			    *i1 ||
1009 			    tcps->tcps_fin_wait_2_flush_interval/SECONDS <
1010 			    *i1) {
1011 				*outlenp = 0;
1012 				return (EINVAL);
1013 			}
1014 			tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
1015 			break;
1016 		default:
1017 			break;
1018 		}
1019 		break;
1020 	case IPPROTO_IP:
1021 		if (connp->conn_family != AF_INET) {
1022 			*outlenp = 0;
1023 			return (EINVAL);
1024 		}
1025 		switch (name) {
1026 		case IP_SEC_OPT:
1027 			/*
1028 			 * We should not allow policy setting after
1029 			 * we start listening for connections.
1030 			 */
1031 			if (tcp->tcp_state == TCPS_LISTEN) {
1032 				return (EINVAL);
1033 			}
1034 			break;
1035 		}
1036 		break;
1037 	case IPPROTO_IPV6:
1038 		/*
1039 		 * IPPROTO_IPV6 options are only supported for sockets
1040 		 * that are using IPv6 on the wire.
1041 		 */
1042 		if (connp->conn_ipversion != IPV6_VERSION) {
1043 			*outlenp = 0;
1044 			return (EINVAL);
1045 		}
1046 
1047 		switch (name) {
1048 		case IPV6_RECVPKTINFO:
1049 			if (!checkonly) {
1050 				/* Force it to be sent up with the next msg */
1051 				tcp->tcp_recvifindex = 0;
1052 			}
1053 			break;
1054 		case IPV6_RECVTCLASS:
1055 			if (!checkonly) {
1056 				/* Force it to be sent up with the next msg */
1057 				tcp->tcp_recvtclass = 0xffffffffU;
1058 			}
1059 			break;
1060 		case IPV6_RECVHOPLIMIT:
1061 			if (!checkonly) {
1062 				/* Force it to be sent up with the next msg */
1063 				tcp->tcp_recvhops = 0xffffffffU;
1064 			}
1065 			break;
1066 		case IPV6_PKTINFO:
1067 			/* This is an extra check for TCP */
1068 			if (inlen == sizeof (struct in6_pktinfo)) {
1069 				struct in6_pktinfo *pkti;
1070 
1071 				pkti = (struct in6_pktinfo *)invalp;
1072 				/*
1073 				 * RFC 3542 states that ipi6_addr must be
1074 				 * the unspecified address when setting the
1075 				 * IPV6_PKTINFO sticky socket option on a
1076 				 * TCP socket.
1077 				 */
1078 				if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1079 					return (EINVAL);
1080 			}
1081 			break;
1082 		case IPV6_SEC_OPT:
1083 			/*
1084 			 * We should not allow policy setting after
1085 			 * we start listening for connections.
1086 			 */
1087 			if (tcp->tcp_state == TCPS_LISTEN) {
1088 				return (EINVAL);
1089 			}
1090 			break;
1091 		}
1092 		break;
1093 	}
1094 	reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1095 	    checkonly, cr);
1096 	if (reterr != 0) {
1097 		*outlenp = 0;
1098 		return (reterr);
1099 	}
1100 
1101 	/*
1102 	 * Common case of OK return with outval same as inval
1103 	 */
1104 	if (invalp != outvalp) {
1105 		/* don't trust bcopy for identical src/dst */
1106 		(void) bcopy(invalp, outvalp, inlen);
1107 	}
1108 	*outlenp = inlen;
1109 
1110 	if (coas.coa_changed & COA_HEADER_CHANGED) {
1111 		/* If we are connected we rebuild the headers */
1112 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1113 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1114 			reterr = tcp_build_hdrs(tcp);
1115 			if (reterr != 0)
1116 				return (reterr);
1117 		}
1118 	}
1119 	if (coas.coa_changed & COA_ROUTE_CHANGED) {
1120 		in6_addr_t nexthop;
1121 
1122 		/*
1123 		 * If we are connected we re-cache the information.
1124 		 * We ignore errors to preserve BSD behavior.
1125 		 * Note that we don't redo IPsec policy lookup here
1126 		 * since the final destination (or source) didn't change.
1127 		 */
1128 		ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1129 		    &connp->conn_faddr_v6, &nexthop);
1130 
1131 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1132 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1133 			(void) ip_attr_connect(connp, connp->conn_ixa,
1134 			    &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1135 			    &nexthop, connp->conn_fport, NULL, NULL,
1136 			    IPDF_VERIFY_DST);
1137 		}
1138 	}
1139 	if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1140 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1141 	}
1142 	if (coas.coa_changed & COA_WROFF_CHANGED) {
1143 		connp->conn_wroff = connp->conn_ht_iphc_allocated +
1144 		    tcps->tcps_wroff_xtra;
1145 		(void) proto_set_tx_wroff(connp->conn_rq, connp,
1146 		    connp->conn_wroff);
1147 	}
1148 	if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1149 		if (IPCL_IS_NONSTR(connp))
1150 			proto_set_rx_oob_opt(connp, onoff);
1151 	}
1152 	return (0);
1153 }
1154