xref: /illumos-gate/usr/src/uts/common/inet/ip/conn_opt.c (revision 45ede40b2394db7967e59f19288fae9b62efd4aa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
25  */
26 /* Copyright (c) 1990 Mentat Inc. */
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/strsun.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/xti_inet.h>
34 #include <sys/ucred.h>
35 #include <sys/zone.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/atomic.h>
41 #include <sys/policy.h>
42 
43 #include <sys/systm.h>
44 #include <sys/param.h>
45 #include <sys/kmem.h>
46 #include <sys/sdt.h>
47 #include <sys/socket.h>
48 #include <sys/ethernet.h>
49 #include <sys/mac.h>
50 #include <net/if.h>
51 #include <net/if_types.h>
52 #include <net/if_arp.h>
53 #include <net/route.h>
54 #include <sys/sockio.h>
55 #include <netinet/in.h>
56 #include <net/if_dl.h>
57 
58 #include <inet/common.h>
59 #include <inet/mi.h>
60 #include <inet/mib2.h>
61 #include <inet/nd.h>
62 #include <inet/arp.h>
63 #include <inet/snmpcom.h>
64 #include <inet/kstatcom.h>
65 
66 #include <netinet/igmp_var.h>
67 #include <netinet/ip6.h>
68 #include <netinet/icmp6.h>
69 #include <netinet/sctp.h>
70 
71 #include <inet/ip.h>
72 #include <inet/ip_impl.h>
73 #include <inet/ip6.h>
74 #include <inet/ip6_asp.h>
75 #include <inet/tcp.h>
76 #include <inet/ip_multi.h>
77 #include <inet/ip_if.h>
78 #include <inet/ip_ire.h>
79 #include <inet/ip_ftable.h>
80 #include <inet/ip_rts.h>
81 #include <inet/optcom.h>
82 #include <inet/ip_ndp.h>
83 #include <inet/ip_listutils.h>
84 #include <netinet/igmp.h>
85 #include <netinet/ip_mroute.h>
86 #include <netinet/udp.h>
87 #include <inet/ipp_common.h>
88 
89 #include <net/pfkeyv2.h>
90 #include <inet/sadb.h>
91 #include <inet/ipsec_impl.h>
92 #include <inet/ipdrop.h>
93 #include <inet/ip_netinfo.h>
94 
95 #include <inet/ipclassifier.h>
96 #include <inet/sctp_ip.h>
97 #include <inet/sctp/sctp_impl.h>
98 #include <inet/udp_impl.h>
99 #include <sys/sunddi.h>
100 
101 #include <sys/tsol/label.h>
102 #include <sys/tsol/tnet.h>
103 
104 /*
105  * Return how much size is needed for the different ancillary data items
106  */
107 uint_t
108 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
109     ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
110 {
111 	uint_t		ancil_size;
112 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
113 
114 	/*
115 	 * If IP_RECVDSTADDR is set we include the destination IP
116 	 * address as an option. With IP_RECVOPTS we include all
117 	 * the IP options.
118 	 */
119 	ancil_size = 0;
120 	if (recv_ancillary.crb_recvdstaddr &&
121 	    (ira->ira_flags & IRAF_IS_IPV4)) {
122 		ancil_size += sizeof (struct T_opthdr) +
123 		    sizeof (struct in_addr);
124 		IP_STAT(ipst, conn_in_recvdstaddr);
125 	}
126 
127 	/*
128 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
129 	 * are different
130 	 */
131 	if (recv_ancillary.crb_ip_recvpktinfo &&
132 	    connp->conn_family == AF_INET) {
133 		ancil_size += sizeof (struct T_opthdr) +
134 		    sizeof (struct in_pktinfo);
135 		IP_STAT(ipst, conn_in_recvpktinfo);
136 	}
137 
138 	if ((recv_ancillary.crb_recvopts) &&
139 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
140 		ancil_size += sizeof (struct T_opthdr) +
141 		    ipp->ipp_ipv4_options_len;
142 		IP_STAT(ipst, conn_in_recvopts);
143 	}
144 
145 	if (recv_ancillary.crb_recvslla) {
146 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
147 		ill_t *ill;
148 
149 		/* Make sure ira_l2src is setup if not already */
150 		if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
151 			ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
152 			    ipst);
153 			if (ill != NULL) {
154 				ip_setl2src(mp, ira, ill);
155 				ill_refrele(ill);
156 			}
157 		}
158 		ancil_size += sizeof (struct T_opthdr) +
159 		    sizeof (struct sockaddr_dl);
160 		IP_STAT(ipst, conn_in_recvslla);
161 	}
162 
163 	if (recv_ancillary.crb_recvif) {
164 		ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
165 		IP_STAT(ipst, conn_in_recvif);
166 	}
167 
168 	/*
169 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
170 	 * are different
171 	 */
172 	if (recv_ancillary.crb_ip_recvpktinfo &&
173 	    connp->conn_family == AF_INET6) {
174 		ancil_size += sizeof (struct T_opthdr) +
175 		    sizeof (struct in6_pktinfo);
176 		IP_STAT(ipst, conn_in_recvpktinfo);
177 	}
178 
179 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
180 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
181 		IP_STAT(ipst, conn_in_recvhoplimit);
182 	}
183 
184 	if (recv_ancillary.crb_ipv6_recvtclass) {
185 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
186 		IP_STAT(ipst, conn_in_recvtclass);
187 	}
188 
189 	if (recv_ancillary.crb_ipv6_recvhopopts &&
190 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
191 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
192 		IP_STAT(ipst, conn_in_recvhopopts);
193 	}
194 	/*
195 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
196 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
197 	 * options that appear before a routing header.
198 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
199 	 */
200 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
201 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
202 		    (recv_ancillary.crb_ipv6_recvdstopts &&
203 		    recv_ancillary.crb_ipv6_recvrthdr)) {
204 			ancil_size += sizeof (struct T_opthdr) +
205 			    ipp->ipp_rthdrdstoptslen;
206 			IP_STAT(ipst, conn_in_recvrthdrdstopts);
207 		}
208 	}
209 	if ((recv_ancillary.crb_ipv6_recvrthdr) &&
210 	    (ipp->ipp_fields & IPPF_RTHDR)) {
211 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
212 		IP_STAT(ipst, conn_in_recvrthdr);
213 	}
214 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
215 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
216 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
217 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
218 		IP_STAT(ipst, conn_in_recvdstopts);
219 	}
220 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
221 		ancil_size += sizeof (struct T_opthdr) +
222 		    ucredminsize(ira->ira_cred);
223 		IP_STAT(ipst, conn_in_recvucred);
224 	}
225 
226 	/*
227 	 * If SO_TIMESTAMP is set allocate the appropriate sized
228 	 * buffer. Since gethrestime() expects a pointer aligned
229 	 * argument, we allocate space necessary for extra
230 	 * alignment (even though it might not be used).
231 	 */
232 	if (recv_ancillary.crb_timestamp) {
233 		ancil_size += sizeof (struct T_opthdr) +
234 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
235 		IP_STAT(ipst, conn_in_timestamp);
236 	}
237 
238 	/*
239 	 * If IP_RECVTOS is set allocate the appropriately sized buffer
240 	 */
241 	if (recv_ancillary.crb_recvtos &&
242 	    (ira->ira_flags & IRAF_IS_IPV4)) {
243 		ancil_size += sizeof (struct T_opthdr) +
244 		    P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
245 		IP_STAT(ipst, conn_in_recvtos);
246 	}
247 
248 	/*
249 	 * If IP_RECVTTL is set allocate the appropriate sized buffer
250 	 */
251 	if (recv_ancillary.crb_recvttl &&
252 	    (ira->ira_flags & IRAF_IS_IPV4)) {
253 		ancil_size += sizeof (struct T_opthdr) +
254 		    P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
255 		IP_STAT(ipst, conn_in_recvttl);
256 	}
257 
258 	return (ancil_size);
259 }
260 
261 /*
262  * Lay down the ancillary data items at "ancil_buf".
263  * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
264  * large buffer - ancil_size.
265  */
266 void
267 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
268     ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
269 {
270 	/*
271 	 * Copy in destination address before options to avoid
272 	 * any padding issues.
273 	 */
274 	if (recv_ancillary.crb_recvdstaddr &&
275 	    (ira->ira_flags & IRAF_IS_IPV4)) {
276 		struct T_opthdr *toh;
277 		ipaddr_t *dstptr;
278 
279 		toh = (struct T_opthdr *)ancil_buf;
280 		toh->level = IPPROTO_IP;
281 		toh->name = IP_RECVDSTADDR;
282 		toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
283 		toh->status = 0;
284 		ancil_buf += sizeof (struct T_opthdr);
285 		dstptr = (ipaddr_t *)ancil_buf;
286 		*dstptr = ipp->ipp_addr_v4;
287 		ancil_buf += sizeof (ipaddr_t);
288 		ancil_size -= toh->len;
289 	}
290 
291 	/*
292 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
293 	 * are different
294 	 */
295 	if (recv_ancillary.crb_ip_recvpktinfo &&
296 	    connp->conn_family == AF_INET) {
297 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
298 		struct T_opthdr *toh;
299 		struct in_pktinfo *pktinfop;
300 		ill_t *ill;
301 		ipif_t *ipif;
302 
303 		toh = (struct T_opthdr *)ancil_buf;
304 		toh->level = IPPROTO_IP;
305 		toh->name = IP_PKTINFO;
306 		toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
307 		toh->status = 0;
308 		ancil_buf += sizeof (struct T_opthdr);
309 		pktinfop = (struct in_pktinfo *)ancil_buf;
310 
311 		pktinfop->ipi_ifindex = ira->ira_ruifindex;
312 		pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
313 
314 		/* Find a good address to report */
315 		ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
316 		if (ill != NULL) {
317 			ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
318 			if (ipif != NULL) {
319 				pktinfop->ipi_spec_dst.s_addr =
320 				    ipif->ipif_lcl_addr;
321 				ipif_refrele(ipif);
322 			}
323 			ill_refrele(ill);
324 		}
325 		pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
326 		ancil_buf += sizeof (struct in_pktinfo);
327 		ancil_size -= toh->len;
328 	}
329 
330 	if ((recv_ancillary.crb_recvopts) &&
331 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
332 		struct T_opthdr *toh;
333 
334 		toh = (struct T_opthdr *)ancil_buf;
335 		toh->level = IPPROTO_IP;
336 		toh->name = IP_RECVOPTS;
337 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
338 		toh->status = 0;
339 		ancil_buf += sizeof (struct T_opthdr);
340 		bcopy(ipp->ipp_ipv4_options, ancil_buf,
341 		    ipp->ipp_ipv4_options_len);
342 		ancil_buf += ipp->ipp_ipv4_options_len;
343 		ancil_size -= toh->len;
344 	}
345 
346 	if (recv_ancillary.crb_recvslla) {
347 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
348 		struct T_opthdr *toh;
349 		struct sockaddr_dl *dstptr;
350 		ill_t *ill;
351 		int alen = 0;
352 
353 		ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
354 		if (ill != NULL)
355 			alen = ill->ill_phys_addr_length;
356 
357 		/*
358 		 * For loopback multicast and broadcast the packet arrives
359 		 * with ira_ruifdex being the physical interface, but
360 		 * ira_l2src is all zero since ip_postfrag_loopback doesn't
361 		 * know our l2src. We don't report the address in that case.
362 		 */
363 		if (ira->ira_flags & IRAF_LOOPBACK)
364 			alen = 0;
365 
366 		toh = (struct T_opthdr *)ancil_buf;
367 		toh->level = IPPROTO_IP;
368 		toh->name = IP_RECVSLLA;
369 		toh->len = sizeof (struct T_opthdr) +
370 		    sizeof (struct sockaddr_dl);
371 		toh->status = 0;
372 		ancil_buf += sizeof (struct T_opthdr);
373 		dstptr = (struct sockaddr_dl *)ancil_buf;
374 		dstptr->sdl_family = AF_LINK;
375 		dstptr->sdl_index = ira->ira_ruifindex;
376 		if (ill != NULL)
377 			dstptr->sdl_type = ill->ill_type;
378 		else
379 			dstptr->sdl_type = 0;
380 		dstptr->sdl_nlen = 0;
381 		dstptr->sdl_alen = alen;
382 		dstptr->sdl_slen = 0;
383 		bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
384 		ancil_buf += sizeof (struct sockaddr_dl);
385 		ancil_size -= toh->len;
386 		if (ill != NULL)
387 			ill_refrele(ill);
388 	}
389 
390 	if (recv_ancillary.crb_recvif) {
391 		struct T_opthdr *toh;
392 		uint_t		*dstptr;
393 
394 		toh = (struct T_opthdr *)ancil_buf;
395 		toh->level = IPPROTO_IP;
396 		toh->name = IP_RECVIF;
397 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
398 		toh->status = 0;
399 		ancil_buf += sizeof (struct T_opthdr);
400 		dstptr = (uint_t *)ancil_buf;
401 		*dstptr = ira->ira_ruifindex;
402 		ancil_buf += sizeof (uint_t);
403 		ancil_size -= toh->len;
404 	}
405 
406 	/*
407 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
408 	 * are different
409 	 */
410 	if (recv_ancillary.crb_ip_recvpktinfo &&
411 	    connp->conn_family == AF_INET6) {
412 		struct T_opthdr *toh;
413 		struct in6_pktinfo *pkti;
414 
415 		toh = (struct T_opthdr *)ancil_buf;
416 		toh->level = IPPROTO_IPV6;
417 		toh->name = IPV6_PKTINFO;
418 		toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
419 		toh->status = 0;
420 		ancil_buf += sizeof (struct T_opthdr);
421 		pkti = (struct in6_pktinfo *)ancil_buf;
422 		if (ira->ira_flags & IRAF_IS_IPV4) {
423 			IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
424 			    &pkti->ipi6_addr);
425 		} else {
426 			pkti->ipi6_addr = ipp->ipp_addr;
427 		}
428 		pkti->ipi6_ifindex = ira->ira_ruifindex;
429 
430 		ancil_buf += sizeof (*pkti);
431 		ancil_size -= toh->len;
432 	}
433 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
434 		struct T_opthdr *toh;
435 
436 		toh = (struct T_opthdr *)ancil_buf;
437 		toh->level = IPPROTO_IPV6;
438 		toh->name = IPV6_HOPLIMIT;
439 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
440 		toh->status = 0;
441 		ancil_buf += sizeof (struct T_opthdr);
442 		*(uint_t *)ancil_buf = ipp->ipp_hoplimit;
443 		ancil_buf += sizeof (uint_t);
444 		ancil_size -= toh->len;
445 	}
446 	if (recv_ancillary.crb_ipv6_recvtclass) {
447 		struct T_opthdr *toh;
448 
449 		toh = (struct T_opthdr *)ancil_buf;
450 		toh->level = IPPROTO_IPV6;
451 		toh->name = IPV6_TCLASS;
452 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
453 		toh->status = 0;
454 		ancil_buf += sizeof (struct T_opthdr);
455 
456 		if (ira->ira_flags & IRAF_IS_IPV4)
457 			*(uint_t *)ancil_buf = ipp->ipp_type_of_service;
458 		else
459 			*(uint_t *)ancil_buf = ipp->ipp_tclass;
460 		ancil_buf += sizeof (uint_t);
461 		ancil_size -= toh->len;
462 	}
463 	if (recv_ancillary.crb_ipv6_recvhopopts &&
464 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
465 		struct T_opthdr *toh;
466 
467 		toh = (struct T_opthdr *)ancil_buf;
468 		toh->level = IPPROTO_IPV6;
469 		toh->name = IPV6_HOPOPTS;
470 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
471 		toh->status = 0;
472 		ancil_buf += sizeof (struct T_opthdr);
473 		bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
474 		ancil_buf += ipp->ipp_hopoptslen;
475 		ancil_size -= toh->len;
476 	}
477 	/*
478 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
479 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
480 	 * options that appear before a routing header.
481 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
482 	 */
483 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
484 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
485 		    (recv_ancillary.crb_ipv6_recvdstopts &&
486 		    recv_ancillary.crb_ipv6_recvrthdr)) {
487 			struct T_opthdr *toh;
488 
489 			toh = (struct T_opthdr *)ancil_buf;
490 			toh->level = IPPROTO_IPV6;
491 			toh->name = IPV6_DSTOPTS;
492 			toh->len = sizeof (struct T_opthdr) +
493 			    ipp->ipp_rthdrdstoptslen;
494 			toh->status = 0;
495 			ancil_buf += sizeof (struct T_opthdr);
496 			bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
497 			    ipp->ipp_rthdrdstoptslen);
498 			ancil_buf += ipp->ipp_rthdrdstoptslen;
499 			ancil_size -= toh->len;
500 		}
501 	}
502 	if (recv_ancillary.crb_ipv6_recvrthdr &&
503 	    (ipp->ipp_fields & IPPF_RTHDR)) {
504 		struct T_opthdr *toh;
505 
506 		toh = (struct T_opthdr *)ancil_buf;
507 		toh->level = IPPROTO_IPV6;
508 		toh->name = IPV6_RTHDR;
509 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
510 		toh->status = 0;
511 		ancil_buf += sizeof (struct T_opthdr);
512 		bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
513 		ancil_buf += ipp->ipp_rthdrlen;
514 		ancil_size -= toh->len;
515 	}
516 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
517 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
518 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
519 		struct T_opthdr *toh;
520 
521 		toh = (struct T_opthdr *)ancil_buf;
522 		toh->level = IPPROTO_IPV6;
523 		toh->name = IPV6_DSTOPTS;
524 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
525 		toh->status = 0;
526 		ancil_buf += sizeof (struct T_opthdr);
527 		bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
528 		ancil_buf += ipp->ipp_dstoptslen;
529 		ancil_size -= toh->len;
530 	}
531 
532 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
533 		struct T_opthdr *toh;
534 		cred_t		*rcr = connp->conn_cred;
535 
536 		toh = (struct T_opthdr *)ancil_buf;
537 		toh->level = SOL_SOCKET;
538 		toh->name = SCM_UCRED;
539 		toh->len = sizeof (struct T_opthdr) +
540 		    ucredminsize(ira->ira_cred);
541 		toh->status = 0;
542 		(void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
543 		ancil_buf += toh->len;
544 		ancil_size -= toh->len;
545 	}
546 	if (recv_ancillary.crb_timestamp) {
547 		struct	T_opthdr *toh;
548 
549 		toh = (struct T_opthdr *)ancil_buf;
550 		toh->level = SOL_SOCKET;
551 		toh->name = SCM_TIMESTAMP;
552 		toh->len = sizeof (struct T_opthdr) +
553 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
554 		toh->status = 0;
555 		ancil_buf += sizeof (struct T_opthdr);
556 		/* Align for gethrestime() */
557 		ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
558 		    sizeof (intptr_t));
559 		gethrestime((timestruc_t *)ancil_buf);
560 		ancil_buf = (uchar_t *)toh + toh->len;
561 		ancil_size -= toh->len;
562 	}
563 
564 	if (recv_ancillary.crb_recvtos &&
565 	    (ira->ira_flags & IRAF_IS_IPV4)) {
566 		struct	T_opthdr *toh;
567 		uint8_t	*dstptr;
568 
569 		toh = (struct T_opthdr *)ancil_buf;
570 		toh->level = IPPROTO_IP;
571 		toh->name = IP_RECVTOS;
572 		toh->len = sizeof (struct T_opthdr) +
573 		    P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
574 		toh->status = 0;
575 		ancil_buf += sizeof (struct T_opthdr);
576 		dstptr = (uint8_t *)ancil_buf;
577 		*dstptr = ipp->ipp_type_of_service;
578 		ancil_buf = (uchar_t *)toh + toh->len;
579 		ancil_size -= toh->len;
580 		ASSERT(__TPI_TOPT_ISALIGNED(toh));
581 	}
582 
583 	if (recv_ancillary.crb_recvttl &&
584 	    (ira->ira_flags & IRAF_IS_IPV4)) {
585 		struct	T_opthdr *toh;
586 		uint8_t	*dstptr;
587 
588 		toh = (struct T_opthdr *)ancil_buf;
589 		toh->level = IPPROTO_IP;
590 		toh->name = IP_RECVTTL;
591 		toh->len = sizeof (struct T_opthdr) +
592 		    P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
593 		toh->status = 0;
594 		ancil_buf += sizeof (struct T_opthdr);
595 		dstptr = (uint8_t *)ancil_buf;
596 		*dstptr = ipp->ipp_hoplimit;
597 		ancil_buf = (uchar_t *)toh + toh->len;
598 		ancil_size -= toh->len;
599 		ASSERT(__TPI_TOPT_ISALIGNED(toh));
600 	}
601 
602 	/* Consumed all of allocated space */
603 	ASSERT(ancil_size == 0);
604 
605 }
606 
607 /*
608  * This routine retrieves the current status of socket options.
609  * It returns the size of the option retrieved, or -1.
610  */
611 int
612 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
613     uchar_t *ptr)
614 {
615 	int		*i1 = (int *)ptr;
616 	conn_t		*connp = coa->coa_connp;
617 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
618 	ip_pkt_t	*ipp = coa->coa_ipp;
619 	ip_stack_t	*ipst = ixa->ixa_ipst;
620 	uint_t		len;
621 
622 	ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
623 
624 	switch (level) {
625 	case SOL_SOCKET:
626 		switch (name) {
627 		case SO_DEBUG:
628 			*i1 = connp->conn_debug ? SO_DEBUG : 0;
629 			break;	/* goto sizeof (int) option return */
630 		case SO_KEEPALIVE:
631 			*i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
632 			break;
633 		case SO_LINGER:	{
634 			struct linger *lgr = (struct linger *)ptr;
635 
636 			lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
637 			lgr->l_linger = connp->conn_lingertime;
638 			}
639 			return (sizeof (struct linger));
640 
641 		case SO_OOBINLINE:
642 			*i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
643 			break;
644 		case SO_REUSEADDR:
645 			*i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
646 			break;	/* goto sizeof (int) option return */
647 		case SO_TYPE:
648 			*i1 = connp->conn_so_type;
649 			break;	/* goto sizeof (int) option return */
650 		case SO_DONTROUTE:
651 			*i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
652 			    SO_DONTROUTE : 0;
653 			break;	/* goto sizeof (int) option return */
654 		case SO_USELOOPBACK:
655 			*i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
656 			break;	/* goto sizeof (int) option return */
657 		case SO_BROADCAST:
658 			*i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
659 			break;	/* goto sizeof (int) option return */
660 
661 		case SO_SNDBUF:
662 			*i1 = connp->conn_sndbuf;
663 			break;	/* goto sizeof (int) option return */
664 		case SO_RCVBUF:
665 			*i1 = connp->conn_rcvbuf;
666 			break;	/* goto sizeof (int) option return */
667 		case SO_RCVTIMEO:
668 		case SO_SNDTIMEO:
669 			/*
670 			 * Pass these two options in order for third part
671 			 * protocol usage. Here just return directly.
672 			 */
673 			*i1 = 0;
674 			break;
675 		case SO_DGRAM_ERRIND:
676 			*i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
677 			break;	/* goto sizeof (int) option return */
678 		case SO_RECVUCRED:
679 			*i1 = connp->conn_recv_ancillary.crb_recvucred;
680 			break;	/* goto sizeof (int) option return */
681 		case SO_TIMESTAMP:
682 			*i1 = connp->conn_recv_ancillary.crb_timestamp;
683 			break;	/* goto sizeof (int) option return */
684 		case SO_VRRP:
685 			*i1 = connp->conn_isvrrp;
686 			break;	/* goto sizeof (int) option return */
687 		case SO_ANON_MLP:
688 			*i1 = connp->conn_anon_mlp;
689 			break;	/* goto sizeof (int) option return */
690 		case SO_MAC_EXEMPT:
691 			*i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
692 			break;	/* goto sizeof (int) option return */
693 		case SO_MAC_IMPLICIT:
694 			*i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
695 			break;	/* goto sizeof (int) option return */
696 		case SO_ALLZONES:
697 			*i1 = connp->conn_allzones;
698 			break;	/* goto sizeof (int) option return */
699 		case SO_EXCLBIND:
700 			*i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
701 			break;
702 		case SO_PROTOTYPE:
703 			*i1 = connp->conn_proto;
704 			break;
705 
706 		case SO_DOMAIN:
707 			*i1 = connp->conn_family;
708 			break;
709 		default:
710 			return (-1);
711 		}
712 		break;
713 	case IPPROTO_IP:
714 		if (connp->conn_family != AF_INET)
715 			return (-1);
716 		switch (name) {
717 		case IP_OPTIONS:
718 		case T_IP_OPTIONS:
719 			if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
720 				return (0);
721 
722 			len = ipp->ipp_ipv4_options_len;
723 			if (len > 0) {
724 				bcopy(ipp->ipp_ipv4_options, ptr, len);
725 			}
726 			return (len);
727 
728 		case IP_PKTINFO: {
729 			/*
730 			 * This also handles IP_RECVPKTINFO.
731 			 * IP_PKTINFO and IP_RECVPKTINFO have same value.
732 			 * Differentiation is based on the size of the
733 			 * argument passed in.
734 			 */
735 			struct in_pktinfo *pktinfo;
736 
737 #ifdef notdef
738 			/* optcom doesn't provide a length with "get" */
739 			if (inlen == sizeof (int)) {
740 				/* This is IP_RECVPKTINFO option. */
741 				*i1 = connp->conn_recv_ancillary.
742 				    crb_ip_recvpktinfo;
743 				return (sizeof (int));
744 			}
745 #endif
746 			/* XXX assumes that caller has room for max size! */
747 
748 			pktinfo = (struct in_pktinfo *)ptr;
749 			pktinfo->ipi_ifindex = ixa->ixa_ifindex;
750 			if (ipp->ipp_fields & IPPF_ADDR)
751 				pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
752 			else
753 				pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
754 			return (sizeof (struct in_pktinfo));
755 		}
756 		case IP_DONTFRAG:
757 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
758 			return (sizeof (int));
759 		case IP_TOS:
760 		case T_IP_TOS:
761 			*i1 = (int)ipp->ipp_type_of_service;
762 			break;	/* goto sizeof (int) option return */
763 		case IP_TTL:
764 			*i1 = (int)ipp->ipp_unicast_hops;
765 			break;	/* goto sizeof (int) option return */
766 		case IP_DHCPINIT_IF:
767 			return (-1);
768 		case IP_NEXTHOP:
769 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
770 				*(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
771 				return (sizeof (ipaddr_t));
772 			} else {
773 				return (0);
774 			}
775 
776 		case IP_MULTICAST_IF:
777 			/* 0 address if not set */
778 			*(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
779 			return (sizeof (ipaddr_t));
780 		case IP_MULTICAST_TTL:
781 			*(uchar_t *)ptr = ixa->ixa_multicast_ttl;
782 			return (sizeof (uchar_t));
783 		case IP_MULTICAST_LOOP:
784 			*ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
785 			return (sizeof (uint8_t));
786 		case IP_RECVOPTS:
787 			*i1 = connp->conn_recv_ancillary.crb_recvopts;
788 			break;	/* goto sizeof (int) option return */
789 		case IP_RECVDSTADDR:
790 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
791 			break;	/* goto sizeof (int) option return */
792 		case IP_RECVIF:
793 			*i1 = connp->conn_recv_ancillary.crb_recvif;
794 			break;	/* goto sizeof (int) option return */
795 		case IP_RECVSLLA:
796 			*i1 = connp->conn_recv_ancillary.crb_recvslla;
797 			break;	/* goto sizeof (int) option return */
798 		case IP_RECVTTL:
799 			*i1 = connp->conn_recv_ancillary.crb_recvttl;
800 			break;	/* goto sizeof (int) option return */
801 		case IP_RECVTOS:
802 			*i1 = connp->conn_recv_ancillary.crb_recvtos;
803 			break;	/* goto sizeof (int) option return */
804 		case IP_ADD_MEMBERSHIP:
805 		case IP_DROP_MEMBERSHIP:
806 		case MCAST_JOIN_GROUP:
807 		case MCAST_LEAVE_GROUP:
808 		case IP_BLOCK_SOURCE:
809 		case IP_UNBLOCK_SOURCE:
810 		case IP_ADD_SOURCE_MEMBERSHIP:
811 		case IP_DROP_SOURCE_MEMBERSHIP:
812 		case MCAST_BLOCK_SOURCE:
813 		case MCAST_UNBLOCK_SOURCE:
814 		case MCAST_JOIN_SOURCE_GROUP:
815 		case MCAST_LEAVE_SOURCE_GROUP:
816 		case MRT_INIT:
817 		case MRT_DONE:
818 		case MRT_ADD_VIF:
819 		case MRT_DEL_VIF:
820 		case MRT_ADD_MFC:
821 		case MRT_DEL_MFC:
822 			/* cannot "get" the value for these */
823 			return (-1);
824 		case MRT_VERSION:
825 		case MRT_ASSERT:
826 			(void) ip_mrouter_get(name, connp, ptr);
827 			return (sizeof (int));
828 		case IP_SEC_OPT:
829 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
830 			    IPSEC_AF_V4));
831 		case IP_BOUND_IF:
832 			/* Zero if not set */
833 			*i1 = connp->conn_bound_if;
834 			break;	/* goto sizeof (int) option return */
835 		case IP_UNSPEC_SRC:
836 			*i1 = connp->conn_unspec_src;
837 			break;	/* goto sizeof (int) option return */
838 		case IP_BROADCAST_TTL:
839 			if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
840 				*(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
841 			else
842 				*(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
843 			return (sizeof (uchar_t));
844 		default:
845 			return (-1);
846 		}
847 		break;
848 	case IPPROTO_IPV6:
849 		if (connp->conn_family != AF_INET6)
850 			return (-1);
851 		switch (name) {
852 		case IPV6_UNICAST_HOPS:
853 			*i1 = (int)ipp->ipp_unicast_hops;
854 			break;	/* goto sizeof (int) option return */
855 		case IPV6_MULTICAST_IF:
856 			/* 0 index if not set */
857 			*i1 = ixa->ixa_multicast_ifindex;
858 			break;	/* goto sizeof (int) option return */
859 		case IPV6_MULTICAST_HOPS:
860 			*i1 = ixa->ixa_multicast_ttl;
861 			break;	/* goto sizeof (int) option return */
862 		case IPV6_MULTICAST_LOOP:
863 			*i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
864 			break;	/* goto sizeof (int) option return */
865 		case IPV6_JOIN_GROUP:
866 		case IPV6_LEAVE_GROUP:
867 		case MCAST_JOIN_GROUP:
868 		case MCAST_LEAVE_GROUP:
869 		case MCAST_BLOCK_SOURCE:
870 		case MCAST_UNBLOCK_SOURCE:
871 		case MCAST_JOIN_SOURCE_GROUP:
872 		case MCAST_LEAVE_SOURCE_GROUP:
873 			/* cannot "get" the value for these */
874 			return (-1);
875 		case IPV6_BOUND_IF:
876 			/* Zero if not set */
877 			*i1 = connp->conn_bound_if;
878 			break;	/* goto sizeof (int) option return */
879 		case IPV6_UNSPEC_SRC:
880 			*i1 = connp->conn_unspec_src;
881 			break;	/* goto sizeof (int) option return */
882 		case IPV6_RECVPKTINFO:
883 			*i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
884 			break;	/* goto sizeof (int) option return */
885 		case IPV6_RECVTCLASS:
886 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
887 			break;	/* goto sizeof (int) option return */
888 		case IPV6_RECVPATHMTU:
889 			*i1 = connp->conn_ipv6_recvpathmtu;
890 			break;	/* goto sizeof (int) option return */
891 		case IPV6_RECVHOPLIMIT:
892 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
893 			break;	/* goto sizeof (int) option return */
894 		case IPV6_RECVHOPOPTS:
895 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
896 			break;	/* goto sizeof (int) option return */
897 		case IPV6_RECVDSTOPTS:
898 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
899 			break;	/* goto sizeof (int) option return */
900 		case _OLD_IPV6_RECVDSTOPTS:
901 			*i1 =
902 			    connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
903 			break;	/* goto sizeof (int) option return */
904 		case IPV6_RECVRTHDRDSTOPTS:
905 			*i1 = connp->conn_recv_ancillary.
906 			    crb_ipv6_recvrthdrdstopts;
907 			break;	/* goto sizeof (int) option return */
908 		case IPV6_RECVRTHDR:
909 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
910 			break;	/* goto sizeof (int) option return */
911 		case IPV6_PKTINFO: {
912 			/* XXX assumes that caller has room for max size! */
913 			struct in6_pktinfo *pkti;
914 
915 			pkti = (struct in6_pktinfo *)ptr;
916 			pkti->ipi6_ifindex = ixa->ixa_ifindex;
917 			if (ipp->ipp_fields & IPPF_ADDR)
918 				pkti->ipi6_addr = ipp->ipp_addr;
919 			else
920 				pkti->ipi6_addr = ipv6_all_zeros;
921 			return (sizeof (struct in6_pktinfo));
922 		}
923 		case IPV6_TCLASS:
924 			*i1 = ipp->ipp_tclass;
925 			break;	/* goto sizeof (int) option return */
926 		case IPV6_NEXTHOP: {
927 			sin6_t *sin6 = (sin6_t *)ptr;
928 
929 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
930 				return (0);
931 
932 			*sin6 = sin6_null;
933 			sin6->sin6_family = AF_INET6;
934 			sin6->sin6_addr = ixa->ixa_nexthop_v6;
935 
936 			return (sizeof (sin6_t));
937 		}
938 		case IPV6_HOPOPTS:
939 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
940 				return (0);
941 			bcopy(ipp->ipp_hopopts, ptr,
942 			    ipp->ipp_hopoptslen);
943 			return (ipp->ipp_hopoptslen);
944 		case IPV6_RTHDRDSTOPTS:
945 			if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
946 				return (0);
947 			bcopy(ipp->ipp_rthdrdstopts, ptr,
948 			    ipp->ipp_rthdrdstoptslen);
949 			return (ipp->ipp_rthdrdstoptslen);
950 		case IPV6_RTHDR:
951 			if (!(ipp->ipp_fields & IPPF_RTHDR))
952 				return (0);
953 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
954 			return (ipp->ipp_rthdrlen);
955 		case IPV6_DSTOPTS:
956 			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
957 				return (0);
958 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
959 			return (ipp->ipp_dstoptslen);
960 		case IPV6_PATHMTU:
961 			return (ip_fill_mtuinfo(connp, ixa,
962 			    (struct ip6_mtuinfo *)ptr));
963 		case IPV6_SEC_OPT:
964 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
965 			    IPSEC_AF_V6));
966 		case IPV6_SRC_PREFERENCES:
967 			return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
968 		case IPV6_DONTFRAG:
969 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
970 			return (sizeof (int));
971 		case IPV6_USE_MIN_MTU:
972 			if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
973 				*i1 = ixa->ixa_use_min_mtu;
974 			else
975 				*i1 = IPV6_USE_MIN_MTU_MULTICAST;
976 			break;
977 		case IPV6_V6ONLY:
978 			*i1 = connp->conn_ipv6_v6only;
979 			return (sizeof (int));
980 		default:
981 			return (-1);
982 		}
983 		break;
984 	case IPPROTO_UDP:
985 		switch (name) {
986 		case UDP_ANONPRIVBIND:
987 			*i1 = connp->conn_anon_priv_bind;
988 			break;
989 		case UDP_EXCLBIND:
990 			*i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
991 			break;
992 		default:
993 			return (-1);
994 		}
995 		break;
996 	case IPPROTO_TCP:
997 		switch (name) {
998 		case TCP_RECVDSTADDR:
999 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
1000 			break;
1001 		case TCP_ANONPRIVBIND:
1002 			*i1 = connp->conn_anon_priv_bind;
1003 			break;
1004 		case TCP_EXCLBIND:
1005 			*i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
1006 			break;
1007 		default:
1008 			return (-1);
1009 		}
1010 		break;
1011 	default:
1012 		return (-1);
1013 	}
1014 	return (sizeof (int));
1015 }
1016 
1017 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
1018     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1019 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
1020     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1021 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
1022     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1023 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
1024     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1025 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
1026     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1027 
1028 /*
1029  * This routine sets the most common socket options including some
1030  * that are transport/ULP specific.
1031  * It returns errno or zero.
1032  *
1033  * For fixed length options, there is no sanity check
1034  * of passed in length is done. It is assumed *_optcom_req()
1035  * routines do the right thing.
1036  */
1037 int
1038 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1039     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1040 {
1041 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1042 
1043 	/* We have different functions for different levels */
1044 	switch (level) {
1045 	case SOL_SOCKET:
1046 		return (conn_opt_set_socket(coa, name, inlen, invalp,
1047 		    checkonly, cr));
1048 	case IPPROTO_IP:
1049 		return (conn_opt_set_ip(coa, name, inlen, invalp,
1050 		    checkonly, cr));
1051 	case IPPROTO_IPV6:
1052 		return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1053 		    checkonly, cr));
1054 	case IPPROTO_UDP:
1055 		return (conn_opt_set_udp(coa, name, inlen, invalp,
1056 		    checkonly, cr));
1057 	case IPPROTO_TCP:
1058 		return (conn_opt_set_tcp(coa, name, inlen, invalp,
1059 		    checkonly, cr));
1060 	default:
1061 		return (0);
1062 	}
1063 }
1064 
1065 /*
1066  * Handle SOL_SOCKET
1067  * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1068  * it implement their own checks and setting of conn_proto.
1069  */
1070 /* ARGSUSED1 */
1071 static int
1072 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1073     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1074 {
1075 	conn_t		*connp = coa->coa_connp;
1076 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1077 	int		*i1 = (int *)invalp;
1078 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1079 
1080 	switch (name) {
1081 	case SO_ALLZONES:
1082 		if (IPCL_IS_BOUND(connp))
1083 			return (EINVAL);
1084 		break;
1085 	case SO_VRRP:
1086 		if (secpolicy_ip_config(cr, checkonly) != 0)
1087 			return (EACCES);
1088 		break;
1089 	case SO_MAC_EXEMPT:
1090 		if (secpolicy_net_mac_aware(cr) != 0)
1091 			return (EACCES);
1092 		if (IPCL_IS_BOUND(connp))
1093 			return (EINVAL);
1094 		break;
1095 	case SO_MAC_IMPLICIT:
1096 		if (secpolicy_net_mac_implicit(cr) != 0)
1097 			return (EACCES);
1098 		break;
1099 	}
1100 	if (checkonly)
1101 		return (0);
1102 
1103 	mutex_enter(&connp->conn_lock);
1104 	/* Here we set the actual option value */
1105 	switch (name) {
1106 	case SO_DEBUG:
1107 		connp->conn_debug = onoff;
1108 		break;
1109 	case SO_KEEPALIVE:
1110 		connp->conn_keepalive = onoff;
1111 		break;
1112 	case SO_LINGER: {
1113 		struct linger *lgr = (struct linger *)invalp;
1114 
1115 		if (lgr->l_onoff) {
1116 			connp->conn_linger = 1;
1117 			connp->conn_lingertime = lgr->l_linger;
1118 		} else {
1119 			connp->conn_linger = 0;
1120 			connp->conn_lingertime = 0;
1121 		}
1122 		break;
1123 	}
1124 	case SO_OOBINLINE:
1125 		connp->conn_oobinline = onoff;
1126 		coa->coa_changed |= COA_OOBINLINE_CHANGED;
1127 		break;
1128 	case SO_REUSEADDR:
1129 		connp->conn_reuseaddr = onoff;
1130 		break;
1131 	case SO_DONTROUTE:
1132 		if (onoff)
1133 			ixa->ixa_flags |= IXAF_DONTROUTE;
1134 		else
1135 			ixa->ixa_flags &= ~IXAF_DONTROUTE;
1136 		coa->coa_changed |= COA_ROUTE_CHANGED;
1137 		break;
1138 	case SO_USELOOPBACK:
1139 		connp->conn_useloopback = onoff;
1140 		break;
1141 	case SO_BROADCAST:
1142 		connp->conn_broadcast = onoff;
1143 		break;
1144 	case SO_SNDBUF:
1145 		/* ULP has range checked the value */
1146 		connp->conn_sndbuf = *i1;
1147 		coa->coa_changed |= COA_SNDBUF_CHANGED;
1148 		break;
1149 	case SO_RCVBUF:
1150 		/* ULP has range checked the value */
1151 		connp->conn_rcvbuf = *i1;
1152 		coa->coa_changed |= COA_RCVBUF_CHANGED;
1153 		break;
1154 	case SO_RCVTIMEO:
1155 	case SO_SNDTIMEO:
1156 		/*
1157 		 * Pass these two options in order for third part
1158 		 * protocol usage.
1159 		 */
1160 		break;
1161 	case SO_DGRAM_ERRIND:
1162 		connp->conn_dgram_errind = onoff;
1163 		break;
1164 	case SO_RECVUCRED:
1165 		connp->conn_recv_ancillary.crb_recvucred = onoff;
1166 		break;
1167 	case SO_ALLZONES:
1168 		connp->conn_allzones = onoff;
1169 		coa->coa_changed |= COA_ROUTE_CHANGED;
1170 		if (onoff)
1171 			ixa->ixa_zoneid = ALL_ZONES;
1172 		else
1173 			ixa->ixa_zoneid = connp->conn_zoneid;
1174 		break;
1175 	case SO_TIMESTAMP:
1176 		connp->conn_recv_ancillary.crb_timestamp = onoff;
1177 		break;
1178 	case SO_VRRP:
1179 		connp->conn_isvrrp = onoff;
1180 		break;
1181 	case SO_ANON_MLP:
1182 		connp->conn_anon_mlp = onoff;
1183 		break;
1184 	case SO_MAC_EXEMPT:
1185 		connp->conn_mac_mode = onoff ?
1186 		    CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1187 		break;
1188 	case SO_MAC_IMPLICIT:
1189 		connp->conn_mac_mode = onoff ?
1190 		    CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1191 		break;
1192 	case SO_EXCLBIND:
1193 		connp->conn_exclbind = onoff;
1194 		break;
1195 	}
1196 	mutex_exit(&connp->conn_lock);
1197 	return (0);
1198 }
1199 
1200 /* Handle IPPROTO_IP */
1201 static int
1202 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1203     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1204 {
1205 	conn_t		*connp = coa->coa_connp;
1206 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1207 	ip_pkt_t	*ipp = coa->coa_ipp;
1208 	int		*i1 = (int *)invalp;
1209 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1210 	ipaddr_t	addr = (ipaddr_t)*i1;
1211 	uint_t		ifindex;
1212 	zoneid_t	zoneid = IPCL_ZONEID(connp);
1213 	ipif_t		*ipif;
1214 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1215 	int		error;
1216 
1217 	if (connp->conn_family != AF_INET)
1218 		return (EINVAL);
1219 
1220 	ifindex = UINT_MAX;
1221 	switch (name) {
1222 	case IP_TTL:
1223 		/* Don't allow zero */
1224 		if (*i1 < 1 || *i1 > 255)
1225 			return (EINVAL);
1226 		break;
1227 	case IP_MULTICAST_IF:
1228 		if (addr == INADDR_ANY) {
1229 			/* Clear */
1230 			ifindex = 0;
1231 			break;
1232 		}
1233 		ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1234 		if (ipif == NULL)
1235 			return (EHOSTUNREACH);
1236 		/* not supported by the virtual network iface */
1237 		if (IS_VNI(ipif->ipif_ill)) {
1238 			ipif_refrele(ipif);
1239 			return (EINVAL);
1240 		}
1241 		ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1242 		ipif_refrele(ipif);
1243 		break;
1244 	case IP_NEXTHOP: {
1245 		ire_t	*ire;
1246 
1247 		if (addr == INADDR_ANY) {
1248 			/* Clear */
1249 			break;
1250 		}
1251 		/* Verify that the next-hop is on-link */
1252 		ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1253 		    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1254 		if (ire == NULL)
1255 			return (EHOSTUNREACH);
1256 		ire_refrele(ire);
1257 		break;
1258 	}
1259 	case IP_OPTIONS:
1260 	case T_IP_OPTIONS: {
1261 		uint_t newlen;
1262 
1263 		if (ipp->ipp_fields & IPPF_LABEL_V4)
1264 			newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1265 		else
1266 			newlen = inlen;
1267 		if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1268 			return (EINVAL);
1269 		}
1270 		break;
1271 	}
1272 	case IP_PKTINFO: {
1273 		struct in_pktinfo *pktinfo;
1274 
1275 		/* Two different valid lengths */
1276 		if (inlen != sizeof (int) &&
1277 		    inlen != sizeof (struct in_pktinfo))
1278 			return (EINVAL);
1279 		if (inlen == sizeof (int))
1280 			break;
1281 
1282 		pktinfo = (struct in_pktinfo *)invalp;
1283 		if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1284 			switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1285 			    zoneid, ipst, B_FALSE)) {
1286 			case IPVL_UNICAST_UP:
1287 			case IPVL_UNICAST_DOWN:
1288 				break;
1289 			default:
1290 				return (EADDRNOTAVAIL);
1291 			}
1292 		}
1293 		if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1294 		    B_FALSE, ipst))
1295 			return (ENXIO);
1296 		break;
1297 	}
1298 	case IP_BOUND_IF:
1299 		ifindex = *(uint_t *)i1;
1300 
1301 		/* Just check it is ok. */
1302 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1303 			return (ENXIO);
1304 		break;
1305 	}
1306 	if (checkonly)
1307 		return (0);
1308 
1309 	/* Here we set the actual option value */
1310 	/*
1311 	 * conn_lock protects the bitfields, and is used to
1312 	 * set the fields atomically. Not needed for ixa settings since
1313 	 * the caller has an exclusive copy of the ixa.
1314 	 * We can not hold conn_lock across the multicast options though.
1315 	 */
1316 	switch (name) {
1317 	case IP_OPTIONS:
1318 	case T_IP_OPTIONS:
1319 		/* Save options for use by IP. */
1320 		mutex_enter(&connp->conn_lock);
1321 		error = optcom_pkt_set(invalp, inlen,
1322 		    (uchar_t **)&ipp->ipp_ipv4_options,
1323 		    &ipp->ipp_ipv4_options_len);
1324 		if (error != 0) {
1325 			mutex_exit(&connp->conn_lock);
1326 			return (error);
1327 		}
1328 		if (ipp->ipp_ipv4_options_len == 0) {
1329 			ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1330 		} else {
1331 			ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1332 		}
1333 		mutex_exit(&connp->conn_lock);
1334 		coa->coa_changed |= COA_HEADER_CHANGED;
1335 		coa->coa_changed |= COA_WROFF_CHANGED;
1336 		break;
1337 
1338 	case IP_TTL:
1339 		mutex_enter(&connp->conn_lock);
1340 		ipp->ipp_unicast_hops = *i1;
1341 		mutex_exit(&connp->conn_lock);
1342 		coa->coa_changed |= COA_HEADER_CHANGED;
1343 		break;
1344 	case IP_TOS:
1345 	case T_IP_TOS:
1346 		mutex_enter(&connp->conn_lock);
1347 		if (*i1 == -1) {
1348 			ipp->ipp_type_of_service = 0;
1349 		} else {
1350 			ipp->ipp_type_of_service = *i1;
1351 		}
1352 		mutex_exit(&connp->conn_lock);
1353 		coa->coa_changed |= COA_HEADER_CHANGED;
1354 		break;
1355 	case IP_MULTICAST_IF:
1356 		ixa->ixa_multicast_ifindex = ifindex;
1357 		ixa->ixa_multicast_ifaddr = addr;
1358 		coa->coa_changed |= COA_ROUTE_CHANGED;
1359 		break;
1360 	case IP_MULTICAST_TTL:
1361 		ixa->ixa_multicast_ttl = *invalp;
1362 		/* Handled automatically by ip_output */
1363 		break;
1364 	case IP_MULTICAST_LOOP:
1365 		if (*invalp != 0)
1366 			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1367 		else
1368 			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1369 		/* Handled automatically by ip_output */
1370 		break;
1371 	case IP_RECVOPTS:
1372 		mutex_enter(&connp->conn_lock);
1373 		connp->conn_recv_ancillary.crb_recvopts = onoff;
1374 		mutex_exit(&connp->conn_lock);
1375 		break;
1376 	case IP_RECVDSTADDR:
1377 		mutex_enter(&connp->conn_lock);
1378 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1379 		mutex_exit(&connp->conn_lock);
1380 		break;
1381 	case IP_RECVIF:
1382 		mutex_enter(&connp->conn_lock);
1383 		connp->conn_recv_ancillary.crb_recvif = onoff;
1384 		mutex_exit(&connp->conn_lock);
1385 		break;
1386 	case IP_RECVSLLA:
1387 		mutex_enter(&connp->conn_lock);
1388 		connp->conn_recv_ancillary.crb_recvslla = onoff;
1389 		mutex_exit(&connp->conn_lock);
1390 		break;
1391 	case IP_RECVTTL:
1392 		mutex_enter(&connp->conn_lock);
1393 		connp->conn_recv_ancillary.crb_recvttl = onoff;
1394 		mutex_exit(&connp->conn_lock);
1395 		break;
1396 	case IP_RECVTOS:
1397 		mutex_enter(&connp->conn_lock);
1398 		connp->conn_recv_ancillary.crb_recvtos = onoff;
1399 		mutex_exit(&connp->conn_lock);
1400 		break;
1401 	case IP_PKTINFO: {
1402 		/*
1403 		 * This also handles IP_RECVPKTINFO.
1404 		 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1405 		 * Differentiation is based on the size of the
1406 		 * argument passed in.
1407 		 */
1408 		struct in_pktinfo *pktinfo;
1409 
1410 		if (inlen == sizeof (int)) {
1411 			/* This is IP_RECVPKTINFO option. */
1412 			mutex_enter(&connp->conn_lock);
1413 			connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1414 			    onoff;
1415 			mutex_exit(&connp->conn_lock);
1416 			break;
1417 		}
1418 
1419 		/* This is IP_PKTINFO option. */
1420 		mutex_enter(&connp->conn_lock);
1421 		pktinfo = (struct in_pktinfo *)invalp;
1422 		if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1423 			ipp->ipp_fields |= IPPF_ADDR;
1424 			IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1425 			    &ipp->ipp_addr);
1426 		} else {
1427 			ipp->ipp_fields &= ~IPPF_ADDR;
1428 			ipp->ipp_addr = ipv6_all_zeros;
1429 		}
1430 		mutex_exit(&connp->conn_lock);
1431 		ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1432 		coa->coa_changed |= COA_ROUTE_CHANGED;
1433 		coa->coa_changed |= COA_HEADER_CHANGED;
1434 		break;
1435 	}
1436 	case IP_DONTFRAG:
1437 		if (onoff) {
1438 			ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1439 			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1440 		} else {
1441 			ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1442 			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1443 		}
1444 		/* Need to redo ip_attr_connect */
1445 		coa->coa_changed |= COA_ROUTE_CHANGED;
1446 		break;
1447 	case IP_ADD_MEMBERSHIP:
1448 	case IP_DROP_MEMBERSHIP:
1449 	case MCAST_JOIN_GROUP:
1450 	case MCAST_LEAVE_GROUP:
1451 		return (ip_opt_set_multicast_group(connp, name,
1452 		    invalp, B_FALSE, checkonly));
1453 
1454 	case IP_BLOCK_SOURCE:
1455 	case IP_UNBLOCK_SOURCE:
1456 	case IP_ADD_SOURCE_MEMBERSHIP:
1457 	case IP_DROP_SOURCE_MEMBERSHIP:
1458 	case MCAST_BLOCK_SOURCE:
1459 	case MCAST_UNBLOCK_SOURCE:
1460 	case MCAST_JOIN_SOURCE_GROUP:
1461 	case MCAST_LEAVE_SOURCE_GROUP:
1462 		return (ip_opt_set_multicast_sources(connp, name,
1463 		    invalp, B_FALSE, checkonly));
1464 
1465 	case IP_SEC_OPT:
1466 		mutex_enter(&connp->conn_lock);
1467 		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1468 		mutex_exit(&connp->conn_lock);
1469 		if (error != 0) {
1470 			return (error);
1471 		}
1472 		/* This is an IPsec policy change - redo ip_attr_connect */
1473 		coa->coa_changed |= COA_ROUTE_CHANGED;
1474 		break;
1475 	case IP_NEXTHOP:
1476 		ixa->ixa_nexthop_v4 = addr;
1477 		if (addr != INADDR_ANY)
1478 			ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1479 		else
1480 			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1481 		coa->coa_changed |= COA_ROUTE_CHANGED;
1482 		break;
1483 
1484 	case IP_BOUND_IF:
1485 		ixa->ixa_ifindex = ifindex;		/* Send */
1486 		mutex_enter(&connp->conn_lock);
1487 		connp->conn_incoming_ifindex = ifindex;	/* Receive */
1488 		connp->conn_bound_if = ifindex;		/* getsockopt */
1489 		mutex_exit(&connp->conn_lock);
1490 		coa->coa_changed |= COA_ROUTE_CHANGED;
1491 		break;
1492 	case IP_UNSPEC_SRC:
1493 		mutex_enter(&connp->conn_lock);
1494 		connp->conn_unspec_src = onoff;
1495 		if (onoff)
1496 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1497 		else
1498 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1499 
1500 		mutex_exit(&connp->conn_lock);
1501 		break;
1502 	case IP_BROADCAST_TTL:
1503 		ixa->ixa_broadcast_ttl = *invalp;
1504 		ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1505 		/* Handled automatically by ip_output */
1506 		break;
1507 	case MRT_INIT:
1508 	case MRT_DONE:
1509 	case MRT_ADD_VIF:
1510 	case MRT_DEL_VIF:
1511 	case MRT_ADD_MFC:
1512 	case MRT_DEL_MFC:
1513 	case MRT_ASSERT:
1514 		if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1515 			return (error);
1516 		}
1517 		error = ip_mrouter_set((int)name, connp, checkonly,
1518 		    (uchar_t *)invalp, inlen);
1519 		if (error) {
1520 			return (error);
1521 		}
1522 		return (0);
1523 
1524 	}
1525 	return (0);
1526 }
1527 
1528 /* Handle IPPROTO_IPV6 */
1529 static int
1530 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1531     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1532 {
1533 	conn_t		*connp = coa->coa_connp;
1534 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1535 	ip_pkt_t	*ipp = coa->coa_ipp;
1536 	int		*i1 = (int *)invalp;
1537 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1538 	uint_t		ifindex;
1539 	zoneid_t	zoneid = IPCL_ZONEID(connp);
1540 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1541 	int		error;
1542 
1543 	if (connp->conn_family != AF_INET6)
1544 		return (EINVAL);
1545 
1546 	ifindex = UINT_MAX;
1547 	switch (name) {
1548 	case IPV6_MULTICAST_IF:
1549 		/*
1550 		 * The only possible error is EINVAL.
1551 		 * We call this option on both V4 and V6
1552 		 * If both fail, then this call returns
1553 		 * EINVAL. If at least one of them succeeds we
1554 		 * return success.
1555 		 */
1556 		ifindex = *(uint_t *)i1;
1557 
1558 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1559 		    !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1560 			return (EINVAL);
1561 		break;
1562 	case IPV6_UNICAST_HOPS:
1563 		/* Don't allow zero. -1 means to use default */
1564 		if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1565 			return (EINVAL);
1566 		break;
1567 	case IPV6_MULTICAST_HOPS:
1568 		/* -1 means use default */
1569 		if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1570 			return (EINVAL);
1571 		break;
1572 	case IPV6_MULTICAST_LOOP:
1573 		if (*i1 != 0 && *i1 != 1)
1574 			return (EINVAL);
1575 		break;
1576 	case IPV6_BOUND_IF:
1577 		ifindex = *(uint_t *)i1;
1578 
1579 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1580 			return (ENXIO);
1581 		break;
1582 	case IPV6_PKTINFO: {
1583 		struct in6_pktinfo *pkti;
1584 		boolean_t isv6;
1585 
1586 		if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1587 			return (EINVAL);
1588 		if (inlen == 0)
1589 			break;	/* Clear values below */
1590 
1591 		/*
1592 		 * Verify the source address and ifindex. Privileged users
1593 		 * can use any source address.
1594 		 */
1595 		pkti = (struct in6_pktinfo *)invalp;
1596 
1597 		/*
1598 		 * For link-local addresses we use the ipi6_ifindex when
1599 		 * we verify the local address.
1600 		 * If net_rawaccess then any source address can be used.
1601 		 */
1602 		if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1603 		    secpolicy_net_rawaccess(cr) != 0) {
1604 			uint_t scopeid = 0;
1605 			in6_addr_t *v6src = &pkti->ipi6_addr;
1606 			ipaddr_t v4src;
1607 			ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1608 
1609 			if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1610 				IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1611 				if (v4src != INADDR_ANY) {
1612 					laddr_type = ip_laddr_verify_v4(v4src,
1613 					    zoneid, ipst, B_FALSE);
1614 				}
1615 			} else {
1616 				if (IN6_IS_ADDR_LINKSCOPE(v6src))
1617 					scopeid = pkti->ipi6_ifindex;
1618 
1619 				laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1620 				    ipst, B_FALSE, scopeid);
1621 			}
1622 			switch (laddr_type) {
1623 			case IPVL_UNICAST_UP:
1624 			case IPVL_UNICAST_DOWN:
1625 				break;
1626 			default:
1627 				return (EADDRNOTAVAIL);
1628 			}
1629 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1630 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1631 			/* Allow any source */
1632 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1633 		}
1634 		isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1635 		if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1636 		    ipst))
1637 			return (ENXIO);
1638 		break;
1639 	}
1640 	case IPV6_HOPLIMIT:
1641 		/* It is only allowed as ancilary data */
1642 		if (!coa->coa_ancillary)
1643 			return (EINVAL);
1644 
1645 		if (inlen != 0 && inlen != sizeof (int))
1646 			return (EINVAL);
1647 		if (inlen == sizeof (int)) {
1648 			if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1649 				return (EINVAL);
1650 		}
1651 		break;
1652 	case IPV6_TCLASS:
1653 		if (inlen != 0 && inlen != sizeof (int))
1654 			return (EINVAL);
1655 		if (inlen == sizeof (int)) {
1656 			if (*i1 > 255 || *i1 < -1)
1657 				return (EINVAL);
1658 		}
1659 		break;
1660 	case IPV6_NEXTHOP:
1661 		if (inlen != 0 && inlen != sizeof (sin6_t))
1662 			return (EINVAL);
1663 		if (inlen == sizeof (sin6_t)) {
1664 			sin6_t *sin6 = (sin6_t *)invalp;
1665 			ire_t	*ire;
1666 
1667 			if (sin6->sin6_family != AF_INET6)
1668 				return (EAFNOSUPPORT);
1669 			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1670 				return (EADDRNOTAVAIL);
1671 
1672 			/* Verify that the next-hop is on-link */
1673 			ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1674 			    0, 0, IRE_ONLINK, NULL, zoneid,
1675 			    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1676 			if (ire == NULL)
1677 				return (EHOSTUNREACH);
1678 			ire_refrele(ire);
1679 			break;
1680 		}
1681 		break;
1682 	case IPV6_RTHDR:
1683 	case IPV6_DSTOPTS:
1684 	case IPV6_RTHDRDSTOPTS:
1685 	case IPV6_HOPOPTS: {
1686 		/* All have the length field in the same place */
1687 		ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1688 		/*
1689 		 * Sanity checks - minimum size, size a multiple of
1690 		 * eight bytes, and matching size passed in.
1691 		 */
1692 		if (inlen != 0 &&
1693 		    inlen != (8 * (hopts->ip6h_len + 1)))
1694 			return (EINVAL);
1695 		break;
1696 	}
1697 	case IPV6_PATHMTU:
1698 		/* Can't be set */
1699 		return (EINVAL);
1700 
1701 	case IPV6_USE_MIN_MTU:
1702 		if (inlen != sizeof (int))
1703 			return (EINVAL);
1704 		if (*i1 < -1 || *i1 > 1)
1705 			return (EINVAL);
1706 		break;
1707 	case IPV6_SRC_PREFERENCES:
1708 		if (inlen != sizeof (uint32_t))
1709 			return (EINVAL);
1710 		break;
1711 	case IPV6_V6ONLY:
1712 		if (*i1 < 0 || *i1 > 1) {
1713 			return (EINVAL);
1714 		}
1715 		break;
1716 	}
1717 	if (checkonly)
1718 		return (0);
1719 
1720 	/* Here we set the actual option value */
1721 	/*
1722 	 * conn_lock protects the bitfields, and is used to
1723 	 * set the fields atomically. Not needed for ixa settings since
1724 	 * the caller has an exclusive copy of the ixa.
1725 	 * We can not hold conn_lock across the multicast options though.
1726 	 */
1727 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1728 	switch (name) {
1729 	case IPV6_MULTICAST_IF:
1730 		ixa->ixa_multicast_ifindex = ifindex;
1731 		/* Need to redo ip_attr_connect */
1732 		coa->coa_changed |= COA_ROUTE_CHANGED;
1733 		break;
1734 	case IPV6_UNICAST_HOPS:
1735 		/* -1 means use default */
1736 		mutex_enter(&connp->conn_lock);
1737 		if (*i1 == -1) {
1738 			ipp->ipp_unicast_hops = connp->conn_default_ttl;
1739 		} else {
1740 			ipp->ipp_unicast_hops = (uint8_t)*i1;
1741 		}
1742 		mutex_exit(&connp->conn_lock);
1743 		coa->coa_changed |= COA_HEADER_CHANGED;
1744 		break;
1745 	case IPV6_MULTICAST_HOPS:
1746 		/* -1 means use default */
1747 		if (*i1 == -1) {
1748 			ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1749 		} else {
1750 			ixa->ixa_multicast_ttl = (uint8_t)*i1;
1751 		}
1752 		/* Handled automatically by ip_output */
1753 		break;
1754 	case IPV6_MULTICAST_LOOP:
1755 		if (*i1 != 0)
1756 			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1757 		else
1758 			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1759 		/* Handled automatically by ip_output */
1760 		break;
1761 	case IPV6_JOIN_GROUP:
1762 	case IPV6_LEAVE_GROUP:
1763 	case MCAST_JOIN_GROUP:
1764 	case MCAST_LEAVE_GROUP:
1765 		return (ip_opt_set_multicast_group(connp, name,
1766 		    invalp, B_TRUE, checkonly));
1767 
1768 	case MCAST_BLOCK_SOURCE:
1769 	case MCAST_UNBLOCK_SOURCE:
1770 	case MCAST_JOIN_SOURCE_GROUP:
1771 	case MCAST_LEAVE_SOURCE_GROUP:
1772 		return (ip_opt_set_multicast_sources(connp, name,
1773 		    invalp, B_TRUE, checkonly));
1774 
1775 	case IPV6_BOUND_IF:
1776 		ixa->ixa_ifindex = ifindex;		/* Send */
1777 		mutex_enter(&connp->conn_lock);
1778 		connp->conn_incoming_ifindex = ifindex;	/* Receive */
1779 		connp->conn_bound_if = ifindex;		/* getsockopt */
1780 		mutex_exit(&connp->conn_lock);
1781 		coa->coa_changed |= COA_ROUTE_CHANGED;
1782 		break;
1783 	case IPV6_UNSPEC_SRC:
1784 		mutex_enter(&connp->conn_lock);
1785 		connp->conn_unspec_src = onoff;
1786 		if (onoff)
1787 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1788 		else
1789 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1790 		mutex_exit(&connp->conn_lock);
1791 		break;
1792 	case IPV6_RECVPKTINFO:
1793 		mutex_enter(&connp->conn_lock);
1794 		connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1795 		mutex_exit(&connp->conn_lock);
1796 		break;
1797 	case IPV6_RECVTCLASS:
1798 		mutex_enter(&connp->conn_lock);
1799 		connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1800 		mutex_exit(&connp->conn_lock);
1801 		break;
1802 	case IPV6_RECVPATHMTU:
1803 		mutex_enter(&connp->conn_lock);
1804 		connp->conn_ipv6_recvpathmtu = onoff;
1805 		mutex_exit(&connp->conn_lock);
1806 		break;
1807 	case IPV6_RECVHOPLIMIT:
1808 		mutex_enter(&connp->conn_lock);
1809 		connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1810 		    onoff;
1811 		mutex_exit(&connp->conn_lock);
1812 		break;
1813 	case IPV6_RECVHOPOPTS:
1814 		mutex_enter(&connp->conn_lock);
1815 		connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1816 		mutex_exit(&connp->conn_lock);
1817 		break;
1818 	case IPV6_RECVDSTOPTS:
1819 		mutex_enter(&connp->conn_lock);
1820 		connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1821 		mutex_exit(&connp->conn_lock);
1822 		break;
1823 	case _OLD_IPV6_RECVDSTOPTS:
1824 		mutex_enter(&connp->conn_lock);
1825 		connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1826 		    onoff;
1827 		mutex_exit(&connp->conn_lock);
1828 		break;
1829 	case IPV6_RECVRTHDRDSTOPTS:
1830 		mutex_enter(&connp->conn_lock);
1831 		connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1832 		    onoff;
1833 		mutex_exit(&connp->conn_lock);
1834 		break;
1835 	case IPV6_RECVRTHDR:
1836 		mutex_enter(&connp->conn_lock);
1837 		connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1838 		mutex_exit(&connp->conn_lock);
1839 		break;
1840 	case IPV6_PKTINFO:
1841 		mutex_enter(&connp->conn_lock);
1842 		if (inlen == 0) {
1843 			ipp->ipp_fields &= ~IPPF_ADDR;
1844 			ipp->ipp_addr = ipv6_all_zeros;
1845 			ixa->ixa_ifindex = 0;
1846 		} else {
1847 			struct in6_pktinfo *pkti;
1848 
1849 			pkti = (struct in6_pktinfo *)invalp;
1850 			ipp->ipp_addr = pkti->ipi6_addr;
1851 			if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1852 				ipp->ipp_fields |= IPPF_ADDR;
1853 			else
1854 				ipp->ipp_fields &= ~IPPF_ADDR;
1855 			ixa->ixa_ifindex = pkti->ipi6_ifindex;
1856 		}
1857 		mutex_exit(&connp->conn_lock);
1858 		/* Source and ifindex might have changed */
1859 		coa->coa_changed |= COA_HEADER_CHANGED;
1860 		coa->coa_changed |= COA_ROUTE_CHANGED;
1861 		break;
1862 	case IPV6_HOPLIMIT:
1863 		mutex_enter(&connp->conn_lock);
1864 		if (inlen == 0 || *i1 == -1) {
1865 			/* Revert to default */
1866 			ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1867 			ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1868 		} else {
1869 			ipp->ipp_hoplimit = *i1;
1870 			ipp->ipp_fields |= IPPF_HOPLIMIT;
1871 			/* Ensure that it sticks for multicast packets */
1872 			ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1873 		}
1874 		mutex_exit(&connp->conn_lock);
1875 		coa->coa_changed |= COA_HEADER_CHANGED;
1876 		break;
1877 	case IPV6_TCLASS:
1878 		/*
1879 		 * IPV6_TCLASS accepts -1 as use kernel default
1880 		 * and [0, 255] as the actualy traffic class.
1881 		 */
1882 		mutex_enter(&connp->conn_lock);
1883 		if (inlen == 0 || *i1 == -1) {
1884 			ipp->ipp_tclass = 0;
1885 			ipp->ipp_fields &= ~IPPF_TCLASS;
1886 		} else {
1887 			ipp->ipp_tclass = *i1;
1888 			ipp->ipp_fields |= IPPF_TCLASS;
1889 		}
1890 		mutex_exit(&connp->conn_lock);
1891 		coa->coa_changed |= COA_HEADER_CHANGED;
1892 		break;
1893 	case IPV6_NEXTHOP:
1894 		if (inlen == 0) {
1895 			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1896 		} else {
1897 			sin6_t *sin6 = (sin6_t *)invalp;
1898 
1899 			ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1900 			if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1901 				ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1902 			else
1903 				ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1904 		}
1905 		coa->coa_changed |= COA_ROUTE_CHANGED;
1906 		break;
1907 	case IPV6_HOPOPTS:
1908 		mutex_enter(&connp->conn_lock);
1909 		error = optcom_pkt_set(invalp, inlen,
1910 		    (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1911 		if (error != 0) {
1912 			mutex_exit(&connp->conn_lock);
1913 			return (error);
1914 		}
1915 		if (ipp->ipp_hopoptslen == 0) {
1916 			ipp->ipp_fields &= ~IPPF_HOPOPTS;
1917 		} else {
1918 			ipp->ipp_fields |= IPPF_HOPOPTS;
1919 		}
1920 		mutex_exit(&connp->conn_lock);
1921 		coa->coa_changed |= COA_HEADER_CHANGED;
1922 		coa->coa_changed |= COA_WROFF_CHANGED;
1923 		break;
1924 	case IPV6_RTHDRDSTOPTS:
1925 		mutex_enter(&connp->conn_lock);
1926 		error = optcom_pkt_set(invalp, inlen,
1927 		    (uchar_t **)&ipp->ipp_rthdrdstopts,
1928 		    &ipp->ipp_rthdrdstoptslen);
1929 		if (error != 0) {
1930 			mutex_exit(&connp->conn_lock);
1931 			return (error);
1932 		}
1933 		if (ipp->ipp_rthdrdstoptslen == 0) {
1934 			ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1935 		} else {
1936 			ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1937 		}
1938 		mutex_exit(&connp->conn_lock);
1939 		coa->coa_changed |= COA_HEADER_CHANGED;
1940 		coa->coa_changed |= COA_WROFF_CHANGED;
1941 		break;
1942 	case IPV6_DSTOPTS:
1943 		mutex_enter(&connp->conn_lock);
1944 		error = optcom_pkt_set(invalp, inlen,
1945 		    (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1946 		if (error != 0) {
1947 			mutex_exit(&connp->conn_lock);
1948 			return (error);
1949 		}
1950 		if (ipp->ipp_dstoptslen == 0) {
1951 			ipp->ipp_fields &= ~IPPF_DSTOPTS;
1952 		} else {
1953 			ipp->ipp_fields |= IPPF_DSTOPTS;
1954 		}
1955 		mutex_exit(&connp->conn_lock);
1956 		coa->coa_changed |= COA_HEADER_CHANGED;
1957 		coa->coa_changed |= COA_WROFF_CHANGED;
1958 		break;
1959 	case IPV6_RTHDR:
1960 		mutex_enter(&connp->conn_lock);
1961 		error = optcom_pkt_set(invalp, inlen,
1962 		    (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1963 		if (error != 0) {
1964 			mutex_exit(&connp->conn_lock);
1965 			return (error);
1966 		}
1967 		if (ipp->ipp_rthdrlen == 0) {
1968 			ipp->ipp_fields &= ~IPPF_RTHDR;
1969 		} else {
1970 			ipp->ipp_fields |= IPPF_RTHDR;
1971 		}
1972 		mutex_exit(&connp->conn_lock);
1973 		coa->coa_changed |= COA_HEADER_CHANGED;
1974 		coa->coa_changed |= COA_WROFF_CHANGED;
1975 		break;
1976 
1977 	case IPV6_DONTFRAG:
1978 		if (onoff) {
1979 			ixa->ixa_flags |= IXAF_DONTFRAG;
1980 			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1981 		} else {
1982 			ixa->ixa_flags &= ~IXAF_DONTFRAG;
1983 			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1984 		}
1985 		/* Need to redo ip_attr_connect */
1986 		coa->coa_changed |= COA_ROUTE_CHANGED;
1987 		break;
1988 
1989 	case IPV6_USE_MIN_MTU:
1990 		ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1991 		ixa->ixa_use_min_mtu = *i1;
1992 		/* Need to redo ip_attr_connect */
1993 		coa->coa_changed |= COA_ROUTE_CHANGED;
1994 		break;
1995 
1996 	case IPV6_SEC_OPT:
1997 		mutex_enter(&connp->conn_lock);
1998 		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1999 		mutex_exit(&connp->conn_lock);
2000 		if (error != 0) {
2001 			return (error);
2002 		}
2003 		/* This is an IPsec policy change - redo ip_attr_connect */
2004 		coa->coa_changed |= COA_ROUTE_CHANGED;
2005 		break;
2006 	case IPV6_SRC_PREFERENCES:
2007 		/*
2008 		 * This socket option only affects connected
2009 		 * sockets that haven't already bound to a specific
2010 		 * IPv6 address.  In other words, sockets that
2011 		 * don't call bind() with an address other than the
2012 		 * unspecified address and that call connect().
2013 		 * ip_set_destination_v6() passes these preferences
2014 		 * to the ipif_select_source_v6() function.
2015 		 */
2016 		mutex_enter(&connp->conn_lock);
2017 		error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
2018 		mutex_exit(&connp->conn_lock);
2019 		if (error != 0) {
2020 			return (error);
2021 		}
2022 		break;
2023 	case IPV6_V6ONLY:
2024 		mutex_enter(&connp->conn_lock);
2025 		connp->conn_ipv6_v6only = onoff;
2026 		mutex_exit(&connp->conn_lock);
2027 		break;
2028 	}
2029 	return (0);
2030 }
2031 
2032 /* Handle IPPROTO_UDP */
2033 /* ARGSUSED1 */
2034 static int
2035 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2036     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2037 {
2038 	conn_t		*connp = coa->coa_connp;
2039 	int		*i1 = (int *)invalp;
2040 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2041 	int		error;
2042 
2043 	switch (name) {
2044 	case UDP_ANONPRIVBIND:
2045 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2046 			return (error);
2047 		}
2048 		break;
2049 	}
2050 	if (checkonly)
2051 		return (0);
2052 
2053 	/* Here we set the actual option value */
2054 	mutex_enter(&connp->conn_lock);
2055 	switch (name) {
2056 	case UDP_ANONPRIVBIND:
2057 		connp->conn_anon_priv_bind = onoff;
2058 		break;
2059 	case UDP_EXCLBIND:
2060 		connp->conn_exclbind = onoff;
2061 		break;
2062 	}
2063 	mutex_exit(&connp->conn_lock);
2064 	return (0);
2065 }
2066 
2067 /* Handle IPPROTO_TCP */
2068 /* ARGSUSED1 */
2069 static int
2070 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2071     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2072 {
2073 	conn_t		*connp = coa->coa_connp;
2074 	int		*i1 = (int *)invalp;
2075 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2076 	int		error;
2077 
2078 	switch (name) {
2079 	case TCP_ANONPRIVBIND:
2080 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2081 			return (error);
2082 		}
2083 		break;
2084 	}
2085 	if (checkonly)
2086 		return (0);
2087 
2088 	/* Here we set the actual option value */
2089 	mutex_enter(&connp->conn_lock);
2090 	switch (name) {
2091 	case TCP_ANONPRIVBIND:
2092 		connp->conn_anon_priv_bind = onoff;
2093 		break;
2094 	case TCP_EXCLBIND:
2095 		connp->conn_exclbind = onoff;
2096 		break;
2097 	case TCP_RECVDSTADDR:
2098 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2099 		break;
2100 	}
2101 	mutex_exit(&connp->conn_lock);
2102 	return (0);
2103 }
2104 
2105 int
2106 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2107 {
2108 	sin_t		*sin;
2109 	sin6_t		*sin6;
2110 
2111 	if (connp->conn_family == AF_INET) {
2112 		if (*salenp < sizeof (sin_t))
2113 			return (EINVAL);
2114 
2115 		*salenp = sizeof (sin_t);
2116 		/* Fill zeroes and then initialize non-zero fields */
2117 		sin = (sin_t *)sa;
2118 		*sin = sin_null;
2119 		sin->sin_family = AF_INET;
2120 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2121 		    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2122 			sin->sin_addr.s_addr = connp->conn_saddr_v4;
2123 		} else {
2124 			/*
2125 			 * INADDR_ANY
2126 			 * conn_saddr is not set, we might be bound to
2127 			 * broadcast/multicast. Use conn_bound_addr as
2128 			 * local address instead (that could
2129 			 * also still be INADDR_ANY)
2130 			 */
2131 			sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2132 		}
2133 		sin->sin_port = connp->conn_lport;
2134 	} else {
2135 		if (*salenp < sizeof (sin6_t))
2136 			return (EINVAL);
2137 
2138 		*salenp = sizeof (sin6_t);
2139 		/* Fill zeroes and then initialize non-zero fields */
2140 		sin6 = (sin6_t *)sa;
2141 		*sin6 = sin6_null;
2142 		sin6->sin6_family = AF_INET6;
2143 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2144 			sin6->sin6_addr = connp->conn_saddr_v6;
2145 		} else {
2146 			/*
2147 			 * conn_saddr is not set, we might be bound to
2148 			 * broadcast/multicast. Use conn_bound_addr as
2149 			 * local address instead (which could
2150 			 * also still be unspecified)
2151 			 */
2152 			sin6->sin6_addr = connp->conn_bound_addr_v6;
2153 		}
2154 		sin6->sin6_port = connp->conn_lport;
2155 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2156 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2157 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2158 	}
2159 	return (0);
2160 }
2161 
2162 int
2163 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2164 {
2165 	struct sockaddr_in	*sin;
2166 	struct sockaddr_in6	*sin6;
2167 
2168 	if (connp->conn_family == AF_INET) {
2169 		if (*salenp < sizeof (sin_t))
2170 			return (EINVAL);
2171 
2172 		*salenp = sizeof (sin_t);
2173 		/* initialize */
2174 		sin = (sin_t *)sa;
2175 		*sin = sin_null;
2176 		sin->sin_family = AF_INET;
2177 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
2178 		sin->sin_port = connp->conn_fport;
2179 	} else {
2180 		if (*salenp < sizeof (sin6_t))
2181 			return (EINVAL);
2182 
2183 		*salenp = sizeof (sin6_t);
2184 		/* initialize */
2185 		sin6 = (sin6_t *)sa;
2186 		*sin6 = sin6_null;
2187 		sin6->sin6_family = AF_INET6;
2188 		sin6->sin6_addr = connp->conn_faddr_v6;
2189 		sin6->sin6_port =  connp->conn_fport;
2190 		sin6->sin6_flowinfo = connp->conn_flowinfo;
2191 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2192 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2193 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2194 	}
2195 	return (0);
2196 }
2197 
/*
 * Forward declarations of the checksum adjustment helpers, which are defined
 * further down in this file but called by the header building routines that
 * come before them.
 */
static uint32_t	cksum_massage_options_v4(ipha_t *, netstack_t *);
static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2200 
2201 /*
2202  * Allocate and fill in conn_ht_iphc based on the current information
2203  * in the conn.
2204  * Normally used when we bind() and connect().
2205  * Returns failure if can't allocate memory, or if there is a problem
2206  * with a routing header/option.
2207  *
2208  * We allocate space for the transport header (ulp_hdr_len + extra) and
2209  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2210  * The extra is there for transports that want some spare room for future
2211  * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2212  * excludes the extra part.
2213  *
2214  * We massage an routing option/header and store the ckecksum difference
2215  * in conn_sum.
2216  *
2217  * Caller needs to update conn_wroff if desired.
2218  */
2219 int
2220 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2221     const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2222 {
2223 	ip_xmit_attr_t	*ixa = connp->conn_ixa;
2224 	ip_pkt_t	*ipp = &connp->conn_xmit_ipp;
2225 	uint_t		ip_hdr_length;
2226 	uchar_t		*hdrs;
2227 	uint_t		hdrs_len;
2228 
2229 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2230 
2231 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2232 		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2233 		/* In case of TX label and IP options it can be too much */
2234 		if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2235 			/* Preserves existing TX errno for this */
2236 			return (EHOSTUNREACH);
2237 		}
2238 	} else {
2239 		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2240 	}
2241 	ixa->ixa_ip_hdr_length = ip_hdr_length;
2242 	hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2243 	ASSERT(hdrs_len != 0);
2244 
2245 	if (hdrs_len != connp->conn_ht_iphc_allocated) {
2246 		/* Allocate new before we free any old */
2247 		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2248 		if (hdrs == NULL)
2249 			return (ENOMEM);
2250 
2251 		if (connp->conn_ht_iphc != NULL) {
2252 			kmem_free(connp->conn_ht_iphc,
2253 			    connp->conn_ht_iphc_allocated);
2254 		}
2255 		connp->conn_ht_iphc = hdrs;
2256 		connp->conn_ht_iphc_allocated = hdrs_len;
2257 	} else {
2258 		hdrs = connp->conn_ht_iphc;
2259 	}
2260 	hdrs_len -= extra;
2261 	connp->conn_ht_iphc_len = hdrs_len;
2262 
2263 	connp->conn_ht_ulp = hdrs + ip_hdr_length;
2264 	connp->conn_ht_ulp_len = ulp_hdr_length;
2265 
2266 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2267 		ipha_t	*ipha = (ipha_t *)hdrs;
2268 
2269 		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2270 		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2271 		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2272 		ipha->ipha_length = htons(hdrs_len);
2273 		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2274 			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2275 		else
2276 			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2277 
2278 		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2279 			connp->conn_sum = cksum_massage_options_v4(ipha,
2280 			    connp->conn_netstack);
2281 		} else {
2282 			connp->conn_sum = 0;
2283 		}
2284 	} else {
2285 		ip6_t	*ip6h = (ip6_t *)hdrs;
2286 
2287 		ip6h->ip6_src = *v6src;
2288 		ip6h->ip6_dst = *v6dst;
2289 		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2290 		    flowinfo);
2291 		ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2292 
2293 		if (ipp->ipp_fields & IPPF_RTHDR) {
2294 			connp->conn_sum = cksum_massage_options_v6(ip6h,
2295 			    ip_hdr_length, connp->conn_netstack);
2296 
2297 			/*
2298 			 * Verify that the first hop isn't a mapped address.
2299 			 * Routers along the path need to do this verification
2300 			 * for subsequent hops.
2301 			 */
2302 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2303 				return (EADDRNOTAVAIL);
2304 
2305 		} else {
2306 			connp->conn_sum = 0;
2307 		}
2308 	}
2309 	return (0);
2310 }
2311 
2312 /*
2313  * Prepend a header template to data_mp based on the ip_pkt_t
2314  * and the passed in source, destination and protocol.
2315  *
2316  * Returns failure if can't allocate memory, in which case data_mp is freed.
2317  * We allocate space for the transport header (ulp_hdr_len) and
2318  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2319  *
2320  * We massage an routing option/header and return the ckecksum difference
2321  * in *sump. This is in host byte order.
2322  *
2323  * Caller needs to update conn_wroff if desired.
2324  */
2325 mblk_t *
2326 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2327     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2328     uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2329     uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2330 {
2331 	uint_t		ip_hdr_length;
2332 	uchar_t		*hdrs;
2333 	uint_t		hdrs_len;
2334 	mblk_t		*mp;
2335 
2336 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2337 		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2338 		ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2339 	} else {
2340 		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2341 	}
2342 	hdrs_len = ip_hdr_length + ulp_hdr_length;
2343 	ASSERT(hdrs_len != 0);
2344 
2345 	ixa->ixa_ip_hdr_length = ip_hdr_length;
2346 
2347 	/* Can we prepend to data_mp? */
2348 	if (data_mp != NULL &&
2349 	    data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2350 	    data_mp->b_datap->db_ref == 1) {
2351 		hdrs = data_mp->b_rptr - hdrs_len;
2352 		data_mp->b_rptr = hdrs;
2353 		mp = data_mp;
2354 	} else {
2355 		mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2356 		if (mp == NULL) {
2357 			freemsg(data_mp);
2358 			*errorp = ENOMEM;
2359 			return (NULL);
2360 		}
2361 		mp->b_wptr = mp->b_datap->db_lim;
2362 		hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2363 		mp->b_cont = data_mp;
2364 	}
2365 
2366 	/*
2367 	 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2368 	 * if PKTINFO (aka IPPF_ADDR) was set.
2369 	 */
2370 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2371 		ipha_t *ipha = (ipha_t *)hdrs;
2372 
2373 		ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2374 		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2375 		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2376 		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2377 		ipha->ipha_length = htons(hdrs_len + data_length);
2378 		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2379 			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2380 		else
2381 			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2382 
2383 		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2384 			*sump = cksum_massage_options_v4(ipha,
2385 			    ixa->ixa_ipst->ips_netstack);
2386 		} else {
2387 			*sump = 0;
2388 		}
2389 	} else {
2390 		ip6_t *ip6h = (ip6_t *)hdrs;
2391 
2392 		ip6h->ip6_src = *v6src;
2393 		ip6h->ip6_dst = *v6dst;
2394 		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2395 		ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2396 
2397 		if (ipp->ipp_fields & IPPF_RTHDR) {
2398 			*sump = cksum_massage_options_v6(ip6h,
2399 			    ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2400 
2401 			/*
2402 			 * Verify that the first hop isn't a mapped address.
2403 			 * Routers along the path need to do this verification
2404 			 * for subsequent hops.
2405 			 */
2406 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2407 				*errorp = EADDRNOTAVAIL;
2408 				freemsg(mp);
2409 				return (NULL);
2410 			}
2411 		} else {
2412 			*sump = 0;
2413 		}
2414 	}
2415 	return (mp);
2416 }
2417 
2418 /*
2419  * Massage a source route if any putting the first hop
2420  * in ipha_dst. Compute a starting value for the checksum which
2421  * takes into account that the original ipha_dst should be
2422  * included in the checksum but that IP will include the
2423  * first hop from the source route in the tcp checksum.
2424  */
2425 static uint32_t
2426 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2427 {
2428 	in_addr_t	dst;
2429 	uint32_t	cksum;
2430 
2431 	/* Get last hop then diff against first hop */
2432 	cksum = ip_massage_options(ipha, ns);
2433 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2434 	dst = ipha->ipha_dst;
2435 	cksum -= ((dst >> 16) + (dst & 0xffff));
2436 	if ((int)cksum < 0)
2437 		cksum--;
2438 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2439 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2440 	ASSERT(cksum < 0x10000);
2441 	return (ntohs(cksum));
2442 }
2443 
2444 static uint32_t
2445 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2446 {
2447 	uint8_t		*end;
2448 	ip6_rthdr_t	*rth;
2449 	uint32_t	cksum;
2450 
2451 	end = (uint8_t *)ip6h + ip_hdr_len;
2452 	rth = ip_find_rthdr_v6(ip6h, end);
2453 	if (rth == NULL)
2454 		return (0);
2455 
2456 	cksum = ip_massage_options_v6(ip6h, rth, ns);
2457 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2458 	ASSERT(cksum < 0x10000);
2459 	return (ntohs(cksum));
2460 }
2461 
/*
 * ULPs that change the destination address need to call this for each
 * change to discard any state about a previous destination that might
 * have been multicast or multirt.
 *
 * This clears the flags named below in ixa_flags; no other field of the
 * ixa is written.
 */
void
ip_attr_newdst(ip_xmit_attr_t *ixa)
{
	ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
	    IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
	    IXAF_NO_LOOP_ZONEID_SET);
}
2474 
2475 /*
2476  * Determine the nexthop which will be used.
2477  * Normally this is just the destination, but if a IPv4 source route, or
2478  * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2479  * there.
2480  */
2481 void
2482 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2483     const in6_addr_t *dst, in6_addr_t *nexthop)
2484 {
2485 	if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2486 		*nexthop = *dst;
2487 		return;
2488 	}
2489 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2490 		ipaddr_t v4dst;
2491 		ipaddr_t v4nexthop;
2492 
2493 		IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2494 		v4nexthop = ip_pkt_source_route_v4(ipp);
2495 		if (v4nexthop == INADDR_ANY)
2496 			v4nexthop = v4dst;
2497 
2498 		IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2499 	} else {
2500 		const in6_addr_t *v6nexthop;
2501 
2502 		v6nexthop = ip_pkt_source_route_v6(ipp);
2503 		if (v6nexthop == NULL)
2504 			v6nexthop = dst;
2505 
2506 		*nexthop = *v6nexthop;
2507 	}
2508 }
2509 
2510 /*
2511  * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2512  * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2513  * case (connected latching is done in conn_connect).
2514  * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2515  * set, but doesn't otherwise use the conn_t.
2516  *
2517  * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2518  * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2519  *
2520  * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2521  * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2522  *
2523  * Updates laddrp and uinfo if they are non-NULL.
2524  *
2525  * TSOL notes: The callers if ip_attr_connect must check if the destination
2526  * is different than before and in that case redo conn_update_label.
2527  * The callers of conn_connect do not need that since conn_connect
2528  * performs the conn_update_label.
2529  */
2530 int
2531 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2532     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2533     const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2534     iulp_t *uinfo, uint32_t flags)
2535 {
2536 	in6_addr_t		laddr = *v6src;
2537 	int			error;
2538 
2539 	ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2540 
2541 	if (connp->conn_zone_is_global)
2542 		flags |= IPDF_ZONE_IS_GLOBAL;
2543 	else
2544 		flags &= ~IPDF_ZONE_IS_GLOBAL;
2545 
2546 	/*
2547 	 * Lookup the route to determine a source address and the uinfo.
2548 	 * If the ULP has a source route option then the caller will
2549 	 * have set v6nexthop to be the first hop.
2550 	 */
2551 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2552 		ipaddr_t v4dst;
2553 		ipaddr_t v4src, v4nexthop;
2554 
2555 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2556 		IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2557 		IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2558 
2559 		if (connp->conn_unspec_src || v4src != INADDR_ANY)
2560 			flags &= ~IPDF_SELECT_SRC;
2561 		else
2562 			flags |= IPDF_SELECT_SRC;
2563 
2564 		error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2565 		    uinfo, flags, connp->conn_mac_mode);
2566 		IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2567 	} else {
2568 		if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2569 			flags &= ~IPDF_SELECT_SRC;
2570 		else
2571 			flags |= IPDF_SELECT_SRC;
2572 
2573 		error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2574 		    uinfo, flags, connp->conn_mac_mode);
2575 	}
2576 	/* Pass out some address even if we hit a RTF_REJECT etc */
2577 	if (laddrp != NULL)
2578 		*laddrp = laddr;
2579 
2580 	if (error != 0)
2581 		return (error);
2582 
2583 	if (flags & IPDF_IPSEC) {
2584 		/*
2585 		 * Set any IPsec policy in ixa. Routine also looks at ULP
2586 		 * ports.
2587 		 */
2588 		ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2589 	}
2590 	return (0);
2591 }
2592 
/*
 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
 * Assumes that conn_faddr and conn_fport are already set. As such it is not
 * usable for SCTP, since SCTP has multiple faddrs.
 *
 * Caller must hold conn_lock to provide atomic consistency between the
 * conn_t's addresses and the ixa.
 * NOTE: this function drops and reacquires conn_lock since it can't be
 * held across ip_attr_connect/ip_set_destination.
 *
 * The caller needs to handle inserting in the receive-side fanout when
 * appropriate after conn_connect returns.
 *
 * Returns zero on success. Otherwise returns the error from
 * ip_attr_connect(), conn_update_label() or ipsec_conn_cache_policy(), or
 * ECONNREFUSED on a labeled system when conn_mlp_type is not mlptSingle.
 * conn_saddr_v6 is updated even when ip_attr_connect() fails.
 */
int
conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
{
	ip_xmit_attr_t	*ixa = connp->conn_ixa;
	in6_addr_t	nexthop;
	in6_addr_t	saddr, faddr;
	in_port_t	fport;
	int		error;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/* ip_attr_connect() expects IXAF_IS_IPV4 to match the conn */
	if (connp->conn_ipversion == IPV4_VERSION)
		ixa->ixa_flags |= IXAF_IS_IPV4;
	else
		ixa->ixa_flags &= ~IXAF_IS_IPV4;

	/* We do IPsec latching below - hence no caching in ip_attr_connect */
	flags &= ~IPDF_IPSEC;

	/* In case we had previously done an ip_attr_connect */
	ip_attr_newdst(ixa);

	/*
	 * Determine the nexthop and copy the addresses before dropping
	 * conn_lock.
	 */
	ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
	    &connp->conn_faddr_v6, &nexthop);
	saddr = connp->conn_saddr_v6;
	faddr = connp->conn_faddr_v6;
	fport = connp->conn_fport;

	/*
	 * saddr is passed both as the source and as the laddrp output, so
	 * after the call it holds whatever ip_attr_connect() passed out.
	 */
	mutex_exit(&connp->conn_lock);
	error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
	    &saddr, uinfo, flags | IPDF_VERIFY_DST);
	mutex_enter(&connp->conn_lock);

	/* Could have changed even if an error */
	connp->conn_saddr_v6 = saddr;
	if (error != 0)
		return (error);

	/*
	 * Check whether Trusted Solaris policy allows communication with this
	 * host, and pretend that the destination is unreachable if not.
	 * Compute any needed label and place it in ipp_label_v4/v6.
	 *
	 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
	 * the packet.
	 *
	 * TSOL Note: Any concurrent threads would pick a different ixa
	 * (and ipp if they are to change the ipp)  so we
	 * don't have to worry about concurrent threads.
	 */
	if (is_system_labeled()) {
		if (connp->conn_mlp_type != mlptSingle)
			return (ECONNREFUSED);

		/*
		 * conn_update_label will set ipp_label* which will later
		 * be used by conn_build_hdr_template.
		 */
		error = conn_update_label(connp, ixa,
		    &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
		if (error != 0)
			return (error);
	}

	/*
	 * Ensure that we match on the selected local address.
	 * This overrides conn_laddr in the case we had earlier bound to a
	 * multicast or broadcast address.
	 */
	connp->conn_laddr_v6 = connp->conn_saddr_v6;

	/*
	 * Allow setting new policies.
	 * The addresses/ports are already set, thus the IPsec policy calls
	 * can handle their passed-in conn's.
	 */
	connp->conn_policy_cached = B_FALSE;

	/*
	 * Cache IPsec policy in this conn.  If we have per-socket policy,
	 * we'll cache that.  If we don't, we'll inherit global policy.
	 *
	 * This is done before the caller inserts in the receive-side fanout.
	 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
	 * for connections where we don't have a policy. This is to prevent
	 * global policy lookups in the inbound path.
	 *
	 * If we insert before we set conn_policy_cached,
	 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
	 * because global policy could be non-empty. We normally call
	 * ipsec_check_policy() for conn_policy_cached connections only if
	 * conn_in_enforce_policy is set. But in this case,
	 * conn_policy_cached can get set anytime since we made the
	 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
	 * called, which will make the above assumption false.  Thus, we
	 * need to insert after we set conn_policy_cached.
	 */
	error = ipsec_conn_cache_policy(connp,
	    connp->conn_ipversion == IPV4_VERSION);
	if (error != 0)
		return (error);

	/*
	 * We defer to do LSO check until here since now we have better idea
	 * whether IPsec is present. If the underlying ill is LSO capable,
	 * copy its capability in so the ULP can decide whether to enable LSO
	 * on this connection. So far, only TCP/IPv4 is implemented, so won't
	 * claim LSO for IPv6.
	 *
	 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
	 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
	 */
	ixa->ixa_flags &= ~IXAF_LSO_CAPAB;

	ASSERT(ixa->ixa_ire != NULL);
	if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
	    (ixa->ixa_nce != NULL) &&
	    ((ixa->ixa_flags & IXAF_IS_IPV4) ?
	    ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
	    ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
		ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
		ixa->ixa_flags |= IXAF_LSO_CAPAB;
	}

	/* Check whether ZEROCOPY capability is usable for this connection. */
	ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;

	/* Same exclusions as for LSO above: IPsec, local/loopback, MULTIRT */
	if ((flags & IPDF_ZCOPY) &&
	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
	    (ixa->ixa_nce != NULL) &&
	    ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
		ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
	}
	return (0);
}
2750 
2751 /*
2752  * Predicates to check if the addresses match conn_last*
2753  */
2754 
2755 /*
2756  * Compare the conn against an address.
2757  * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2758  */
2759 boolean_t
2760 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2761 {
2762 	ASSERT(connp->conn_family == AF_INET);
2763 	return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2764 	    sin->sin_port == connp->conn_lastdstport);
2765 }
2766 
2767 /*
2768  * Compare, including for mapped addresses
2769  */
2770 boolean_t
2771 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2772 {
2773 	return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2774 	    sin6->sin6_port == connp->conn_lastdstport &&
2775 	    sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2776 	    sin6->sin6_scope_id == connp->conn_lastscopeid);
2777 }
2778 
2779 /*
2780  * Compute a label and place it in the ip_packet_t.
2781  * Handles IPv4 and IPv6.
2782  * The caller should have a correct ixa_tsl and ixa_zoneid and have
2783  * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2784  * has been called.
2785  */
2786 int
2787 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2788     const in6_addr_t *v6dst, ip_pkt_t *ipp)
2789 {
2790 	int		err;
2791 	ipaddr_t	v4dst;
2792 
2793 	if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2794 		uchar_t		opt_storage[IP_MAX_OPT_LENGTH];
2795 
2796 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2797 
2798 		err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2799 		    v4dst, opt_storage, ixa->ixa_ipst);
2800 		if (err == 0) {
2801 			/* Length contained in opt_storage[IPOPT_OLEN] */
2802 			err = optcom_pkt_set(opt_storage,
2803 			    opt_storage[IPOPT_OLEN],
2804 			    (uchar_t **)&ipp->ipp_label_v4,
2805 			    &ipp->ipp_label_len_v4);
2806 		}
2807 		if (err != 0) {
2808 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2809 			    char *, "conn(1) failed to update options(2) "
2810 			    "on ixa(3)",
2811 			    conn_t *, connp, char *, opt_storage,
2812 			    ip_xmit_attr_t *, ixa);
2813 		}
2814 		if (ipp->ipp_label_len_v4 != 0)
2815 			ipp->ipp_fields |= IPPF_LABEL_V4;
2816 		else
2817 			ipp->ipp_fields &= ~IPPF_LABEL_V4;
2818 	} else {
2819 		uchar_t		opt_storage[TSOL_MAX_IPV6_OPTION];
2820 		uint_t		optlen;
2821 
2822 		err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2823 		    v6dst, opt_storage, ixa->ixa_ipst);
2824 		if (err == 0) {
2825 			/*
2826 			 * Note that ipp_label_v6 is just the option - not
2827 			 * the hopopts extension header.
2828 			 *
2829 			 * Length contained in opt_storage[IPOPT_OLEN], but
2830 			 * that doesn't include the two byte options header.
2831 			 */
2832 			optlen = opt_storage[IPOPT_OLEN];
2833 			if (optlen != 0)
2834 				optlen += 2;
2835 
2836 			err = optcom_pkt_set(opt_storage, optlen,
2837 			    (uchar_t **)&ipp->ipp_label_v6,
2838 			    &ipp->ipp_label_len_v6);
2839 		}
2840 		if (err != 0) {
2841 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2842 			    char *, "conn(1) failed to update options(2) "
2843 			    "on ixa(3)",
2844 			    conn_t *, connp, char *, opt_storage,
2845 			    ip_xmit_attr_t *, ixa);
2846 		}
2847 		if (ipp->ipp_label_len_v6 != 0)
2848 			ipp->ipp_fields |= IPPF_LABEL_V6;
2849 		else
2850 			ipp->ipp_fields &= ~IPPF_LABEL_V6;
2851 	}
2852 	return (err);
2853 }
2854 
2855 /*
2856  * Inherit all options settings from the parent/listener to the eager.
2857  * Returns zero on success; ENOMEM if memory allocation failed.
2858  *
2859  * We assume that the eager has not had any work done i.e., the conn_ixa
2860  * and conn_xmit_ipp are all zero.
2861  * Furthermore we assume that no other thread can access the eager (because
2862  * it isn't inserted in any fanout list).
2863  */
2864 int
2865 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2866 {
2867 	cred_t	*credp;
2868 	int	err;
2869 	void	*notify_cookie;
2870 	uint32_t xmit_hint;
2871 
2872 	econnp->conn_family = lconnp->conn_family;
2873 	econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2874 	econnp->conn_wq = lconnp->conn_wq;
2875 	econnp->conn_rq = lconnp->conn_rq;
2876 
2877 	/*
2878 	 * Make a safe copy of the transmit attributes.
2879 	 * conn_connect will later be used by the caller to setup the ire etc.
2880 	 */
2881 	ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2882 	ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2883 	ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2884 	ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2885 
2886 	/* Preserve ixa_notify_cookie and xmit_hint */
2887 	notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2888 	xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2889 	ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2890 	econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2891 	econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2892 
2893 	econnp->conn_bound_if = lconnp->conn_bound_if;
2894 	econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2895 
2896 	/* Inherit all RECV options */
2897 	econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2898 
2899 	err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2900 	    KM_NOSLEEP);
2901 	if (err != 0)
2902 		return (err);
2903 
2904 	econnp->conn_zoneid = lconnp->conn_zoneid;
2905 	econnp->conn_allzones = lconnp->conn_allzones;
2906 
2907 	/* This is odd. Pick a flowlabel for each connection instead? */
2908 	econnp->conn_flowinfo = lconnp->conn_flowinfo;
2909 
2910 	econnp->conn_default_ttl = lconnp->conn_default_ttl;
2911 
2912 	/*
2913 	 * TSOL: tsol_input_proc() needs the eager's cred before the
2914 	 * eager is accepted
2915 	 */
2916 	ASSERT(lconnp->conn_cred != NULL);
2917 	econnp->conn_cred = credp = lconnp->conn_cred;
2918 	crhold(credp);
2919 	econnp->conn_cpid = lconnp->conn_cpid;
2920 	econnp->conn_open_time = ddi_get_lbolt64();
2921 
2922 	/*
2923 	 * Cache things in the ixa without any refhold.
2924 	 * Listener might not have set up ixa_cred
2925 	 */
2926 	ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2927 	econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2928 	econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2929 	if (is_system_labeled())
2930 		econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2931 
2932 	/*
2933 	 * If the caller has the process-wide flag set, then default to MAC
2934 	 * exempt mode.  This allows read-down to unlabeled hosts.
2935 	 */
2936 	if (getpflags(NET_MAC_AWARE, credp) != 0)
2937 		econnp->conn_mac_mode = CONN_MAC_AWARE;
2938 
2939 	econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2940 
2941 	/*
2942 	 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2943 	 * via soaccept()->soinheritoptions() which essentially applies
2944 	 * all the listener options to the new connection. The options that we
2945 	 * need to take care of are:
2946 	 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2947 	 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2948 	 * SO_SNDBUF, SO_RCVBUF.
2949 	 *
2950 	 * SO_RCVBUF:	conn_rcvbuf is set.
2951 	 * SO_SNDBUF:	conn_sndbuf is set.
2952 	 */
2953 
2954 	/* Could we define a struct and use a struct copy for this? */
2955 	econnp->conn_sndbuf = lconnp->conn_sndbuf;
2956 	econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2957 	econnp->conn_sndlowat = lconnp->conn_sndlowat;
2958 	econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2959 	econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2960 	econnp->conn_oobinline = lconnp->conn_oobinline;
2961 	econnp->conn_debug = lconnp->conn_debug;
2962 	econnp->conn_keepalive = lconnp->conn_keepalive;
2963 	econnp->conn_linger = lconnp->conn_linger;
2964 	econnp->conn_lingertime = lconnp->conn_lingertime;
2965 
2966 	/* Set the IP options */
2967 	econnp->conn_broadcast = lconnp->conn_broadcast;
2968 	econnp->conn_useloopback = lconnp->conn_useloopback;
2969 	econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2970 	return (0);
2971 }
2972