xref: /titanic_52/usr/src/uts/common/inet/ip/conn_opt.c (revision 2fb4439d628ad2df0775287be1abd1ed95e7d267)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/strsun.h>
30 #define	_SUN_TPI_VERSION 2
31 #include <sys/tihdr.h>
32 #include <sys/xti_inet.h>
33 #include <sys/ucred.h>
34 #include <sys/zone.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/cmn_err.h>
38 #include <sys/debug.h>
39 #include <sys/atomic.h>
40 #include <sys/policy.h>
41 
42 #include <sys/systm.h>
43 #include <sys/param.h>
44 #include <sys/kmem.h>
45 #include <sys/sdt.h>
46 #include <sys/socket.h>
47 #include <sys/ethernet.h>
48 #include <sys/mac.h>
49 #include <net/if.h>
50 #include <net/if_types.h>
51 #include <net/if_arp.h>
52 #include <net/route.h>
53 #include <sys/sockio.h>
54 #include <netinet/in.h>
55 #include <net/if_dl.h>
56 
57 #include <inet/common.h>
58 #include <inet/mi.h>
59 #include <inet/mib2.h>
60 #include <inet/nd.h>
61 #include <inet/arp.h>
62 #include <inet/snmpcom.h>
63 #include <inet/kstatcom.h>
64 
65 #include <netinet/igmp_var.h>
66 #include <netinet/ip6.h>
67 #include <netinet/icmp6.h>
68 #include <netinet/sctp.h>
69 
70 #include <inet/ip.h>
71 #include <inet/ip_impl.h>
72 #include <inet/ip6.h>
73 #include <inet/ip6_asp.h>
74 #include <inet/tcp.h>
75 #include <inet/ip_multi.h>
76 #include <inet/ip_if.h>
77 #include <inet/ip_ire.h>
78 #include <inet/ip_ftable.h>
79 #include <inet/ip_rts.h>
80 #include <inet/optcom.h>
81 #include <inet/ip_ndp.h>
82 #include <inet/ip_listutils.h>
83 #include <netinet/igmp.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/udp.h>
86 #include <inet/ipp_common.h>
87 
88 #include <net/pfkeyv2.h>
89 #include <inet/sadb.h>
90 #include <inet/ipsec_impl.h>
91 #include <inet/ipdrop.h>
92 #include <inet/ip_netinfo.h>
93 
94 #include <inet/ipclassifier.h>
95 #include <inet/sctp_ip.h>
96 #include <inet/sctp/sctp_impl.h>
97 #include <inet/udp_impl.h>
98 #include <sys/sunddi.h>
99 
100 #include <sys/tsol/label.h>
101 #include <sys/tsol/tnet.h>
102 
103 /*
104  * Return how much size is needed for the different ancillary data items
105  */
106 uint_t
107 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
108     ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
109 {
110 	uint_t		ancil_size;
111 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
112 
113 	/*
114 	 * If IP_RECVDSTADDR is set we include the destination IP
115 	 * address as an option. With IP_RECVOPTS we include all
116 	 * the IP options.
117 	 */
118 	ancil_size = 0;
119 	if (recv_ancillary.crb_recvdstaddr &&
120 	    (ira->ira_flags & IRAF_IS_IPV4)) {
121 		ancil_size += sizeof (struct T_opthdr) +
122 		    sizeof (struct in_addr);
123 		IP_STAT(ipst, conn_in_recvdstaddr);
124 	}
125 
126 	/*
127 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
128 	 * are different
129 	 */
130 	if (recv_ancillary.crb_ip_recvpktinfo &&
131 	    connp->conn_family == AF_INET) {
132 		ancil_size += sizeof (struct T_opthdr) +
133 		    sizeof (struct in_pktinfo);
134 		IP_STAT(ipst, conn_in_recvpktinfo);
135 	}
136 
137 	if ((recv_ancillary.crb_recvopts) &&
138 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
139 		ancil_size += sizeof (struct T_opthdr) +
140 		    ipp->ipp_ipv4_options_len;
141 		IP_STAT(ipst, conn_in_recvopts);
142 	}
143 
144 	if (recv_ancillary.crb_recvslla) {
145 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
146 		ill_t *ill;
147 
148 		/* Make sure ira_l2src is setup if not already */
149 		if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
150 			ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
151 			    ipst);
152 			if (ill != NULL) {
153 				ip_setl2src(mp, ira, ill);
154 				ill_refrele(ill);
155 			}
156 		}
157 		ancil_size += sizeof (struct T_opthdr) +
158 		    sizeof (struct sockaddr_dl);
159 		IP_STAT(ipst, conn_in_recvslla);
160 	}
161 
162 	if (recv_ancillary.crb_recvif) {
163 		ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
164 		IP_STAT(ipst, conn_in_recvif);
165 	}
166 
167 	/*
168 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
169 	 * are different
170 	 */
171 	if (recv_ancillary.crb_ip_recvpktinfo &&
172 	    connp->conn_family == AF_INET6) {
173 		ancil_size += sizeof (struct T_opthdr) +
174 		    sizeof (struct in6_pktinfo);
175 		IP_STAT(ipst, conn_in_recvpktinfo);
176 	}
177 
178 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
179 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
180 		IP_STAT(ipst, conn_in_recvhoplimit);
181 	}
182 
183 	if (recv_ancillary.crb_ipv6_recvtclass) {
184 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
185 		IP_STAT(ipst, conn_in_recvtclass);
186 	}
187 
188 	if (recv_ancillary.crb_ipv6_recvhopopts &&
189 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
190 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
191 		IP_STAT(ipst, conn_in_recvhopopts);
192 	}
193 	/*
194 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
195 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
196 	 * options that appear before a routing header.
197 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
198 	 */
199 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
200 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
201 		    (recv_ancillary.crb_ipv6_recvdstopts &&
202 		    recv_ancillary.crb_ipv6_recvrthdr)) {
203 			ancil_size += sizeof (struct T_opthdr) +
204 			    ipp->ipp_rthdrdstoptslen;
205 			IP_STAT(ipst, conn_in_recvrthdrdstopts);
206 		}
207 	}
208 	if ((recv_ancillary.crb_ipv6_recvrthdr) &&
209 	    (ipp->ipp_fields & IPPF_RTHDR)) {
210 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
211 		IP_STAT(ipst, conn_in_recvrthdr);
212 	}
213 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
214 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
215 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
216 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
217 		IP_STAT(ipst, conn_in_recvdstopts);
218 	}
219 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
220 		ancil_size += sizeof (struct T_opthdr) +
221 		    ucredminsize(ira->ira_cred);
222 		IP_STAT(ipst, conn_in_recvucred);
223 	}
224 
225 	/*
226 	 * If SO_TIMESTAMP is set allocate the appropriate sized
227 	 * buffer. Since gethrestime() expects a pointer aligned
228 	 * argument, we allocate space necessary for extra
229 	 * alignment (even though it might not be used).
230 	 */
231 	if (recv_ancillary.crb_timestamp) {
232 		ancil_size += sizeof (struct T_opthdr) +
233 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
234 		IP_STAT(ipst, conn_in_timestamp);
235 	}
236 
237 	/*
238 	 * If IP_RECVTTL is set allocate the appropriate sized buffer
239 	 */
240 	if (recv_ancillary.crb_recvttl &&
241 	    (ira->ira_flags & IRAF_IS_IPV4)) {
242 		ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
243 		IP_STAT(ipst, conn_in_recvttl);
244 	}
245 
246 	return (ancil_size);
247 }
248 
249 /*
250  * Lay down the ancillary data items at "ancil_buf".
251  * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
252  * large buffer - ancil_size.
253  */
254 void
255 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
256     ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
257 {
258 	/*
259 	 * Copy in destination address before options to avoid
260 	 * any padding issues.
261 	 */
262 	if (recv_ancillary.crb_recvdstaddr &&
263 	    (ira->ira_flags & IRAF_IS_IPV4)) {
264 		struct T_opthdr *toh;
265 		ipaddr_t *dstptr;
266 
267 		toh = (struct T_opthdr *)ancil_buf;
268 		toh->level = IPPROTO_IP;
269 		toh->name = IP_RECVDSTADDR;
270 		toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
271 		toh->status = 0;
272 		ancil_buf += sizeof (struct T_opthdr);
273 		dstptr = (ipaddr_t *)ancil_buf;
274 		*dstptr = ipp->ipp_addr_v4;
275 		ancil_buf += sizeof (ipaddr_t);
276 		ancil_size -= toh->len;
277 	}
278 
279 	/*
280 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
281 	 * are different
282 	 */
283 	if (recv_ancillary.crb_ip_recvpktinfo &&
284 	    connp->conn_family == AF_INET) {
285 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
286 		struct T_opthdr *toh;
287 		struct in_pktinfo *pktinfop;
288 		ill_t *ill;
289 		ipif_t *ipif;
290 
291 		toh = (struct T_opthdr *)ancil_buf;
292 		toh->level = IPPROTO_IP;
293 		toh->name = IP_PKTINFO;
294 		toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
295 		toh->status = 0;
296 		ancil_buf += sizeof (struct T_opthdr);
297 		pktinfop = (struct in_pktinfo *)ancil_buf;
298 
299 		pktinfop->ipi_ifindex = ira->ira_ruifindex;
300 		pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
301 
302 		/* Find a good address to report */
303 		ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
304 		if (ill != NULL) {
305 			ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
306 			if (ipif != NULL) {
307 				pktinfop->ipi_spec_dst.s_addr =
308 				    ipif->ipif_lcl_addr;
309 				ipif_refrele(ipif);
310 			}
311 			ill_refrele(ill);
312 		}
313 		pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
314 		ancil_buf += sizeof (struct in_pktinfo);
315 		ancil_size -= toh->len;
316 	}
317 
318 	if ((recv_ancillary.crb_recvopts) &&
319 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
320 		struct T_opthdr *toh;
321 
322 		toh = (struct T_opthdr *)ancil_buf;
323 		toh->level = IPPROTO_IP;
324 		toh->name = IP_RECVOPTS;
325 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
326 		toh->status = 0;
327 		ancil_buf += sizeof (struct T_opthdr);
328 		bcopy(ipp->ipp_ipv4_options, ancil_buf,
329 		    ipp->ipp_ipv4_options_len);
330 		ancil_buf += ipp->ipp_ipv4_options_len;
331 		ancil_size -= toh->len;
332 	}
333 
334 	if (recv_ancillary.crb_recvslla) {
335 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
336 		struct T_opthdr *toh;
337 		struct sockaddr_dl *dstptr;
338 		ill_t *ill;
339 		int alen = 0;
340 
341 		ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
342 		if (ill != NULL)
343 			alen = ill->ill_phys_addr_length;
344 
345 		/*
346 		 * For loopback multicast and broadcast the packet arrives
347 		 * with ira_ruifdex being the physical interface, but
348 		 * ira_l2src is all zero since ip_postfrag_loopback doesn't
349 		 * know our l2src. We don't report the address in that case.
350 		 */
351 		if (ira->ira_flags & IRAF_LOOPBACK)
352 			alen = 0;
353 
354 		toh = (struct T_opthdr *)ancil_buf;
355 		toh->level = IPPROTO_IP;
356 		toh->name = IP_RECVSLLA;
357 		toh->len = sizeof (struct T_opthdr) +
358 		    sizeof (struct sockaddr_dl);
359 		toh->status = 0;
360 		ancil_buf += sizeof (struct T_opthdr);
361 		dstptr = (struct sockaddr_dl *)ancil_buf;
362 		dstptr->sdl_family = AF_LINK;
363 		dstptr->sdl_index = ira->ira_ruifindex;
364 		if (ill != NULL)
365 			dstptr->sdl_type = ill->ill_type;
366 		else
367 			dstptr->sdl_type = 0;
368 		dstptr->sdl_nlen = 0;
369 		dstptr->sdl_alen = alen;
370 		dstptr->sdl_slen = 0;
371 		bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
372 		ancil_buf += sizeof (struct sockaddr_dl);
373 		ancil_size -= toh->len;
374 		if (ill != NULL)
375 			ill_refrele(ill);
376 	}
377 
378 	if (recv_ancillary.crb_recvif) {
379 		struct T_opthdr *toh;
380 		uint_t		*dstptr;
381 
382 		toh = (struct T_opthdr *)ancil_buf;
383 		toh->level = IPPROTO_IP;
384 		toh->name = IP_RECVIF;
385 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
386 		toh->status = 0;
387 		ancil_buf += sizeof (struct T_opthdr);
388 		dstptr = (uint_t *)ancil_buf;
389 		*dstptr = ira->ira_ruifindex;
390 		ancil_buf += sizeof (uint_t);
391 		ancil_size -= toh->len;
392 	}
393 
394 	/*
395 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
396 	 * are different
397 	 */
398 	if (recv_ancillary.crb_ip_recvpktinfo &&
399 	    connp->conn_family == AF_INET6) {
400 		struct T_opthdr *toh;
401 		struct in6_pktinfo *pkti;
402 
403 		toh = (struct T_opthdr *)ancil_buf;
404 		toh->level = IPPROTO_IPV6;
405 		toh->name = IPV6_PKTINFO;
406 		toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
407 		toh->status = 0;
408 		ancil_buf += sizeof (struct T_opthdr);
409 		pkti = (struct in6_pktinfo *)ancil_buf;
410 		if (ira->ira_flags & IRAF_IS_IPV4) {
411 			IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
412 			    &pkti->ipi6_addr);
413 		} else {
414 			pkti->ipi6_addr = ipp->ipp_addr;
415 		}
416 		pkti->ipi6_ifindex = ira->ira_ruifindex;
417 
418 		ancil_buf += sizeof (*pkti);
419 		ancil_size -= toh->len;
420 	}
421 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
422 		struct T_opthdr *toh;
423 
424 		toh = (struct T_opthdr *)ancil_buf;
425 		toh->level = IPPROTO_IPV6;
426 		toh->name = IPV6_HOPLIMIT;
427 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
428 		toh->status = 0;
429 		ancil_buf += sizeof (struct T_opthdr);
430 		*(uint_t *)ancil_buf = ipp->ipp_hoplimit;
431 		ancil_buf += sizeof (uint_t);
432 		ancil_size -= toh->len;
433 	}
434 	if (recv_ancillary.crb_ipv6_recvtclass) {
435 		struct T_opthdr *toh;
436 
437 		toh = (struct T_opthdr *)ancil_buf;
438 		toh->level = IPPROTO_IPV6;
439 		toh->name = IPV6_TCLASS;
440 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
441 		toh->status = 0;
442 		ancil_buf += sizeof (struct T_opthdr);
443 
444 		if (ira->ira_flags & IRAF_IS_IPV4)
445 			*(uint_t *)ancil_buf = ipp->ipp_type_of_service;
446 		else
447 			*(uint_t *)ancil_buf = ipp->ipp_tclass;
448 		ancil_buf += sizeof (uint_t);
449 		ancil_size -= toh->len;
450 	}
451 	if (recv_ancillary.crb_ipv6_recvhopopts &&
452 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
453 		struct T_opthdr *toh;
454 
455 		toh = (struct T_opthdr *)ancil_buf;
456 		toh->level = IPPROTO_IPV6;
457 		toh->name = IPV6_HOPOPTS;
458 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
459 		toh->status = 0;
460 		ancil_buf += sizeof (struct T_opthdr);
461 		bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
462 		ancil_buf += ipp->ipp_hopoptslen;
463 		ancil_size -= toh->len;
464 	}
465 	/*
466 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
467 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
468 	 * options that appear before a routing header.
469 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
470 	 */
471 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
472 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
473 		    (recv_ancillary.crb_ipv6_recvdstopts &&
474 		    recv_ancillary.crb_ipv6_recvrthdr)) {
475 			struct T_opthdr *toh;
476 
477 			toh = (struct T_opthdr *)ancil_buf;
478 			toh->level = IPPROTO_IPV6;
479 			toh->name = IPV6_DSTOPTS;
480 			toh->len = sizeof (struct T_opthdr) +
481 			    ipp->ipp_rthdrdstoptslen;
482 			toh->status = 0;
483 			ancil_buf += sizeof (struct T_opthdr);
484 			bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
485 			    ipp->ipp_rthdrdstoptslen);
486 			ancil_buf += ipp->ipp_rthdrdstoptslen;
487 			ancil_size -= toh->len;
488 		}
489 	}
490 	if (recv_ancillary.crb_ipv6_recvrthdr &&
491 	    (ipp->ipp_fields & IPPF_RTHDR)) {
492 		struct T_opthdr *toh;
493 
494 		toh = (struct T_opthdr *)ancil_buf;
495 		toh->level = IPPROTO_IPV6;
496 		toh->name = IPV6_RTHDR;
497 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
498 		toh->status = 0;
499 		ancil_buf += sizeof (struct T_opthdr);
500 		bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
501 		ancil_buf += ipp->ipp_rthdrlen;
502 		ancil_size -= toh->len;
503 	}
504 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
505 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
506 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
507 		struct T_opthdr *toh;
508 
509 		toh = (struct T_opthdr *)ancil_buf;
510 		toh->level = IPPROTO_IPV6;
511 		toh->name = IPV6_DSTOPTS;
512 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
513 		toh->status = 0;
514 		ancil_buf += sizeof (struct T_opthdr);
515 		bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
516 		ancil_buf += ipp->ipp_dstoptslen;
517 		ancil_size -= toh->len;
518 	}
519 
520 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
521 		struct T_opthdr *toh;
522 		cred_t		*rcr = connp->conn_cred;
523 
524 		toh = (struct T_opthdr *)ancil_buf;
525 		toh->level = SOL_SOCKET;
526 		toh->name = SCM_UCRED;
527 		toh->len = sizeof (struct T_opthdr) +
528 		    ucredminsize(ira->ira_cred);
529 		toh->status = 0;
530 		(void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
531 		ancil_buf += toh->len;
532 		ancil_size -= toh->len;
533 	}
534 	if (recv_ancillary.crb_timestamp) {
535 		struct	T_opthdr *toh;
536 
537 		toh = (struct T_opthdr *)ancil_buf;
538 		toh->level = SOL_SOCKET;
539 		toh->name = SCM_TIMESTAMP;
540 		toh->len = sizeof (struct T_opthdr) +
541 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
542 		toh->status = 0;
543 		ancil_buf += sizeof (struct T_opthdr);
544 		/* Align for gethrestime() */
545 		ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
546 		    sizeof (intptr_t));
547 		gethrestime((timestruc_t *)ancil_buf);
548 		ancil_buf = (uchar_t *)toh + toh->len;
549 		ancil_size -= toh->len;
550 	}
551 
552 	/*
553 	 * CAUTION:
554 	 * Due to aligment issues
555 	 * Processing of IP_RECVTTL option
556 	 * should always be the last. Adding
557 	 * any option processing after this will
558 	 * cause alignment panic.
559 	 */
560 	if (recv_ancillary.crb_recvttl &&
561 	    (ira->ira_flags & IRAF_IS_IPV4)) {
562 		struct	T_opthdr *toh;
563 		uint8_t	*dstptr;
564 
565 		toh = (struct T_opthdr *)ancil_buf;
566 		toh->level = IPPROTO_IP;
567 		toh->name = IP_RECVTTL;
568 		toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
569 		toh->status = 0;
570 		ancil_buf += sizeof (struct T_opthdr);
571 		dstptr = (uint8_t *)ancil_buf;
572 		*dstptr = ipp->ipp_hoplimit;
573 		ancil_buf += sizeof (uint8_t);
574 		ancil_size -= toh->len;
575 	}
576 
577 	/* Consumed all of allocated space */
578 	ASSERT(ancil_size == 0);
579 
580 }
581 
582 /*
583  * This routine retrieves the current status of socket options.
584  * It returns the size of the option retrieved, or -1.
585  */
586 int
587 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
588     uchar_t *ptr)
589 {
590 	int		*i1 = (int *)ptr;
591 	conn_t		*connp = coa->coa_connp;
592 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
593 	ip_pkt_t	*ipp = coa->coa_ipp;
594 	ip_stack_t	*ipst = ixa->ixa_ipst;
595 	uint_t		len;
596 
597 	ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
598 
599 	switch (level) {
600 	case SOL_SOCKET:
601 		switch (name) {
602 		case SO_DEBUG:
603 			*i1 = connp->conn_debug ? SO_DEBUG : 0;
604 			break;	/* goto sizeof (int) option return */
605 		case SO_KEEPALIVE:
606 			*i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
607 			break;
608 		case SO_LINGER:	{
609 			struct linger *lgr = (struct linger *)ptr;
610 
611 			lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
612 			lgr->l_linger = connp->conn_lingertime;
613 			}
614 			return (sizeof (struct linger));
615 
616 		case SO_OOBINLINE:
617 			*i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
618 			break;
619 		case SO_REUSEADDR:
620 			*i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
621 			break;	/* goto sizeof (int) option return */
622 		case SO_REUSEPORT:
623 			*i1 = connp->conn_reuseport ? SO_REUSEPORT : 0;
624 			break;	/* goto sizeof (int) option return */
625 		case SO_TYPE:
626 			*i1 = connp->conn_so_type;
627 			break;	/* goto sizeof (int) option return */
628 		case SO_DONTROUTE:
629 			*i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
630 			    SO_DONTROUTE : 0;
631 			break;	/* goto sizeof (int) option return */
632 		case SO_USELOOPBACK:
633 			*i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
634 			break;	/* goto sizeof (int) option return */
635 		case SO_BROADCAST:
636 			*i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
637 			break;	/* goto sizeof (int) option return */
638 
639 		case SO_SNDBUF:
640 			*i1 = connp->conn_sndbuf;
641 			break;	/* goto sizeof (int) option return */
642 		case SO_RCVBUF:
643 			*i1 = connp->conn_rcvbuf;
644 			break;	/* goto sizeof (int) option return */
645 		case SO_RCVTIMEO:
646 		case SO_SNDTIMEO:
647 			/*
648 			 * Pass these two options in order for third part
649 			 * protocol usage. Here just return directly.
650 			 */
651 			*i1 = 0;
652 			break;
653 		case SO_DGRAM_ERRIND:
654 			*i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
655 			break;	/* goto sizeof (int) option return */
656 		case SO_RECVUCRED:
657 			*i1 = connp->conn_recv_ancillary.crb_recvucred;
658 			break;	/* goto sizeof (int) option return */
659 		case SO_TIMESTAMP:
660 			*i1 = connp->conn_recv_ancillary.crb_timestamp;
661 			break;	/* goto sizeof (int) option return */
662 		case SO_VRRP:
663 			*i1 = connp->conn_isvrrp;
664 			break;	/* goto sizeof (int) option return */
665 		case SO_ANON_MLP:
666 			*i1 = connp->conn_anon_mlp;
667 			break;	/* goto sizeof (int) option return */
668 		case SO_MAC_EXEMPT:
669 			*i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
670 			break;	/* goto sizeof (int) option return */
671 		case SO_MAC_IMPLICIT:
672 			*i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
673 			break;	/* goto sizeof (int) option return */
674 		case SO_ALLZONES:
675 			*i1 = connp->conn_allzones;
676 			break;	/* goto sizeof (int) option return */
677 		case SO_EXCLBIND:
678 			*i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
679 			break;
680 		case SO_PROTOTYPE:
681 			*i1 = connp->conn_proto;
682 			break;
683 
684 		case SO_DOMAIN:
685 			*i1 = connp->conn_family;
686 			break;
687 		default:
688 			return (-1);
689 		}
690 		break;
691 	case IPPROTO_IP:
692 		if (connp->conn_family != AF_INET)
693 			return (-1);
694 		switch (name) {
695 		case IP_OPTIONS:
696 		case T_IP_OPTIONS:
697 			if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
698 				return (0);
699 
700 			len = ipp->ipp_ipv4_options_len;
701 			if (len > 0) {
702 				bcopy(ipp->ipp_ipv4_options, ptr, len);
703 			}
704 			return (len);
705 
706 		case IP_PKTINFO: {
707 			/*
708 			 * This also handles IP_RECVPKTINFO.
709 			 * IP_PKTINFO and IP_RECVPKTINFO have same value.
710 			 * Differentiation is based on the size of the
711 			 * argument passed in.
712 			 */
713 			struct in_pktinfo *pktinfo;
714 
715 #ifdef notdef
716 			/* optcom doesn't provide a length with "get" */
717 			if (inlen == sizeof (int)) {
718 				/* This is IP_RECVPKTINFO option. */
719 				*i1 = connp->conn_recv_ancillary.
720 				    crb_ip_recvpktinfo;
721 				return (sizeof (int));
722 			}
723 #endif
724 			/* XXX assumes that caller has room for max size! */
725 
726 			pktinfo = (struct in_pktinfo *)ptr;
727 			pktinfo->ipi_ifindex = ixa->ixa_ifindex;
728 			if (ipp->ipp_fields & IPPF_ADDR)
729 				pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
730 			else
731 				pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
732 			return (sizeof (struct in_pktinfo));
733 		}
734 		case IP_DONTFRAG:
735 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
736 			return (sizeof (int));
737 		case IP_TOS:
738 		case T_IP_TOS:
739 			*i1 = (int)ipp->ipp_type_of_service;
740 			break;	/* goto sizeof (int) option return */
741 		case IP_TTL:
742 			*i1 = (int)ipp->ipp_unicast_hops;
743 			break;	/* goto sizeof (int) option return */
744 		case IP_DHCPINIT_IF:
745 			return (-1);
746 		case IP_NEXTHOP:
747 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
748 				*(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
749 				return (sizeof (ipaddr_t));
750 			} else {
751 				return (0);
752 			}
753 
754 		case IP_MULTICAST_IF:
755 			/* 0 address if not set */
756 			*(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
757 			return (sizeof (ipaddr_t));
758 		case IP_MULTICAST_TTL:
759 			*(uchar_t *)ptr = ixa->ixa_multicast_ttl;
760 			return (sizeof (uchar_t));
761 		case IP_MULTICAST_LOOP:
762 			*ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
763 			return (sizeof (uint8_t));
764 		case IP_RECVOPTS:
765 			*i1 = connp->conn_recv_ancillary.crb_recvopts;
766 			break;	/* goto sizeof (int) option return */
767 		case IP_RECVDSTADDR:
768 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
769 			break;	/* goto sizeof (int) option return */
770 		case IP_RECVIF:
771 			*i1 = connp->conn_recv_ancillary.crb_recvif;
772 			break;	/* goto sizeof (int) option return */
773 		case IP_RECVSLLA:
774 			*i1 = connp->conn_recv_ancillary.crb_recvslla;
775 			break;	/* goto sizeof (int) option return */
776 		case IP_RECVTTL:
777 			*i1 = connp->conn_recv_ancillary.crb_recvttl;
778 			break;	/* goto sizeof (int) option return */
779 		case IP_ADD_MEMBERSHIP:
780 		case IP_DROP_MEMBERSHIP:
781 		case MCAST_JOIN_GROUP:
782 		case MCAST_LEAVE_GROUP:
783 		case IP_BLOCK_SOURCE:
784 		case IP_UNBLOCK_SOURCE:
785 		case IP_ADD_SOURCE_MEMBERSHIP:
786 		case IP_DROP_SOURCE_MEMBERSHIP:
787 		case MCAST_BLOCK_SOURCE:
788 		case MCAST_UNBLOCK_SOURCE:
789 		case MCAST_JOIN_SOURCE_GROUP:
790 		case MCAST_LEAVE_SOURCE_GROUP:
791 		case MRT_INIT:
792 		case MRT_DONE:
793 		case MRT_ADD_VIF:
794 		case MRT_DEL_VIF:
795 		case MRT_ADD_MFC:
796 		case MRT_DEL_MFC:
797 			/* cannot "get" the value for these */
798 			return (-1);
799 		case MRT_VERSION:
800 		case MRT_ASSERT:
801 			(void) ip_mrouter_get(name, connp, ptr);
802 			return (sizeof (int));
803 		case IP_SEC_OPT:
804 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
805 			    IPSEC_AF_V4));
806 		case IP_BOUND_IF:
807 			/* Zero if not set */
808 			*i1 = connp->conn_bound_if;
809 			break;	/* goto sizeof (int) option return */
810 		case IP_UNSPEC_SRC:
811 			*i1 = connp->conn_unspec_src;
812 			break;	/* goto sizeof (int) option return */
813 		case IP_BROADCAST_TTL:
814 			if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
815 				*(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
816 			else
817 				*(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
818 			return (sizeof (uchar_t));
819 		default:
820 			return (-1);
821 		}
822 		break;
823 	case IPPROTO_IPV6:
824 		if (connp->conn_family != AF_INET6)
825 			return (-1);
826 		switch (name) {
827 		case IPV6_UNICAST_HOPS:
828 			*i1 = (int)ipp->ipp_unicast_hops;
829 			break;	/* goto sizeof (int) option return */
830 		case IPV6_MULTICAST_IF:
831 			/* 0 index if not set */
832 			*i1 = ixa->ixa_multicast_ifindex;
833 			break;	/* goto sizeof (int) option return */
834 		case IPV6_MULTICAST_HOPS:
835 			*i1 = ixa->ixa_multicast_ttl;
836 			break;	/* goto sizeof (int) option return */
837 		case IPV6_MULTICAST_LOOP:
838 			*i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
839 			break;	/* goto sizeof (int) option return */
840 		case IPV6_JOIN_GROUP:
841 		case IPV6_LEAVE_GROUP:
842 		case MCAST_JOIN_GROUP:
843 		case MCAST_LEAVE_GROUP:
844 		case MCAST_BLOCK_SOURCE:
845 		case MCAST_UNBLOCK_SOURCE:
846 		case MCAST_JOIN_SOURCE_GROUP:
847 		case MCAST_LEAVE_SOURCE_GROUP:
848 			/* cannot "get" the value for these */
849 			return (-1);
850 		case IPV6_BOUND_IF:
851 			/* Zero if not set */
852 			*i1 = connp->conn_bound_if;
853 			break;	/* goto sizeof (int) option return */
854 		case IPV6_UNSPEC_SRC:
855 			*i1 = connp->conn_unspec_src;
856 			break;	/* goto sizeof (int) option return */
857 		case IPV6_RECVPKTINFO:
858 			*i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
859 			break;	/* goto sizeof (int) option return */
860 		case IPV6_RECVTCLASS:
861 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
862 			break;	/* goto sizeof (int) option return */
863 		case IPV6_RECVPATHMTU:
864 			*i1 = connp->conn_ipv6_recvpathmtu;
865 			break;	/* goto sizeof (int) option return */
866 		case IPV6_RECVHOPLIMIT:
867 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
868 			break;	/* goto sizeof (int) option return */
869 		case IPV6_RECVHOPOPTS:
870 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
871 			break;	/* goto sizeof (int) option return */
872 		case IPV6_RECVDSTOPTS:
873 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
874 			break;	/* goto sizeof (int) option return */
875 		case _OLD_IPV6_RECVDSTOPTS:
876 			*i1 =
877 			    connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
878 			break;	/* goto sizeof (int) option return */
879 		case IPV6_RECVRTHDRDSTOPTS:
880 			*i1 = connp->conn_recv_ancillary.
881 			    crb_ipv6_recvrthdrdstopts;
882 			break;	/* goto sizeof (int) option return */
883 		case IPV6_RECVRTHDR:
884 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
885 			break;	/* goto sizeof (int) option return */
886 		case IPV6_PKTINFO: {
887 			/* XXX assumes that caller has room for max size! */
888 			struct in6_pktinfo *pkti;
889 
890 			pkti = (struct in6_pktinfo *)ptr;
891 			pkti->ipi6_ifindex = ixa->ixa_ifindex;
892 			if (ipp->ipp_fields & IPPF_ADDR)
893 				pkti->ipi6_addr = ipp->ipp_addr;
894 			else
895 				pkti->ipi6_addr = ipv6_all_zeros;
896 			return (sizeof (struct in6_pktinfo));
897 		}
898 		case IPV6_TCLASS:
899 			*i1 = ipp->ipp_tclass;
900 			break;	/* goto sizeof (int) option return */
901 		case IPV6_NEXTHOP: {
902 			sin6_t *sin6 = (sin6_t *)ptr;
903 
904 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
905 				return (0);
906 
907 			*sin6 = sin6_null;
908 			sin6->sin6_family = AF_INET6;
909 			sin6->sin6_addr = ixa->ixa_nexthop_v6;
910 
911 			return (sizeof (sin6_t));
912 		}
913 		case IPV6_HOPOPTS:
914 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
915 				return (0);
916 			bcopy(ipp->ipp_hopopts, ptr,
917 			    ipp->ipp_hopoptslen);
918 			return (ipp->ipp_hopoptslen);
919 		case IPV6_RTHDRDSTOPTS:
920 			if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
921 				return (0);
922 			bcopy(ipp->ipp_rthdrdstopts, ptr,
923 			    ipp->ipp_rthdrdstoptslen);
924 			return (ipp->ipp_rthdrdstoptslen);
925 		case IPV6_RTHDR:
926 			if (!(ipp->ipp_fields & IPPF_RTHDR))
927 				return (0);
928 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
929 			return (ipp->ipp_rthdrlen);
930 		case IPV6_DSTOPTS:
931 			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
932 				return (0);
933 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
934 			return (ipp->ipp_dstoptslen);
935 		case IPV6_PATHMTU:
936 			return (ip_fill_mtuinfo(connp, ixa,
937 			    (struct ip6_mtuinfo *)ptr));
938 		case IPV6_SEC_OPT:
939 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
940 			    IPSEC_AF_V6));
941 		case IPV6_SRC_PREFERENCES:
942 			return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
943 		case IPV6_DONTFRAG:
944 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
945 			return (sizeof (int));
946 		case IPV6_USE_MIN_MTU:
947 			if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
948 				*i1 = ixa->ixa_use_min_mtu;
949 			else
950 				*i1 = IPV6_USE_MIN_MTU_MULTICAST;
951 			break;
952 		case IPV6_V6ONLY:
953 			*i1 = connp->conn_ipv6_v6only;
954 			return (sizeof (int));
955 		default:
956 			return (-1);
957 		}
958 		break;
959 	case IPPROTO_UDP:
960 		switch (name) {
961 		case UDP_ANONPRIVBIND:
962 			*i1 = connp->conn_anon_priv_bind;
963 			break;
964 		case UDP_EXCLBIND:
965 			*i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
966 			break;
967 		default:
968 			return (-1);
969 		}
970 		break;
971 	case IPPROTO_TCP:
972 		switch (name) {
973 		case TCP_RECVDSTADDR:
974 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
975 			break;
976 		case TCP_ANONPRIVBIND:
977 			*i1 = connp->conn_anon_priv_bind;
978 			break;
979 		case TCP_EXCLBIND:
980 			*i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
981 			break;
982 		default:
983 			return (-1);
984 		}
985 		break;
986 	default:
987 		return (-1);
988 	}
989 	return (sizeof (int));
990 }
991 
992 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
993     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
994 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
995     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
996 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
997     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
998 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
999     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1000 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
1001     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1002 
1003 /*
1004  * This routine sets the most common socket options including some
1005  * that are transport/ULP specific.
1006  * It returns errno or zero.
1007  *
1008  * For fixed length options, there is no sanity check
1009  * of passed in length is done. It is assumed *_optcom_req()
1010  * routines do the right thing.
1011  */
1012 int
1013 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1014     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1015 {
1016 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1017 
1018 	/* We have different functions for different levels */
1019 	switch (level) {
1020 	case SOL_SOCKET:
1021 		return (conn_opt_set_socket(coa, name, inlen, invalp,
1022 		    checkonly, cr));
1023 	case IPPROTO_IP:
1024 		return (conn_opt_set_ip(coa, name, inlen, invalp,
1025 		    checkonly, cr));
1026 	case IPPROTO_IPV6:
1027 		return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1028 		    checkonly, cr));
1029 	case IPPROTO_UDP:
1030 		return (conn_opt_set_udp(coa, name, inlen, invalp,
1031 		    checkonly, cr));
1032 	case IPPROTO_TCP:
1033 		return (conn_opt_set_tcp(coa, name, inlen, invalp,
1034 		    checkonly, cr));
1035 	default:
1036 		return (0);
1037 	}
1038 }
1039 
1040 /*
1041  * Handle SOL_SOCKET
1042  * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1043  * it implement their own checks and setting of conn_proto.
1044  */
1045 /* ARGSUSED1 */
1046 static int
1047 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1048     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1049 {
1050 	conn_t		*connp = coa->coa_connp;
1051 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1052 	int		*i1 = (int *)invalp;
1053 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1054 
1055 	switch (name) {
1056 	case SO_ALLZONES:
1057 		if (IPCL_IS_BOUND(connp))
1058 			return (EINVAL);
1059 		break;
1060 	case SO_VRRP:
1061 		if (secpolicy_ip_config(cr, checkonly) != 0)
1062 			return (EACCES);
1063 		break;
1064 	case SO_MAC_EXEMPT:
1065 		if (secpolicy_net_mac_aware(cr) != 0)
1066 			return (EACCES);
1067 		if (IPCL_IS_BOUND(connp))
1068 			return (EINVAL);
1069 		break;
1070 	case SO_MAC_IMPLICIT:
1071 		if (secpolicy_net_mac_implicit(cr) != 0)
1072 			return (EACCES);
1073 		break;
1074 	}
1075 	if (checkonly)
1076 		return (0);
1077 
1078 	mutex_enter(&connp->conn_lock);
1079 	/* Here we set the actual option value */
1080 	switch (name) {
1081 	case SO_DEBUG:
1082 		connp->conn_debug = onoff;
1083 		break;
1084 	case SO_KEEPALIVE:
1085 		connp->conn_keepalive = onoff;
1086 		break;
1087 	case SO_LINGER: {
1088 		struct linger *lgr = (struct linger *)invalp;
1089 
1090 		if (lgr->l_onoff) {
1091 			connp->conn_linger = 1;
1092 			connp->conn_lingertime = lgr->l_linger;
1093 		} else {
1094 			connp->conn_linger = 0;
1095 			connp->conn_lingertime = 0;
1096 		}
1097 		break;
1098 	}
1099 	case SO_OOBINLINE:
1100 		connp->conn_oobinline = onoff;
1101 		coa->coa_changed |= COA_OOBINLINE_CHANGED;
1102 		break;
1103 	case SO_REUSEADDR:
1104 		connp->conn_reuseaddr = onoff;
1105 		break;
1106 	case SO_REUSEPORT:
1107 		connp->conn_reuseport = onoff;
1108 		break;
1109 	case SO_DONTROUTE:
1110 		if (onoff)
1111 			ixa->ixa_flags |= IXAF_DONTROUTE;
1112 		else
1113 			ixa->ixa_flags &= ~IXAF_DONTROUTE;
1114 		coa->coa_changed |= COA_ROUTE_CHANGED;
1115 		break;
1116 	case SO_USELOOPBACK:
1117 		connp->conn_useloopback = onoff;
1118 		break;
1119 	case SO_BROADCAST:
1120 		connp->conn_broadcast = onoff;
1121 		break;
1122 	case SO_SNDBUF:
1123 		/* ULP has range checked the value */
1124 		connp->conn_sndbuf = *i1;
1125 		coa->coa_changed |= COA_SNDBUF_CHANGED;
1126 		break;
1127 	case SO_RCVBUF:
1128 		/* ULP has range checked the value */
1129 		connp->conn_rcvbuf = *i1;
1130 		coa->coa_changed |= COA_RCVBUF_CHANGED;
1131 		break;
1132 	case SO_RCVTIMEO:
1133 	case SO_SNDTIMEO:
1134 		/*
1135 		 * Pass these two options in order for third part
1136 		 * protocol usage.
1137 		 */
1138 		break;
1139 	case SO_DGRAM_ERRIND:
1140 		connp->conn_dgram_errind = onoff;
1141 		break;
1142 	case SO_RECVUCRED:
1143 		connp->conn_recv_ancillary.crb_recvucred = onoff;
1144 		break;
1145 	case SO_ALLZONES:
1146 		connp->conn_allzones = onoff;
1147 		coa->coa_changed |= COA_ROUTE_CHANGED;
1148 		if (onoff)
1149 			ixa->ixa_zoneid = ALL_ZONES;
1150 		else
1151 			ixa->ixa_zoneid = connp->conn_zoneid;
1152 		break;
1153 	case SO_TIMESTAMP:
1154 		connp->conn_recv_ancillary.crb_timestamp = onoff;
1155 		break;
1156 	case SO_VRRP:
1157 		connp->conn_isvrrp = onoff;
1158 		break;
1159 	case SO_ANON_MLP:
1160 		connp->conn_anon_mlp = onoff;
1161 		break;
1162 	case SO_MAC_EXEMPT:
1163 		connp->conn_mac_mode = onoff ?
1164 		    CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1165 		break;
1166 	case SO_MAC_IMPLICIT:
1167 		connp->conn_mac_mode = onoff ?
1168 		    CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1169 		break;
1170 	case SO_EXCLBIND:
1171 		connp->conn_exclbind = onoff;
1172 		break;
1173 	}
1174 	mutex_exit(&connp->conn_lock);
1175 	return (0);
1176 }
1177 
1178 /* Handle IPPROTO_IP */
1179 static int
1180 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1181     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1182 {
1183 	conn_t		*connp = coa->coa_connp;
1184 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1185 	ip_pkt_t	*ipp = coa->coa_ipp;
1186 	int		*i1 = (int *)invalp;
1187 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1188 	ipaddr_t	addr = (ipaddr_t)*i1;
1189 	uint_t		ifindex;
1190 	zoneid_t	zoneid = IPCL_ZONEID(connp);
1191 	ipif_t		*ipif;
1192 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1193 	int		error;
1194 
1195 	if (connp->conn_family != AF_INET)
1196 		return (EINVAL);
1197 
1198 	switch (name) {
1199 	case IP_TTL:
1200 		/* Don't allow zero */
1201 		if (*i1 < 1 || *i1 > 255)
1202 			return (EINVAL);
1203 		break;
1204 	case IP_MULTICAST_IF:
1205 		if (addr == INADDR_ANY) {
1206 			/* Clear */
1207 			ifindex = 0;
1208 			break;
1209 		}
1210 		ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1211 		if (ipif == NULL)
1212 			return (EHOSTUNREACH);
1213 		/* not supported by the virtual network iface */
1214 		if (IS_VNI(ipif->ipif_ill)) {
1215 			ipif_refrele(ipif);
1216 			return (EINVAL);
1217 		}
1218 		ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1219 		ipif_refrele(ipif);
1220 		break;
1221 	case IP_NEXTHOP: {
1222 		ire_t	*ire;
1223 
1224 		if (addr == INADDR_ANY) {
1225 			/* Clear */
1226 			break;
1227 		}
1228 		/* Verify that the next-hop is on-link */
1229 		ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1230 		    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1231 		if (ire == NULL)
1232 			return (EHOSTUNREACH);
1233 		ire_refrele(ire);
1234 		break;
1235 	}
1236 	case IP_OPTIONS:
1237 	case T_IP_OPTIONS: {
1238 		uint_t newlen;
1239 
1240 		if (ipp->ipp_fields & IPPF_LABEL_V4)
1241 			newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1242 		else
1243 			newlen = inlen;
1244 		if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1245 			return (EINVAL);
1246 		}
1247 		break;
1248 	}
1249 	case IP_PKTINFO: {
1250 		struct in_pktinfo *pktinfo;
1251 
1252 		/* Two different valid lengths */
1253 		if (inlen != sizeof (int) &&
1254 		    inlen != sizeof (struct in_pktinfo))
1255 			return (EINVAL);
1256 		if (inlen == sizeof (int))
1257 			break;
1258 
1259 		pktinfo = (struct in_pktinfo *)invalp;
1260 		if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1261 			switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1262 			    zoneid, ipst, B_FALSE)) {
1263 			case IPVL_UNICAST_UP:
1264 			case IPVL_UNICAST_DOWN:
1265 				break;
1266 			default:
1267 				return (EADDRNOTAVAIL);
1268 			}
1269 		}
1270 		if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1271 		    B_FALSE, ipst))
1272 			return (ENXIO);
1273 		break;
1274 	}
1275 	case IP_BOUND_IF:
1276 		ifindex = *(uint_t *)i1;
1277 
1278 		/* Just check it is ok. */
1279 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1280 			return (ENXIO);
1281 		break;
1282 	}
1283 	if (checkonly)
1284 		return (0);
1285 
1286 	/* Here we set the actual option value */
1287 	/*
1288 	 * conn_lock protects the bitfields, and is used to
1289 	 * set the fields atomically. Not needed for ixa settings since
1290 	 * the caller has an exclusive copy of the ixa.
1291 	 * We can not hold conn_lock across the multicast options though.
1292 	 */
1293 	switch (name) {
1294 	case IP_OPTIONS:
1295 	case T_IP_OPTIONS:
1296 		/* Save options for use by IP. */
1297 		mutex_enter(&connp->conn_lock);
1298 		error = optcom_pkt_set(invalp, inlen,
1299 		    (uchar_t **)&ipp->ipp_ipv4_options,
1300 		    &ipp->ipp_ipv4_options_len);
1301 		if (error != 0) {
1302 			mutex_exit(&connp->conn_lock);
1303 			return (error);
1304 		}
1305 		if (ipp->ipp_ipv4_options_len == 0) {
1306 			ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1307 		} else {
1308 			ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1309 		}
1310 		mutex_exit(&connp->conn_lock);
1311 		coa->coa_changed |= COA_HEADER_CHANGED;
1312 		coa->coa_changed |= COA_WROFF_CHANGED;
1313 		break;
1314 
1315 	case IP_TTL:
1316 		mutex_enter(&connp->conn_lock);
1317 		ipp->ipp_unicast_hops = *i1;
1318 		mutex_exit(&connp->conn_lock);
1319 		coa->coa_changed |= COA_HEADER_CHANGED;
1320 		break;
1321 	case IP_TOS:
1322 	case T_IP_TOS:
1323 		mutex_enter(&connp->conn_lock);
1324 		if (*i1 == -1) {
1325 			ipp->ipp_type_of_service = 0;
1326 		} else {
1327 			ipp->ipp_type_of_service = *i1;
1328 		}
1329 		mutex_exit(&connp->conn_lock);
1330 		coa->coa_changed |= COA_HEADER_CHANGED;
1331 		break;
1332 	case IP_MULTICAST_IF:
1333 		ixa->ixa_multicast_ifindex = ifindex;
1334 		ixa->ixa_multicast_ifaddr = addr;
1335 		coa->coa_changed |= COA_ROUTE_CHANGED;
1336 		break;
1337 	case IP_MULTICAST_TTL:
1338 		ixa->ixa_multicast_ttl = *invalp;
1339 		/* Handled automatically by ip_output */
1340 		break;
1341 	case IP_MULTICAST_LOOP:
1342 		if (*invalp != 0)
1343 			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1344 		else
1345 			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1346 		/* Handled automatically by ip_output */
1347 		break;
1348 	case IP_RECVOPTS:
1349 		mutex_enter(&connp->conn_lock);
1350 		connp->conn_recv_ancillary.crb_recvopts = onoff;
1351 		mutex_exit(&connp->conn_lock);
1352 		break;
1353 	case IP_RECVDSTADDR:
1354 		mutex_enter(&connp->conn_lock);
1355 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1356 		mutex_exit(&connp->conn_lock);
1357 		break;
1358 	case IP_RECVIF:
1359 		mutex_enter(&connp->conn_lock);
1360 		connp->conn_recv_ancillary.crb_recvif = onoff;
1361 		mutex_exit(&connp->conn_lock);
1362 		break;
1363 	case IP_RECVSLLA:
1364 		mutex_enter(&connp->conn_lock);
1365 		connp->conn_recv_ancillary.crb_recvslla = onoff;
1366 		mutex_exit(&connp->conn_lock);
1367 		break;
1368 	case IP_RECVTTL:
1369 		mutex_enter(&connp->conn_lock);
1370 		connp->conn_recv_ancillary.crb_recvttl = onoff;
1371 		mutex_exit(&connp->conn_lock);
1372 		break;
1373 	case IP_PKTINFO: {
1374 		/*
1375 		 * This also handles IP_RECVPKTINFO.
1376 		 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1377 		 * Differentiation is based on the size of the
1378 		 * argument passed in.
1379 		 */
1380 		struct in_pktinfo *pktinfo;
1381 
1382 		if (inlen == sizeof (int)) {
1383 			/* This is IP_RECVPKTINFO option. */
1384 			mutex_enter(&connp->conn_lock);
1385 			connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1386 			    onoff;
1387 			mutex_exit(&connp->conn_lock);
1388 			break;
1389 		}
1390 
1391 		/* This is IP_PKTINFO option. */
1392 		mutex_enter(&connp->conn_lock);
1393 		pktinfo = (struct in_pktinfo *)invalp;
1394 		if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1395 			ipp->ipp_fields |= IPPF_ADDR;
1396 			IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1397 			    &ipp->ipp_addr);
1398 		} else {
1399 			ipp->ipp_fields &= ~IPPF_ADDR;
1400 			ipp->ipp_addr = ipv6_all_zeros;
1401 		}
1402 		mutex_exit(&connp->conn_lock);
1403 		ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1404 		coa->coa_changed |= COA_ROUTE_CHANGED;
1405 		coa->coa_changed |= COA_HEADER_CHANGED;
1406 		break;
1407 	}
1408 	case IP_DONTFRAG:
1409 		if (onoff) {
1410 			ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1411 			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1412 		} else {
1413 			ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1414 			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1415 		}
1416 		/* Need to redo ip_attr_connect */
1417 		coa->coa_changed |= COA_ROUTE_CHANGED;
1418 		break;
1419 	case IP_ADD_MEMBERSHIP:
1420 	case IP_DROP_MEMBERSHIP:
1421 	case MCAST_JOIN_GROUP:
1422 	case MCAST_LEAVE_GROUP:
1423 		return (ip_opt_set_multicast_group(connp, name,
1424 		    invalp, B_FALSE, checkonly));
1425 
1426 	case IP_BLOCK_SOURCE:
1427 	case IP_UNBLOCK_SOURCE:
1428 	case IP_ADD_SOURCE_MEMBERSHIP:
1429 	case IP_DROP_SOURCE_MEMBERSHIP:
1430 	case MCAST_BLOCK_SOURCE:
1431 	case MCAST_UNBLOCK_SOURCE:
1432 	case MCAST_JOIN_SOURCE_GROUP:
1433 	case MCAST_LEAVE_SOURCE_GROUP:
1434 		return (ip_opt_set_multicast_sources(connp, name,
1435 		    invalp, B_FALSE, checkonly));
1436 
1437 	case IP_SEC_OPT:
1438 		mutex_enter(&connp->conn_lock);
1439 		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1440 		mutex_exit(&connp->conn_lock);
1441 		if (error != 0) {
1442 			return (error);
1443 		}
1444 		/* This is an IPsec policy change - redo ip_attr_connect */
1445 		coa->coa_changed |= COA_ROUTE_CHANGED;
1446 		break;
1447 	case IP_NEXTHOP:
1448 		ixa->ixa_nexthop_v4 = addr;
1449 		if (addr != INADDR_ANY)
1450 			ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1451 		else
1452 			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1453 		coa->coa_changed |= COA_ROUTE_CHANGED;
1454 		break;
1455 
1456 	case IP_BOUND_IF:
1457 		ixa->ixa_ifindex = ifindex;		/* Send */
1458 		mutex_enter(&connp->conn_lock);
1459 		connp->conn_incoming_ifindex = ifindex;	/* Receive */
1460 		connp->conn_bound_if = ifindex;		/* getsockopt */
1461 		mutex_exit(&connp->conn_lock);
1462 		coa->coa_changed |= COA_ROUTE_CHANGED;
1463 		break;
1464 	case IP_UNSPEC_SRC:
1465 		mutex_enter(&connp->conn_lock);
1466 		connp->conn_unspec_src = onoff;
1467 		if (onoff)
1468 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1469 		else
1470 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1471 
1472 		mutex_exit(&connp->conn_lock);
1473 		break;
1474 	case IP_BROADCAST_TTL:
1475 		ixa->ixa_broadcast_ttl = *invalp;
1476 		ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1477 		/* Handled automatically by ip_output */
1478 		break;
1479 	case MRT_INIT:
1480 	case MRT_DONE:
1481 	case MRT_ADD_VIF:
1482 	case MRT_DEL_VIF:
1483 	case MRT_ADD_MFC:
1484 	case MRT_DEL_MFC:
1485 	case MRT_ASSERT:
1486 		if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1487 			return (error);
1488 		}
1489 		error = ip_mrouter_set((int)name, connp, checkonly,
1490 		    (uchar_t *)invalp, inlen);
1491 		if (error) {
1492 			return (error);
1493 		}
1494 		return (0);
1495 
1496 	}
1497 	return (0);
1498 }
1499 
1500 /* Handle IPPROTO_IPV6 */
1501 static int
1502 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1503     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1504 {
1505 	conn_t		*connp = coa->coa_connp;
1506 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1507 	ip_pkt_t	*ipp = coa->coa_ipp;
1508 	int		*i1 = (int *)invalp;
1509 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1510 	uint_t		ifindex;
1511 	zoneid_t	zoneid = IPCL_ZONEID(connp);
1512 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1513 	int		error;
1514 
1515 	if (connp->conn_family != AF_INET6)
1516 		return (EINVAL);
1517 
1518 	switch (name) {
1519 	case IPV6_MULTICAST_IF:
1520 		/*
1521 		 * The only possible error is EINVAL.
1522 		 * We call this option on both V4 and V6
1523 		 * If both fail, then this call returns
1524 		 * EINVAL. If at least one of them succeeds we
1525 		 * return success.
1526 		 */
1527 		ifindex = *(uint_t *)i1;
1528 
1529 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1530 		    !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1531 			return (EINVAL);
1532 		break;
1533 	case IPV6_UNICAST_HOPS:
1534 		/* Don't allow zero. -1 means to use default */
1535 		if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1536 			return (EINVAL);
1537 		break;
1538 	case IPV6_MULTICAST_HOPS:
1539 		/* -1 means use default */
1540 		if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1541 			return (EINVAL);
1542 		break;
1543 	case IPV6_MULTICAST_LOOP:
1544 		if (*i1 != 0 && *i1 != 1)
1545 			return (EINVAL);
1546 		break;
1547 	case IPV6_BOUND_IF:
1548 		ifindex = *(uint_t *)i1;
1549 
1550 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1551 			return (ENXIO);
1552 		break;
1553 	case IPV6_PKTINFO: {
1554 		struct in6_pktinfo *pkti;
1555 		boolean_t isv6;
1556 
1557 		if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1558 			return (EINVAL);
1559 		if (inlen == 0)
1560 			break;	/* Clear values below */
1561 
1562 		/*
1563 		 * Verify the source address and ifindex. Privileged users
1564 		 * can use any source address.
1565 		 */
1566 		pkti = (struct in6_pktinfo *)invalp;
1567 
1568 		/*
1569 		 * For link-local addresses we use the ipi6_ifindex when
1570 		 * we verify the local address.
1571 		 * If net_rawaccess then any source address can be used.
1572 		 */
1573 		if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1574 		    secpolicy_net_rawaccess(cr) != 0) {
1575 			uint_t scopeid = 0;
1576 			in6_addr_t *v6src = &pkti->ipi6_addr;
1577 			ipaddr_t v4src;
1578 			ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1579 
1580 			if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1581 				IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1582 				if (v4src != INADDR_ANY) {
1583 					laddr_type = ip_laddr_verify_v4(v4src,
1584 					    zoneid, ipst, B_FALSE);
1585 				}
1586 			} else {
1587 				if (IN6_IS_ADDR_LINKSCOPE(v6src))
1588 					scopeid = pkti->ipi6_ifindex;
1589 
1590 				laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1591 				    ipst, B_FALSE, scopeid);
1592 			}
1593 			switch (laddr_type) {
1594 			case IPVL_UNICAST_UP:
1595 			case IPVL_UNICAST_DOWN:
1596 				break;
1597 			default:
1598 				return (EADDRNOTAVAIL);
1599 			}
1600 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1601 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1602 			/* Allow any source */
1603 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1604 		}
1605 		isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1606 		if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1607 		    ipst))
1608 			return (ENXIO);
1609 		break;
1610 	}
1611 	case IPV6_HOPLIMIT:
1612 		/* It is only allowed as ancilary data */
1613 		if (!coa->coa_ancillary)
1614 			return (EINVAL);
1615 
1616 		if (inlen != 0 && inlen != sizeof (int))
1617 			return (EINVAL);
1618 		if (inlen == sizeof (int)) {
1619 			if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1620 				return (EINVAL);
1621 		}
1622 		break;
1623 	case IPV6_TCLASS:
1624 		if (inlen != 0 && inlen != sizeof (int))
1625 			return (EINVAL);
1626 		if (inlen == sizeof (int)) {
1627 			if (*i1 > 255 || *i1 < -1)
1628 				return (EINVAL);
1629 		}
1630 		break;
1631 	case IPV6_NEXTHOP:
1632 		if (inlen != 0 && inlen != sizeof (sin6_t))
1633 			return (EINVAL);
1634 		if (inlen == sizeof (sin6_t)) {
1635 			sin6_t *sin6 = (sin6_t *)invalp;
1636 			ire_t	*ire;
1637 
1638 			if (sin6->sin6_family != AF_INET6)
1639 				return (EAFNOSUPPORT);
1640 			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1641 				return (EADDRNOTAVAIL);
1642 
1643 			/* Verify that the next-hop is on-link */
1644 			ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1645 			    0, 0, IRE_ONLINK, NULL, zoneid,
1646 			    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1647 			if (ire == NULL)
1648 				return (EHOSTUNREACH);
1649 			ire_refrele(ire);
1650 			break;
1651 		}
1652 		break;
1653 	case IPV6_RTHDR:
1654 	case IPV6_DSTOPTS:
1655 	case IPV6_RTHDRDSTOPTS:
1656 	case IPV6_HOPOPTS: {
1657 		/* All have the length field in the same place */
1658 		ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1659 		/*
1660 		 * Sanity checks - minimum size, size a multiple of
1661 		 * eight bytes, and matching size passed in.
1662 		 */
1663 		if (inlen != 0 &&
1664 		    inlen != (8 * (hopts->ip6h_len + 1)))
1665 			return (EINVAL);
1666 		break;
1667 	}
1668 	case IPV6_PATHMTU:
1669 		/* Can't be set */
1670 		return (EINVAL);
1671 
1672 	case IPV6_USE_MIN_MTU:
1673 		if (inlen != sizeof (int))
1674 			return (EINVAL);
1675 		if (*i1 < -1 || *i1 > 1)
1676 			return (EINVAL);
1677 		break;
1678 	case IPV6_SRC_PREFERENCES:
1679 		if (inlen != sizeof (uint32_t))
1680 			return (EINVAL);
1681 		break;
1682 	case IPV6_V6ONLY:
1683 		if (*i1 < 0 || *i1 > 1) {
1684 			return (EINVAL);
1685 		}
1686 		break;
1687 	}
1688 	if (checkonly)
1689 		return (0);
1690 
1691 	/* Here we set the actual option value */
1692 	/*
1693 	 * conn_lock protects the bitfields, and is used to
1694 	 * set the fields atomically. Not needed for ixa settings since
1695 	 * the caller has an exclusive copy of the ixa.
1696 	 * We can not hold conn_lock across the multicast options though.
1697 	 */
1698 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1699 	switch (name) {
1700 	case IPV6_MULTICAST_IF:
1701 		ixa->ixa_multicast_ifindex = ifindex;
1702 		/* Need to redo ip_attr_connect */
1703 		coa->coa_changed |= COA_ROUTE_CHANGED;
1704 		break;
1705 	case IPV6_UNICAST_HOPS:
1706 		/* -1 means use default */
1707 		mutex_enter(&connp->conn_lock);
1708 		if (*i1 == -1) {
1709 			ipp->ipp_unicast_hops = connp->conn_default_ttl;
1710 		} else {
1711 			ipp->ipp_unicast_hops = (uint8_t)*i1;
1712 		}
1713 		mutex_exit(&connp->conn_lock);
1714 		coa->coa_changed |= COA_HEADER_CHANGED;
1715 		break;
1716 	case IPV6_MULTICAST_HOPS:
1717 		/* -1 means use default */
1718 		if (*i1 == -1) {
1719 			ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1720 		} else {
1721 			ixa->ixa_multicast_ttl = (uint8_t)*i1;
1722 		}
1723 		/* Handled automatically by ip_output */
1724 		break;
1725 	case IPV6_MULTICAST_LOOP:
1726 		if (*i1 != 0)
1727 			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1728 		else
1729 			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1730 		/* Handled automatically by ip_output */
1731 		break;
1732 	case IPV6_JOIN_GROUP:
1733 	case IPV6_LEAVE_GROUP:
1734 	case MCAST_JOIN_GROUP:
1735 	case MCAST_LEAVE_GROUP:
1736 		return (ip_opt_set_multicast_group(connp, name,
1737 		    invalp, B_TRUE, checkonly));
1738 
1739 	case MCAST_BLOCK_SOURCE:
1740 	case MCAST_UNBLOCK_SOURCE:
1741 	case MCAST_JOIN_SOURCE_GROUP:
1742 	case MCAST_LEAVE_SOURCE_GROUP:
1743 		return (ip_opt_set_multicast_sources(connp, name,
1744 		    invalp, B_TRUE, checkonly));
1745 
1746 	case IPV6_BOUND_IF:
1747 		ixa->ixa_ifindex = ifindex;		/* Send */
1748 		mutex_enter(&connp->conn_lock);
1749 		connp->conn_incoming_ifindex = ifindex;	/* Receive */
1750 		connp->conn_bound_if = ifindex;		/* getsockopt */
1751 		mutex_exit(&connp->conn_lock);
1752 		coa->coa_changed |= COA_ROUTE_CHANGED;
1753 		break;
1754 	case IPV6_UNSPEC_SRC:
1755 		mutex_enter(&connp->conn_lock);
1756 		connp->conn_unspec_src = onoff;
1757 		if (onoff)
1758 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1759 		else
1760 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1761 		mutex_exit(&connp->conn_lock);
1762 		break;
1763 	case IPV6_RECVPKTINFO:
1764 		mutex_enter(&connp->conn_lock);
1765 		connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1766 		mutex_exit(&connp->conn_lock);
1767 		break;
1768 	case IPV6_RECVTCLASS:
1769 		mutex_enter(&connp->conn_lock);
1770 		connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1771 		mutex_exit(&connp->conn_lock);
1772 		break;
1773 	case IPV6_RECVPATHMTU:
1774 		mutex_enter(&connp->conn_lock);
1775 		connp->conn_ipv6_recvpathmtu = onoff;
1776 		mutex_exit(&connp->conn_lock);
1777 		break;
1778 	case IPV6_RECVHOPLIMIT:
1779 		mutex_enter(&connp->conn_lock);
1780 		connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1781 		    onoff;
1782 		mutex_exit(&connp->conn_lock);
1783 		break;
1784 	case IPV6_RECVHOPOPTS:
1785 		mutex_enter(&connp->conn_lock);
1786 		connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1787 		mutex_exit(&connp->conn_lock);
1788 		break;
1789 	case IPV6_RECVDSTOPTS:
1790 		mutex_enter(&connp->conn_lock);
1791 		connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1792 		mutex_exit(&connp->conn_lock);
1793 		break;
1794 	case _OLD_IPV6_RECVDSTOPTS:
1795 		mutex_enter(&connp->conn_lock);
1796 		connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1797 		    onoff;
1798 		mutex_exit(&connp->conn_lock);
1799 		break;
1800 	case IPV6_RECVRTHDRDSTOPTS:
1801 		mutex_enter(&connp->conn_lock);
1802 		connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1803 		    onoff;
1804 		mutex_exit(&connp->conn_lock);
1805 		break;
1806 	case IPV6_RECVRTHDR:
1807 		mutex_enter(&connp->conn_lock);
1808 		connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1809 		mutex_exit(&connp->conn_lock);
1810 		break;
1811 	case IPV6_PKTINFO:
1812 		mutex_enter(&connp->conn_lock);
1813 		if (inlen == 0) {
1814 			ipp->ipp_fields &= ~IPPF_ADDR;
1815 			ipp->ipp_addr = ipv6_all_zeros;
1816 			ixa->ixa_ifindex = 0;
1817 		} else {
1818 			struct in6_pktinfo *pkti;
1819 
1820 			pkti = (struct in6_pktinfo *)invalp;
1821 			ipp->ipp_addr = pkti->ipi6_addr;
1822 			if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1823 				ipp->ipp_fields |= IPPF_ADDR;
1824 			else
1825 				ipp->ipp_fields &= ~IPPF_ADDR;
1826 			ixa->ixa_ifindex = pkti->ipi6_ifindex;
1827 		}
1828 		mutex_exit(&connp->conn_lock);
1829 		/* Source and ifindex might have changed */
1830 		coa->coa_changed |= COA_HEADER_CHANGED;
1831 		coa->coa_changed |= COA_ROUTE_CHANGED;
1832 		break;
1833 	case IPV6_HOPLIMIT:
1834 		mutex_enter(&connp->conn_lock);
1835 		if (inlen == 0 || *i1 == -1) {
1836 			/* Revert to default */
1837 			ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1838 			ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1839 		} else {
1840 			ipp->ipp_hoplimit = *i1;
1841 			ipp->ipp_fields |= IPPF_HOPLIMIT;
1842 			/* Ensure that it sticks for multicast packets */
1843 			ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1844 		}
1845 		mutex_exit(&connp->conn_lock);
1846 		coa->coa_changed |= COA_HEADER_CHANGED;
1847 		break;
1848 	case IPV6_TCLASS:
1849 		/*
1850 		 * IPV6_TCLASS accepts -1 as use kernel default
1851 		 * and [0, 255] as the actualy traffic class.
1852 		 */
1853 		mutex_enter(&connp->conn_lock);
1854 		if (inlen == 0 || *i1 == -1) {
1855 			ipp->ipp_tclass = 0;
1856 			ipp->ipp_fields &= ~IPPF_TCLASS;
1857 		} else {
1858 			ipp->ipp_tclass = *i1;
1859 			ipp->ipp_fields |= IPPF_TCLASS;
1860 		}
1861 		mutex_exit(&connp->conn_lock);
1862 		coa->coa_changed |= COA_HEADER_CHANGED;
1863 		break;
1864 	case IPV6_NEXTHOP:
1865 		if (inlen == 0) {
1866 			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1867 		} else {
1868 			sin6_t *sin6 = (sin6_t *)invalp;
1869 
1870 			ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1871 			if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1872 				ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1873 			else
1874 				ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1875 		}
1876 		coa->coa_changed |= COA_ROUTE_CHANGED;
1877 		break;
1878 	case IPV6_HOPOPTS:
1879 		mutex_enter(&connp->conn_lock);
1880 		error = optcom_pkt_set(invalp, inlen,
1881 		    (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1882 		if (error != 0) {
1883 			mutex_exit(&connp->conn_lock);
1884 			return (error);
1885 		}
1886 		if (ipp->ipp_hopoptslen == 0) {
1887 			ipp->ipp_fields &= ~IPPF_HOPOPTS;
1888 		} else {
1889 			ipp->ipp_fields |= IPPF_HOPOPTS;
1890 		}
1891 		mutex_exit(&connp->conn_lock);
1892 		coa->coa_changed |= COA_HEADER_CHANGED;
1893 		coa->coa_changed |= COA_WROFF_CHANGED;
1894 		break;
1895 	case IPV6_RTHDRDSTOPTS:
1896 		mutex_enter(&connp->conn_lock);
1897 		error = optcom_pkt_set(invalp, inlen,
1898 		    (uchar_t **)&ipp->ipp_rthdrdstopts,
1899 		    &ipp->ipp_rthdrdstoptslen);
1900 		if (error != 0) {
1901 			mutex_exit(&connp->conn_lock);
1902 			return (error);
1903 		}
1904 		if (ipp->ipp_rthdrdstoptslen == 0) {
1905 			ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1906 		} else {
1907 			ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1908 		}
1909 		mutex_exit(&connp->conn_lock);
1910 		coa->coa_changed |= COA_HEADER_CHANGED;
1911 		coa->coa_changed |= COA_WROFF_CHANGED;
1912 		break;
1913 	case IPV6_DSTOPTS:
1914 		mutex_enter(&connp->conn_lock);
1915 		error = optcom_pkt_set(invalp, inlen,
1916 		    (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1917 		if (error != 0) {
1918 			mutex_exit(&connp->conn_lock);
1919 			return (error);
1920 		}
1921 		if (ipp->ipp_dstoptslen == 0) {
1922 			ipp->ipp_fields &= ~IPPF_DSTOPTS;
1923 		} else {
1924 			ipp->ipp_fields |= IPPF_DSTOPTS;
1925 		}
1926 		mutex_exit(&connp->conn_lock);
1927 		coa->coa_changed |= COA_HEADER_CHANGED;
1928 		coa->coa_changed |= COA_WROFF_CHANGED;
1929 		break;
1930 	case IPV6_RTHDR:
1931 		mutex_enter(&connp->conn_lock);
1932 		error = optcom_pkt_set(invalp, inlen,
1933 		    (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1934 		if (error != 0) {
1935 			mutex_exit(&connp->conn_lock);
1936 			return (error);
1937 		}
1938 		if (ipp->ipp_rthdrlen == 0) {
1939 			ipp->ipp_fields &= ~IPPF_RTHDR;
1940 		} else {
1941 			ipp->ipp_fields |= IPPF_RTHDR;
1942 		}
1943 		mutex_exit(&connp->conn_lock);
1944 		coa->coa_changed |= COA_HEADER_CHANGED;
1945 		coa->coa_changed |= COA_WROFF_CHANGED;
1946 		break;
1947 
1948 	case IPV6_DONTFRAG:
1949 		if (onoff) {
1950 			ixa->ixa_flags |= IXAF_DONTFRAG;
1951 			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1952 		} else {
1953 			ixa->ixa_flags &= ~IXAF_DONTFRAG;
1954 			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1955 		}
1956 		/* Need to redo ip_attr_connect */
1957 		coa->coa_changed |= COA_ROUTE_CHANGED;
1958 		break;
1959 
1960 	case IPV6_USE_MIN_MTU:
1961 		ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1962 		ixa->ixa_use_min_mtu = *i1;
1963 		/* Need to redo ip_attr_connect */
1964 		coa->coa_changed |= COA_ROUTE_CHANGED;
1965 		break;
1966 
1967 	case IPV6_SEC_OPT:
1968 		mutex_enter(&connp->conn_lock);
1969 		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1970 		mutex_exit(&connp->conn_lock);
1971 		if (error != 0) {
1972 			return (error);
1973 		}
1974 		/* This is an IPsec policy change - redo ip_attr_connect */
1975 		coa->coa_changed |= COA_ROUTE_CHANGED;
1976 		break;
1977 	case IPV6_SRC_PREFERENCES:
1978 		/*
1979 		 * This socket option only affects connected
1980 		 * sockets that haven't already bound to a specific
1981 		 * IPv6 address.  In other words, sockets that
1982 		 * don't call bind() with an address other than the
1983 		 * unspecified address and that call connect().
1984 		 * ip_set_destination_v6() passes these preferences
1985 		 * to the ipif_select_source_v6() function.
1986 		 */
1987 		mutex_enter(&connp->conn_lock);
1988 		error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
1989 		mutex_exit(&connp->conn_lock);
1990 		if (error != 0) {
1991 			return (error);
1992 		}
1993 		break;
1994 	case IPV6_V6ONLY:
1995 		mutex_enter(&connp->conn_lock);
1996 		connp->conn_ipv6_v6only = onoff;
1997 		mutex_exit(&connp->conn_lock);
1998 		break;
1999 	}
2000 	return (0);
2001 }
2002 
2003 /* Handle IPPROTO_UDP */
2004 /* ARGSUSED1 */
2005 static int
2006 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2007     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2008 {
2009 	conn_t		*connp = coa->coa_connp;
2010 	int		*i1 = (int *)invalp;
2011 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2012 	int		error;
2013 
2014 	switch (name) {
2015 	case UDP_ANONPRIVBIND:
2016 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2017 			return (error);
2018 		}
2019 		break;
2020 	}
2021 	if (checkonly)
2022 		return (0);
2023 
2024 	/* Here we set the actual option value */
2025 	mutex_enter(&connp->conn_lock);
2026 	switch (name) {
2027 	case UDP_ANONPRIVBIND:
2028 		connp->conn_anon_priv_bind = onoff;
2029 		break;
2030 	case UDP_EXCLBIND:
2031 		connp->conn_exclbind = onoff;
2032 		break;
2033 	}
2034 	mutex_exit(&connp->conn_lock);
2035 	return (0);
2036 }
2037 
2038 /* Handle IPPROTO_TCP */
2039 /* ARGSUSED1 */
2040 static int
2041 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2042     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2043 {
2044 	conn_t		*connp = coa->coa_connp;
2045 	int		*i1 = (int *)invalp;
2046 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2047 	int		error;
2048 
2049 	switch (name) {
2050 	case TCP_ANONPRIVBIND:
2051 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2052 			return (error);
2053 		}
2054 		break;
2055 	}
2056 	if (checkonly)
2057 		return (0);
2058 
2059 	/* Here we set the actual option value */
2060 	mutex_enter(&connp->conn_lock);
2061 	switch (name) {
2062 	case TCP_ANONPRIVBIND:
2063 		connp->conn_anon_priv_bind = onoff;
2064 		break;
2065 	case TCP_EXCLBIND:
2066 		connp->conn_exclbind = onoff;
2067 		break;
2068 	case TCP_RECVDSTADDR:
2069 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2070 		break;
2071 	}
2072 	mutex_exit(&connp->conn_lock);
2073 	return (0);
2074 }
2075 
2076 int
2077 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2078 {
2079 	sin_t		*sin;
2080 	sin6_t		*sin6;
2081 
2082 	if (connp->conn_family == AF_INET) {
2083 		if (*salenp < sizeof (sin_t))
2084 			return (EINVAL);
2085 
2086 		*salenp = sizeof (sin_t);
2087 		/* Fill zeroes and then initialize non-zero fields */
2088 		sin = (sin_t *)sa;
2089 		*sin = sin_null;
2090 		sin->sin_family = AF_INET;
2091 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2092 		    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2093 			sin->sin_addr.s_addr = connp->conn_saddr_v4;
2094 		} else {
2095 			/*
2096 			 * INADDR_ANY
2097 			 * conn_saddr is not set, we might be bound to
2098 			 * broadcast/multicast. Use conn_bound_addr as
2099 			 * local address instead (that could
2100 			 * also still be INADDR_ANY)
2101 			 */
2102 			sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2103 		}
2104 		sin->sin_port = connp->conn_lport;
2105 	} else {
2106 		if (*salenp < sizeof (sin6_t))
2107 			return (EINVAL);
2108 
2109 		*salenp = sizeof (sin6_t);
2110 		/* Fill zeroes and then initialize non-zero fields */
2111 		sin6 = (sin6_t *)sa;
2112 		*sin6 = sin6_null;
2113 		sin6->sin6_family = AF_INET6;
2114 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2115 			sin6->sin6_addr = connp->conn_saddr_v6;
2116 		} else {
2117 			/*
2118 			 * conn_saddr is not set, we might be bound to
2119 			 * broadcast/multicast. Use conn_bound_addr as
2120 			 * local address instead (which could
2121 			 * also still be unspecified)
2122 			 */
2123 			sin6->sin6_addr = connp->conn_bound_addr_v6;
2124 		}
2125 		sin6->sin6_port = connp->conn_lport;
2126 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2127 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2128 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2129 	}
2130 	return (0);
2131 }
2132 
2133 int
2134 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2135 {
2136 	struct sockaddr_in	*sin;
2137 	struct sockaddr_in6	*sin6;
2138 
2139 	if (connp->conn_family == AF_INET) {
2140 		if (*salenp < sizeof (sin_t))
2141 			return (EINVAL);
2142 
2143 		*salenp = sizeof (sin_t);
2144 		/* initialize */
2145 		sin = (sin_t *)sa;
2146 		*sin = sin_null;
2147 		sin->sin_family = AF_INET;
2148 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
2149 		sin->sin_port = connp->conn_fport;
2150 	} else {
2151 		if (*salenp < sizeof (sin6_t))
2152 			return (EINVAL);
2153 
2154 		*salenp = sizeof (sin6_t);
2155 		/* initialize */
2156 		sin6 = (sin6_t *)sa;
2157 		*sin6 = sin6_null;
2158 		sin6->sin6_family = AF_INET6;
2159 		sin6->sin6_addr = connp->conn_faddr_v6;
2160 		sin6->sin6_port =  connp->conn_fport;
2161 		sin6->sin6_flowinfo = connp->conn_flowinfo;
2162 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2163 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2164 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2165 	}
2166 	return (0);
2167 }
2168 
2169 static uint32_t	cksum_massage_options_v4(ipha_t *, netstack_t *);
2170 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2171 
2172 /*
2173  * Allocate and fill in conn_ht_iphc based on the current information
2174  * in the conn.
2175  * Normally used when we bind() and connect().
2176  * Returns failure if can't allocate memory, or if there is a problem
2177  * with a routing header/option.
2178  *
2179  * We allocate space for the transport header (ulp_hdr_len + extra) and
2180  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2181  * The extra is there for transports that want some spare room for future
2182  * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2183  * excludes the extra part.
2184  *
2185  * We massage an routing option/header and store the ckecksum difference
2186  * in conn_sum.
2187  *
2188  * Caller needs to update conn_wroff if desired.
2189  */
2190 int
2191 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2192     const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2193 {
2194 	ip_xmit_attr_t	*ixa = connp->conn_ixa;
2195 	ip_pkt_t	*ipp = &connp->conn_xmit_ipp;
2196 	uint_t		ip_hdr_length;
2197 	uchar_t		*hdrs;
2198 	uint_t		hdrs_len;
2199 
2200 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2201 
2202 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2203 		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2204 		/* In case of TX label and IP options it can be too much */
2205 		if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2206 			/* Preserves existing TX errno for this */
2207 			return (EHOSTUNREACH);
2208 		}
2209 	} else {
2210 		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2211 	}
2212 	ixa->ixa_ip_hdr_length = ip_hdr_length;
2213 	hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2214 	ASSERT(hdrs_len != 0);
2215 
2216 	if (hdrs_len != connp->conn_ht_iphc_allocated) {
2217 		/* Allocate new before we free any old */
2218 		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2219 		if (hdrs == NULL)
2220 			return (ENOMEM);
2221 
2222 		if (connp->conn_ht_iphc != NULL) {
2223 			kmem_free(connp->conn_ht_iphc,
2224 			    connp->conn_ht_iphc_allocated);
2225 		}
2226 		connp->conn_ht_iphc = hdrs;
2227 		connp->conn_ht_iphc_allocated = hdrs_len;
2228 	} else {
2229 		hdrs = connp->conn_ht_iphc;
2230 	}
2231 	hdrs_len -= extra;
2232 	connp->conn_ht_iphc_len = hdrs_len;
2233 
2234 	connp->conn_ht_ulp = hdrs + ip_hdr_length;
2235 	connp->conn_ht_ulp_len = ulp_hdr_length;
2236 
2237 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2238 		ipha_t	*ipha = (ipha_t *)hdrs;
2239 
2240 		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2241 		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2242 		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2243 		ipha->ipha_length = htons(hdrs_len);
2244 		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2245 			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2246 		else
2247 			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2248 
2249 		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2250 			connp->conn_sum = cksum_massage_options_v4(ipha,
2251 			    connp->conn_netstack);
2252 		} else {
2253 			connp->conn_sum = 0;
2254 		}
2255 	} else {
2256 		ip6_t	*ip6h = (ip6_t *)hdrs;
2257 
2258 		ip6h->ip6_src = *v6src;
2259 		ip6h->ip6_dst = *v6dst;
2260 		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2261 		    flowinfo);
2262 		ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2263 
2264 		if (ipp->ipp_fields & IPPF_RTHDR) {
2265 			connp->conn_sum = cksum_massage_options_v6(ip6h,
2266 			    ip_hdr_length, connp->conn_netstack);
2267 
2268 			/*
2269 			 * Verify that the first hop isn't a mapped address.
2270 			 * Routers along the path need to do this verification
2271 			 * for subsequent hops.
2272 			 */
2273 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2274 				return (EADDRNOTAVAIL);
2275 
2276 		} else {
2277 			connp->conn_sum = 0;
2278 		}
2279 	}
2280 	return (0);
2281 }
2282 
2283 /*
2284  * Prepend a header template to data_mp based on the ip_pkt_t
2285  * and the passed in source, destination and protocol.
2286  *
2287  * Returns failure if can't allocate memory, in which case data_mp is freed.
2288  * We allocate space for the transport header (ulp_hdr_len) and
2289  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2290  *
2291  * We massage an routing option/header and return the ckecksum difference
2292  * in *sump. This is in host byte order.
2293  *
2294  * Caller needs to update conn_wroff if desired.
2295  */
2296 mblk_t *
2297 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2298     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2299     uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2300     uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2301 {
2302 	uint_t		ip_hdr_length;
2303 	uchar_t		*hdrs;
2304 	uint_t		hdrs_len;
2305 	mblk_t		*mp;
2306 
2307 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2308 		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2309 		ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2310 	} else {
2311 		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2312 	}
2313 	hdrs_len = ip_hdr_length + ulp_hdr_length;
2314 	ASSERT(hdrs_len != 0);
2315 
2316 	ixa->ixa_ip_hdr_length = ip_hdr_length;
2317 
2318 	/* Can we prepend to data_mp? */
2319 	if (data_mp != NULL &&
2320 	    data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2321 	    data_mp->b_datap->db_ref == 1) {
2322 		hdrs = data_mp->b_rptr - hdrs_len;
2323 		data_mp->b_rptr = hdrs;
2324 		mp = data_mp;
2325 	} else {
2326 		mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2327 		if (mp == NULL) {
2328 			freemsg(data_mp);
2329 			*errorp = ENOMEM;
2330 			return (NULL);
2331 		}
2332 		mp->b_wptr = mp->b_datap->db_lim;
2333 		hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2334 		mp->b_cont = data_mp;
2335 	}
2336 
2337 	/*
2338 	 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2339 	 * if PKTINFO (aka IPPF_ADDR) was set.
2340 	 */
2341 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2342 		ipha_t *ipha = (ipha_t *)hdrs;
2343 
2344 		ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2345 		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2346 		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2347 		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2348 		ipha->ipha_length = htons(hdrs_len + data_length);
2349 		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2350 			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2351 		else
2352 			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2353 
2354 		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2355 			*sump = cksum_massage_options_v4(ipha,
2356 			    ixa->ixa_ipst->ips_netstack);
2357 		} else {
2358 			*sump = 0;
2359 		}
2360 	} else {
2361 		ip6_t *ip6h = (ip6_t *)hdrs;
2362 
2363 		ip6h->ip6_src = *v6src;
2364 		ip6h->ip6_dst = *v6dst;
2365 		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2366 		ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2367 
2368 		if (ipp->ipp_fields & IPPF_RTHDR) {
2369 			*sump = cksum_massage_options_v6(ip6h,
2370 			    ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2371 
2372 			/*
2373 			 * Verify that the first hop isn't a mapped address.
2374 			 * Routers along the path need to do this verification
2375 			 * for subsequent hops.
2376 			 */
2377 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2378 				*errorp = EADDRNOTAVAIL;
2379 				freemsg(mp);
2380 				return (NULL);
2381 			}
2382 		} else {
2383 			*sump = 0;
2384 		}
2385 	}
2386 	return (mp);
2387 }
2388 
2389 /*
2390  * Massage a source route if any putting the first hop
2391  * in ipha_dst. Compute a starting value for the checksum which
2392  * takes into account that the original ipha_dst should be
2393  * included in the checksum but that IP will include the
2394  * first hop from the source route in the tcp checksum.
2395  */
2396 static uint32_t
2397 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2398 {
2399 	in_addr_t	dst;
2400 	uint32_t	cksum;
2401 
2402 	/* Get last hop then diff against first hop */
2403 	cksum = ip_massage_options(ipha, ns);
2404 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2405 	dst = ipha->ipha_dst;
2406 	cksum -= ((dst >> 16) + (dst & 0xffff));
2407 	if ((int)cksum < 0)
2408 		cksum--;
2409 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2410 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2411 	ASSERT(cksum < 0x10000);
2412 	return (ntohs(cksum));
2413 }
2414 
2415 static uint32_t
2416 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2417 {
2418 	uint8_t		*end;
2419 	ip6_rthdr_t	*rth;
2420 	uint32_t	cksum;
2421 
2422 	end = (uint8_t *)ip6h + ip_hdr_len;
2423 	rth = ip_find_rthdr_v6(ip6h, end);
2424 	if (rth == NULL)
2425 		return (0);
2426 
2427 	cksum = ip_massage_options_v6(ip6h, rth, ns);
2428 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2429 	ASSERT(cksum < 0x10000);
2430 	return (ntohs(cksum));
2431 }
2432 
2433 /*
2434  * ULPs that change the destination address need to call this for each
2435  * change to discard any state about a previous destination that might
2436  * have been multicast or multirt.
2437  */
2438 void
2439 ip_attr_newdst(ip_xmit_attr_t *ixa)
2440 {
2441 	ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2442 	    IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2443 	    IXAF_NO_LOOP_ZONEID_SET);
2444 }
2445 
2446 /*
2447  * Determine the nexthop which will be used.
2448  * Normally this is just the destination, but if a IPv4 source route, or
2449  * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2450  * there.
2451  */
2452 void
2453 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2454     const in6_addr_t *dst, in6_addr_t *nexthop)
2455 {
2456 	if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2457 		*nexthop = *dst;
2458 		return;
2459 	}
2460 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2461 		ipaddr_t v4dst;
2462 		ipaddr_t v4nexthop;
2463 
2464 		IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2465 		v4nexthop = ip_pkt_source_route_v4(ipp);
2466 		if (v4nexthop == INADDR_ANY)
2467 			v4nexthop = v4dst;
2468 
2469 		IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2470 	} else {
2471 		const in6_addr_t *v6nexthop;
2472 
2473 		v6nexthop = ip_pkt_source_route_v6(ipp);
2474 		if (v6nexthop == NULL)
2475 			v6nexthop = dst;
2476 
2477 		*nexthop = *v6nexthop;
2478 	}
2479 }
2480 
2481 /*
2482  * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2483  * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2484  * case (connected latching is done in conn_connect).
2485  * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2486  * set, but doesn't otherwise use the conn_t.
2487  *
2488  * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2489  * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2490  *
2491  * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2492  * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2493  *
2494  * Updates laddrp and uinfo if they are non-NULL.
2495  *
2496  * TSOL notes: The callers if ip_attr_connect must check if the destination
2497  * is different than before and in that case redo conn_update_label.
2498  * The callers of conn_connect do not need that since conn_connect
2499  * performs the conn_update_label.
2500  */
2501 int
2502 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2503     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2504     const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2505     iulp_t *uinfo, uint32_t flags)
2506 {
2507 	in6_addr_t		laddr = *v6src;
2508 	int			error;
2509 
2510 	ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2511 
2512 	if (connp->conn_zone_is_global)
2513 		flags |= IPDF_ZONE_IS_GLOBAL;
2514 	else
2515 		flags &= ~IPDF_ZONE_IS_GLOBAL;
2516 
2517 	/*
2518 	 * Lookup the route to determine a source address and the uinfo.
2519 	 * If the ULP has a source route option then the caller will
2520 	 * have set v6nexthop to be the first hop.
2521 	 */
2522 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2523 		ipaddr_t v4dst;
2524 		ipaddr_t v4src, v4nexthop;
2525 
2526 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2527 		IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2528 		IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2529 
2530 		if (connp->conn_unspec_src || v4src != INADDR_ANY)
2531 			flags &= ~IPDF_SELECT_SRC;
2532 		else
2533 			flags |= IPDF_SELECT_SRC;
2534 
2535 		error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2536 		    uinfo, flags, connp->conn_mac_mode);
2537 		IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2538 	} else {
2539 		if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2540 			flags &= ~IPDF_SELECT_SRC;
2541 		else
2542 			flags |= IPDF_SELECT_SRC;
2543 
2544 		error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2545 		    uinfo, flags, connp->conn_mac_mode);
2546 	}
2547 	/* Pass out some address even if we hit a RTF_REJECT etc */
2548 	if (laddrp != NULL)
2549 		*laddrp = laddr;
2550 
2551 	if (error != 0)
2552 		return (error);
2553 
2554 	if (flags & IPDF_IPSEC) {
2555 		/*
2556 		 * Set any IPsec policy in ixa. Routine also looks at ULP
2557 		 * ports.
2558 		 */
2559 		ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2560 	}
2561 	return (0);
2562 }
2563 
2564 /*
2565  * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2566  * Assumes that conn_faddr and conn_fport are already set. As such it is not
2567  * usable for SCTP, since SCTP has multiple faddrs.
2568  *
2569  * Caller must hold conn_lock to provide atomic constency between the
2570  * conn_t's addresses and the ixa.
2571  * NOTE: this function drops and reaquires conn_lock since it can't be
2572  * held across ip_attr_connect/ip_set_destination.
2573  *
2574  * The caller needs to handle inserting in the receive-side fanout when
2575  * appropriate after conn_connect returns.
2576  */
2577 int
2578 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2579 {
2580 	ip_xmit_attr_t	*ixa = connp->conn_ixa;
2581 	in6_addr_t	nexthop;
2582 	in6_addr_t	saddr, faddr;
2583 	in_port_t	fport;
2584 	int		error;
2585 
2586 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2587 
2588 	if (connp->conn_ipversion == IPV4_VERSION)
2589 		ixa->ixa_flags |= IXAF_IS_IPV4;
2590 	else
2591 		ixa->ixa_flags &= ~IXAF_IS_IPV4;
2592 
2593 	/* We do IPsec latching below - hence no caching in ip_attr_connect */
2594 	flags &= ~IPDF_IPSEC;
2595 
2596 	/* In case we had previously done an ip_attr_connect */
2597 	ip_attr_newdst(ixa);
2598 
2599 	/*
2600 	 * Determine the nexthop and copy the addresses before dropping
2601 	 * conn_lock.
2602 	 */
2603 	ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2604 	    &connp->conn_faddr_v6, &nexthop);
2605 	saddr = connp->conn_saddr_v6;
2606 	faddr = connp->conn_faddr_v6;
2607 	fport = connp->conn_fport;
2608 
2609 	mutex_exit(&connp->conn_lock);
2610 	error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2611 	    &saddr, uinfo, flags | IPDF_VERIFY_DST);
2612 	mutex_enter(&connp->conn_lock);
2613 
2614 	/* Could have changed even if an error */
2615 	connp->conn_saddr_v6 = saddr;
2616 	if (error != 0)
2617 		return (error);
2618 
2619 	/*
2620 	 * Check whether Trusted Solaris policy allows communication with this
2621 	 * host, and pretend that the destination is unreachable if not.
2622 	 * Compute any needed label and place it in ipp_label_v4/v6.
2623 	 *
2624 	 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2625 	 * the packet.
2626 	 *
2627 	 * TSOL Note: Any concurrent threads would pick a different ixa
2628 	 * (and ipp if they are to change the ipp)  so we
2629 	 * don't have to worry about concurrent threads.
2630 	 */
2631 	if (is_system_labeled()) {
2632 		if (connp->conn_mlp_type != mlptSingle)
2633 			return (ECONNREFUSED);
2634 
2635 		/*
2636 		 * conn_update_label will set ipp_label* which will later
2637 		 * be used by conn_build_hdr_template.
2638 		 */
2639 		error = conn_update_label(connp, ixa,
2640 		    &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2641 		if (error != 0)
2642 			return (error);
2643 	}
2644 
2645 	/*
2646 	 * Ensure that we match on the selected local address.
2647 	 * This overrides conn_laddr in the case we had earlier bound to a
2648 	 * multicast or broadcast address.
2649 	 */
2650 	connp->conn_laddr_v6 = connp->conn_saddr_v6;
2651 
2652 	/*
2653 	 * Allow setting new policies.
2654 	 * The addresses/ports are already set, thus the IPsec policy calls
2655 	 * can handle their passed-in conn's.
2656 	 */
2657 	connp->conn_policy_cached = B_FALSE;
2658 
2659 	/*
2660 	 * Cache IPsec policy in this conn.  If we have per-socket policy,
2661 	 * we'll cache that.  If we don't, we'll inherit global policy.
2662 	 *
2663 	 * This is done before the caller inserts in the receive-side fanout.
2664 	 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2665 	 * for connections where we don't have a policy. This is to prevent
2666 	 * global policy lookups in the inbound path.
2667 	 *
2668 	 * If we insert before we set conn_policy_cached,
2669 	 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2670 	 * because global policy cound be non-empty. We normally call
2671 	 * ipsec_check_policy() for conn_policy_cached connections only if
2672 	 * conn_in_enforce_policy is set. But in this case,
2673 	 * conn_policy_cached can get set anytime since we made the
2674 	 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2675 	 * called, which will make the above assumption false.  Thus, we
2676 	 * need to insert after we set conn_policy_cached.
2677 	 */
2678 	error = ipsec_conn_cache_policy(connp,
2679 	    connp->conn_ipversion == IPV4_VERSION);
2680 	if (error != 0)
2681 		return (error);
2682 
2683 	/*
2684 	 * We defer to do LSO check until here since now we have better idea
2685 	 * whether IPsec is present. If the underlying ill is LSO capable,
2686 	 * copy its capability in so the ULP can decide whether to enable LSO
2687 	 * on this connection. So far, only TCP/IPv4 is implemented, so won't
2688 	 * claim LSO for IPv6.
2689 	 *
2690 	 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2691 	 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2692 	 */
2693 	ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2694 
2695 	ASSERT(ixa->ixa_ire != NULL);
2696 	if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2697 	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2698 	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2699 	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2700 	    (ixa->ixa_nce != NULL) &&
2701 	    ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2702 	    ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2703 	    ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2704 		ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2705 		ixa->ixa_flags |= IXAF_LSO_CAPAB;
2706 	}
2707 
2708 	/* Check whether ZEROCOPY capability is usable for this connection. */
2709 	ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2710 
2711 	if ((flags & IPDF_ZCOPY) &&
2712 	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2713 	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2714 	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2715 	    (ixa->ixa_nce != NULL) &&
2716 	    ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2717 		ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2718 	}
2719 	return (0);
2720 }
2721 
2722 /*
2723  * Predicates to check if the addresses match conn_last*
2724  */
2725 
2726 /*
2727  * Compare the conn against an address.
2728  * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2729  */
2730 boolean_t
2731 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2732 {
2733 	ASSERT(connp->conn_family == AF_INET);
2734 	return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2735 	    sin->sin_port == connp->conn_lastdstport);
2736 }
2737 
2738 /*
2739  * Compare, including for mapped addresses
2740  */
2741 boolean_t
2742 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2743 {
2744 	return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2745 	    sin6->sin6_port == connp->conn_lastdstport &&
2746 	    sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2747 	    sin6->sin6_scope_id == connp->conn_lastscopeid);
2748 }
2749 
2750 /*
2751  * Compute a label and place it in the ip_packet_t.
2752  * Handles IPv4 and IPv6.
2753  * The caller should have a correct ixa_tsl and ixa_zoneid and have
2754  * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2755  * has been called.
2756  */
2757 int
2758 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2759     const in6_addr_t *v6dst, ip_pkt_t *ipp)
2760 {
2761 	int		err;
2762 	ipaddr_t	v4dst;
2763 
2764 	if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2765 		uchar_t		opt_storage[IP_MAX_OPT_LENGTH];
2766 
2767 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2768 
2769 		err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2770 		    v4dst, opt_storage, ixa->ixa_ipst);
2771 		if (err == 0) {
2772 			/* Length contained in opt_storage[IPOPT_OLEN] */
2773 			err = optcom_pkt_set(opt_storage,
2774 			    opt_storage[IPOPT_OLEN],
2775 			    (uchar_t **)&ipp->ipp_label_v4,
2776 			    &ipp->ipp_label_len_v4);
2777 		}
2778 		if (err != 0) {
2779 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2780 			    char *, "conn(1) failed to update options(2) "
2781 			    "on ixa(3)",
2782 			    conn_t *, connp, char *, opt_storage,
2783 			    ip_xmit_attr_t *, ixa);
2784 		}
2785 		if (ipp->ipp_label_len_v4 != 0)
2786 			ipp->ipp_fields |= IPPF_LABEL_V4;
2787 		else
2788 			ipp->ipp_fields &= ~IPPF_LABEL_V4;
2789 	} else {
2790 		uchar_t		opt_storage[TSOL_MAX_IPV6_OPTION];
2791 		uint_t		optlen;
2792 
2793 		err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2794 		    v6dst, opt_storage, ixa->ixa_ipst);
2795 		if (err == 0) {
2796 			/*
2797 			 * Note that ipp_label_v6 is just the option - not
2798 			 * the hopopts extension header.
2799 			 *
2800 			 * Length contained in opt_storage[IPOPT_OLEN], but
2801 			 * that doesn't include the two byte options header.
2802 			 */
2803 			optlen = opt_storage[IPOPT_OLEN];
2804 			if (optlen != 0)
2805 				optlen += 2;
2806 
2807 			err = optcom_pkt_set(opt_storage, optlen,
2808 			    (uchar_t **)&ipp->ipp_label_v6,
2809 			    &ipp->ipp_label_len_v6);
2810 		}
2811 		if (err != 0) {
2812 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2813 			    char *, "conn(1) failed to update options(2) "
2814 			    "on ixa(3)",
2815 			    conn_t *, connp, char *, opt_storage,
2816 			    ip_xmit_attr_t *, ixa);
2817 		}
2818 		if (ipp->ipp_label_len_v6 != 0)
2819 			ipp->ipp_fields |= IPPF_LABEL_V6;
2820 		else
2821 			ipp->ipp_fields &= ~IPPF_LABEL_V6;
2822 	}
2823 	return (err);
2824 }
2825 
2826 /*
2827  * Inherit all options settings from the parent/listener to the eager.
2828  * Returns zero on success; ENOMEM if memory allocation failed.
2829  *
2830  * We assume that the eager has not had any work done i.e., the conn_ixa
2831  * and conn_xmit_ipp are all zero.
2832  * Furthermore we assume that no other thread can access the eager (because
2833  * it isn't inserted in any fanout list).
2834  */
2835 int
2836 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2837 {
2838 	cred_t	*credp;
2839 	int	err;
2840 	void	*notify_cookie;
2841 	uint32_t xmit_hint;
2842 
2843 	econnp->conn_family = lconnp->conn_family;
2844 	econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2845 	econnp->conn_wq = lconnp->conn_wq;
2846 	econnp->conn_rq = lconnp->conn_rq;
2847 
2848 	/*
2849 	 * Make a safe copy of the transmit attributes.
2850 	 * conn_connect will later be used by the caller to setup the ire etc.
2851 	 */
2852 	ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2853 	ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2854 	ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2855 	ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2856 
2857 	/* Preserve ixa_notify_cookie and xmit_hint */
2858 	notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2859 	xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2860 	ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2861 	econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2862 	econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2863 
2864 	econnp->conn_bound_if = lconnp->conn_bound_if;
2865 	econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2866 
2867 	/* Inherit all RECV options */
2868 	econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2869 
2870 	err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2871 	    KM_NOSLEEP);
2872 	if (err != 0)
2873 		return (err);
2874 
2875 	econnp->conn_zoneid = lconnp->conn_zoneid;
2876 	econnp->conn_allzones = lconnp->conn_allzones;
2877 
2878 	/* This is odd. Pick a flowlabel for each connection instead? */
2879 	econnp->conn_flowinfo = lconnp->conn_flowinfo;
2880 
2881 	econnp->conn_default_ttl = lconnp->conn_default_ttl;
2882 
2883 	/*
2884 	 * TSOL: tsol_input_proc() needs the eager's cred before the
2885 	 * eager is accepted
2886 	 */
2887 	ASSERT(lconnp->conn_cred != NULL);
2888 	econnp->conn_cred = credp = lconnp->conn_cred;
2889 	crhold(credp);
2890 	econnp->conn_cpid = lconnp->conn_cpid;
2891 	econnp->conn_open_time = ddi_get_lbolt64();
2892 
2893 	/*
2894 	 * Cache things in the ixa without any refhold.
2895 	 * Listener might not have set up ixa_cred
2896 	 */
2897 	ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2898 	econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2899 	econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2900 	if (is_system_labeled())
2901 		econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2902 
2903 	/*
2904 	 * If the caller has the process-wide flag set, then default to MAC
2905 	 * exempt mode.  This allows read-down to unlabeled hosts.
2906 	 */
2907 	if (getpflags(NET_MAC_AWARE, credp) != 0)
2908 		econnp->conn_mac_mode = CONN_MAC_AWARE;
2909 
2910 	econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2911 
2912 	/*
2913 	 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2914 	 * via soaccept()->soinheritoptions() which essentially applies
2915 	 * all the listener options to the new connection. The options that we
2916 	 * need to take care of are:
2917 	 * SO_DEBUG, SO_REUSEADDR, SO_REUSEPORT, SO_KEEPALIVE, SO_DONTROUTE,
2918 	 * SO_BROADCAST, SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND,
2919 	 * SO_LINGER, SO_SNDBUF, SO_RCVBUF.
2920 	 *
2921 	 * SO_RCVBUF:	conn_rcvbuf is set.
2922 	 * SO_SNDBUF:	conn_sndbuf is set.
2923 	 */
2924 
2925 	/* Could we define a struct and use a struct copy for this? */
2926 	econnp->conn_sndbuf = lconnp->conn_sndbuf;
2927 	econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2928 	econnp->conn_sndlowat = lconnp->conn_sndlowat;
2929 	econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2930 	econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2931 	econnp->conn_oobinline = lconnp->conn_oobinline;
2932 	econnp->conn_debug = lconnp->conn_debug;
2933 	econnp->conn_keepalive = lconnp->conn_keepalive;
2934 	econnp->conn_linger = lconnp->conn_linger;
2935 	econnp->conn_lingertime = lconnp->conn_lingertime;
2936 
2937 	/* Set the IP options */
2938 	econnp->conn_broadcast = lconnp->conn_broadcast;
2939 	econnp->conn_useloopback = lconnp->conn_useloopback;
2940 	econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2941 	econnp->conn_reuseport = lconnp->conn_reuseport;
2942 	return (0);
2943 }
2944