xref: /illumos-gate/usr/src/uts/common/inet/ip/conn_opt.c (revision fb2a9bae0030340ad72b9c26ba1ffee2ee3cafec)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /* Copyright (c) 1990 Mentat Inc. */
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/strsun.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/xti_inet.h>
34 #include <sys/ucred.h>
35 #include <sys/zone.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/atomic.h>
41 #include <sys/policy.h>
42 
43 #include <sys/systm.h>
44 #include <sys/param.h>
45 #include <sys/kmem.h>
46 #include <sys/sdt.h>
47 #include <sys/socket.h>
48 #include <sys/ethernet.h>
49 #include <sys/mac.h>
50 #include <net/if.h>
51 #include <net/if_types.h>
52 #include <net/if_arp.h>
53 #include <net/route.h>
54 #include <sys/sockio.h>
55 #include <netinet/in.h>
56 #include <net/if_dl.h>
57 
58 #include <inet/common.h>
59 #include <inet/mi.h>
60 #include <inet/mib2.h>
61 #include <inet/nd.h>
62 #include <inet/arp.h>
63 #include <inet/snmpcom.h>
64 #include <inet/kstatcom.h>
65 
66 #include <netinet/igmp_var.h>
67 #include <netinet/ip6.h>
68 #include <netinet/icmp6.h>
69 #include <netinet/sctp.h>
70 
71 #include <inet/ip.h>
72 #include <inet/ip_impl.h>
73 #include <inet/ip6.h>
74 #include <inet/ip6_asp.h>
75 #include <inet/tcp.h>
76 #include <inet/ip_multi.h>
77 #include <inet/ip_if.h>
78 #include <inet/ip_ire.h>
79 #include <inet/ip_ftable.h>
80 #include <inet/ip_rts.h>
81 #include <inet/optcom.h>
82 #include <inet/ip_ndp.h>
83 #include <inet/ip_listutils.h>
84 #include <netinet/igmp.h>
85 #include <netinet/ip_mroute.h>
86 #include <netinet/udp.h>
87 #include <inet/ipp_common.h>
88 
89 #include <net/pfkeyv2.h>
90 #include <inet/sadb.h>
91 #include <inet/ipsec_impl.h>
92 #include <inet/ipdrop.h>
93 #include <inet/ip_netinfo.h>
94 
95 #include <inet/ipclassifier.h>
96 #include <inet/sctp_ip.h>
97 #include <inet/sctp/sctp_impl.h>
98 #include <inet/udp_impl.h>
99 #include <sys/sunddi.h>
100 
101 #include <sys/tsol/label.h>
102 #include <sys/tsol/tnet.h>
103 
104 /*
105  * Return how much size is needed for the different ancillary data items
106  */
107 uint_t
108 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
109     ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
110 {
111 	uint_t		ancil_size;
112 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
113 
114 	/*
115 	 * If IP_RECVDSTADDR is set we include the destination IP
116 	 * address as an option. With IP_RECVOPTS we include all
117 	 * the IP options.
118 	 */
119 	ancil_size = 0;
120 	if (recv_ancillary.crb_recvdstaddr &&
121 	    (ira->ira_flags & IRAF_IS_IPV4)) {
122 		ancil_size += sizeof (struct T_opthdr) +
123 		    sizeof (struct in_addr);
124 		IP_STAT(ipst, conn_in_recvdstaddr);
125 	}
126 
127 	/*
128 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
129 	 * are different
130 	 */
131 	if (recv_ancillary.crb_ip_recvpktinfo &&
132 	    connp->conn_family == AF_INET) {
133 		ancil_size += sizeof (struct T_opthdr) +
134 		    sizeof (struct in_pktinfo);
135 		IP_STAT(ipst, conn_in_recvpktinfo);
136 	}
137 
138 	if ((recv_ancillary.crb_recvopts) &&
139 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
140 		ancil_size += sizeof (struct T_opthdr) +
141 		    ipp->ipp_ipv4_options_len;
142 		IP_STAT(ipst, conn_in_recvopts);
143 	}
144 
145 	if (recv_ancillary.crb_recvslla) {
146 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
147 		ill_t *ill;
148 
149 		/* Make sure ira_l2src is setup if not already */
150 		if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
151 			ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
152 			    ipst);
153 			if (ill != NULL) {
154 				ip_setl2src(mp, ira, ill);
155 				ill_refrele(ill);
156 			}
157 		}
158 		ancil_size += sizeof (struct T_opthdr) +
159 		    sizeof (struct sockaddr_dl);
160 		IP_STAT(ipst, conn_in_recvslla);
161 	}
162 
163 	if (recv_ancillary.crb_recvif) {
164 		ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
165 		IP_STAT(ipst, conn_in_recvif);
166 	}
167 
168 	/*
169 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
170 	 * are different
171 	 */
172 	if (recv_ancillary.crb_ip_recvpktinfo &&
173 	    connp->conn_family == AF_INET6) {
174 		ancil_size += sizeof (struct T_opthdr) +
175 		    sizeof (struct in6_pktinfo);
176 		IP_STAT(ipst, conn_in_recvpktinfo);
177 	}
178 
179 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
180 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
181 		IP_STAT(ipst, conn_in_recvhoplimit);
182 	}
183 
184 	if (recv_ancillary.crb_ipv6_recvtclass) {
185 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
186 		IP_STAT(ipst, conn_in_recvtclass);
187 	}
188 
189 	if (recv_ancillary.crb_ipv6_recvhopopts &&
190 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
191 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
192 		IP_STAT(ipst, conn_in_recvhopopts);
193 	}
194 	/*
195 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
196 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
197 	 * options that appear before a routing header.
198 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
199 	 */
200 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
201 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
202 		    (recv_ancillary.crb_ipv6_recvdstopts &&
203 		    recv_ancillary.crb_ipv6_recvrthdr)) {
204 			ancil_size += sizeof (struct T_opthdr) +
205 			    ipp->ipp_rthdrdstoptslen;
206 			IP_STAT(ipst, conn_in_recvrthdrdstopts);
207 		}
208 	}
209 	if ((recv_ancillary.crb_ipv6_recvrthdr) &&
210 	    (ipp->ipp_fields & IPPF_RTHDR)) {
211 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
212 		IP_STAT(ipst, conn_in_recvrthdr);
213 	}
214 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
215 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
216 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
217 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
218 		IP_STAT(ipst, conn_in_recvdstopts);
219 	}
220 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
221 		ancil_size += sizeof (struct T_opthdr) +
222 		    ucredminsize(ira->ira_cred);
223 		IP_STAT(ipst, conn_in_recvucred);
224 	}
225 
226 	/*
227 	 * If SO_TIMESTAMP is set allocate the appropriate sized
228 	 * buffer. Since gethrestime() expects a pointer aligned
229 	 * argument, we allocate space necessary for extra
230 	 * alignment (even though it might not be used).
231 	 */
232 	if (recv_ancillary.crb_timestamp) {
233 		ancil_size += sizeof (struct T_opthdr) +
234 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
235 		IP_STAT(ipst, conn_in_timestamp);
236 	}
237 
238 	/*
239 	 * If IP_RECVTTL is set allocate the appropriate sized buffer
240 	 */
241 	if (recv_ancillary.crb_recvttl &&
242 	    (ira->ira_flags & IRAF_IS_IPV4)) {
243 		ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
244 		IP_STAT(ipst, conn_in_recvttl);
245 	}
246 
247 	return (ancil_size);
248 }
249 
250 /*
251  * Lay down the ancillary data items at "ancil_buf".
252  * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
253  * large buffer - ancil_size.
254  */
255 void
256 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
257     ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
258 {
259 	/*
260 	 * Copy in destination address before options to avoid
261 	 * any padding issues.
262 	 */
263 	if (recv_ancillary.crb_recvdstaddr &&
264 	    (ira->ira_flags & IRAF_IS_IPV4)) {
265 		struct T_opthdr *toh;
266 		ipaddr_t *dstptr;
267 
268 		toh = (struct T_opthdr *)ancil_buf;
269 		toh->level = IPPROTO_IP;
270 		toh->name = IP_RECVDSTADDR;
271 		toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
272 		toh->status = 0;
273 		ancil_buf += sizeof (struct T_opthdr);
274 		dstptr = (ipaddr_t *)ancil_buf;
275 		*dstptr = ipp->ipp_addr_v4;
276 		ancil_buf += sizeof (ipaddr_t);
277 		ancil_size -= toh->len;
278 	}
279 
280 	/*
281 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
282 	 * are different
283 	 */
284 	if (recv_ancillary.crb_ip_recvpktinfo &&
285 	    connp->conn_family == AF_INET) {
286 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
287 		struct T_opthdr *toh;
288 		struct in_pktinfo *pktinfop;
289 		ill_t *ill;
290 		ipif_t *ipif;
291 
292 		toh = (struct T_opthdr *)ancil_buf;
293 		toh->level = IPPROTO_IP;
294 		toh->name = IP_PKTINFO;
295 		toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
296 		toh->status = 0;
297 		ancil_buf += sizeof (struct T_opthdr);
298 		pktinfop = (struct in_pktinfo *)ancil_buf;
299 
300 		pktinfop->ipi_ifindex = ira->ira_ruifindex;
301 		pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
302 
303 		/* Find a good address to report */
304 		ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
305 		if (ill != NULL) {
306 			ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
307 			if (ipif != NULL) {
308 				pktinfop->ipi_spec_dst.s_addr =
309 				    ipif->ipif_lcl_addr;
310 				ipif_refrele(ipif);
311 			}
312 			ill_refrele(ill);
313 		}
314 		pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
315 		ancil_buf += sizeof (struct in_pktinfo);
316 		ancil_size -= toh->len;
317 	}
318 
319 	if ((recv_ancillary.crb_recvopts) &&
320 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
321 		struct T_opthdr *toh;
322 
323 		toh = (struct T_opthdr *)ancil_buf;
324 		toh->level = IPPROTO_IP;
325 		toh->name = IP_RECVOPTS;
326 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
327 		toh->status = 0;
328 		ancil_buf += sizeof (struct T_opthdr);
329 		bcopy(ipp->ipp_ipv4_options, ancil_buf,
330 		    ipp->ipp_ipv4_options_len);
331 		ancil_buf += ipp->ipp_ipv4_options_len;
332 		ancil_size -= toh->len;
333 	}
334 
335 	if (recv_ancillary.crb_recvslla) {
336 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
337 		struct T_opthdr *toh;
338 		struct sockaddr_dl *dstptr;
339 		ill_t *ill;
340 		int alen = 0;
341 
342 		ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
343 		if (ill != NULL)
344 			alen = ill->ill_phys_addr_length;
345 
346 		/*
347 		 * For loopback multicast and broadcast the packet arrives
348 		 * with ira_ruifdex being the physical interface, but
349 		 * ira_l2src is all zero since ip_postfrag_loopback doesn't
350 		 * know our l2src. We don't report the address in that case.
351 		 */
352 		if (ira->ira_flags & IRAF_LOOPBACK)
353 			alen = 0;
354 
355 		toh = (struct T_opthdr *)ancil_buf;
356 		toh->level = IPPROTO_IP;
357 		toh->name = IP_RECVSLLA;
358 		toh->len = sizeof (struct T_opthdr) +
359 		    sizeof (struct sockaddr_dl);
360 		toh->status = 0;
361 		ancil_buf += sizeof (struct T_opthdr);
362 		dstptr = (struct sockaddr_dl *)ancil_buf;
363 		dstptr->sdl_family = AF_LINK;
364 		dstptr->sdl_index = ira->ira_ruifindex;
365 		if (ill != NULL)
366 			dstptr->sdl_type = ill->ill_type;
367 		else
368 			dstptr->sdl_type = 0;
369 		dstptr->sdl_nlen = 0;
370 		dstptr->sdl_alen = alen;
371 		dstptr->sdl_slen = 0;
372 		bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
373 		ancil_buf += sizeof (struct sockaddr_dl);
374 		ancil_size -= toh->len;
375 		if (ill != NULL)
376 			ill_refrele(ill);
377 	}
378 
379 	if (recv_ancillary.crb_recvif) {
380 		struct T_opthdr *toh;
381 		uint_t		*dstptr;
382 
383 		toh = (struct T_opthdr *)ancil_buf;
384 		toh->level = IPPROTO_IP;
385 		toh->name = IP_RECVIF;
386 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
387 		toh->status = 0;
388 		ancil_buf += sizeof (struct T_opthdr);
389 		dstptr = (uint_t *)ancil_buf;
390 		*dstptr = ira->ira_ruifindex;
391 		ancil_buf += sizeof (uint_t);
392 		ancil_size -= toh->len;
393 	}
394 
395 	/*
396 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
397 	 * are different
398 	 */
399 	if (recv_ancillary.crb_ip_recvpktinfo &&
400 	    connp->conn_family == AF_INET6) {
401 		struct T_opthdr *toh;
402 		struct in6_pktinfo *pkti;
403 
404 		toh = (struct T_opthdr *)ancil_buf;
405 		toh->level = IPPROTO_IPV6;
406 		toh->name = IPV6_PKTINFO;
407 		toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
408 		toh->status = 0;
409 		ancil_buf += sizeof (struct T_opthdr);
410 		pkti = (struct in6_pktinfo *)ancil_buf;
411 		if (ira->ira_flags & IRAF_IS_IPV4) {
412 			IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
413 			    &pkti->ipi6_addr);
414 		} else {
415 			pkti->ipi6_addr = ipp->ipp_addr;
416 		}
417 		pkti->ipi6_ifindex = ira->ira_ruifindex;
418 
419 		ancil_buf += sizeof (*pkti);
420 		ancil_size -= toh->len;
421 	}
422 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
423 		struct T_opthdr *toh;
424 
425 		toh = (struct T_opthdr *)ancil_buf;
426 		toh->level = IPPROTO_IPV6;
427 		toh->name = IPV6_HOPLIMIT;
428 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
429 		toh->status = 0;
430 		ancil_buf += sizeof (struct T_opthdr);
431 		*(uint_t *)ancil_buf = ipp->ipp_hoplimit;
432 		ancil_buf += sizeof (uint_t);
433 		ancil_size -= toh->len;
434 	}
435 	if (recv_ancillary.crb_ipv6_recvtclass) {
436 		struct T_opthdr *toh;
437 
438 		toh = (struct T_opthdr *)ancil_buf;
439 		toh->level = IPPROTO_IPV6;
440 		toh->name = IPV6_TCLASS;
441 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
442 		toh->status = 0;
443 		ancil_buf += sizeof (struct T_opthdr);
444 
445 		if (ira->ira_flags & IRAF_IS_IPV4)
446 			*(uint_t *)ancil_buf = ipp->ipp_type_of_service;
447 		else
448 			*(uint_t *)ancil_buf = ipp->ipp_tclass;
449 		ancil_buf += sizeof (uint_t);
450 		ancil_size -= toh->len;
451 	}
452 	if (recv_ancillary.crb_ipv6_recvhopopts &&
453 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
454 		struct T_opthdr *toh;
455 
456 		toh = (struct T_opthdr *)ancil_buf;
457 		toh->level = IPPROTO_IPV6;
458 		toh->name = IPV6_HOPOPTS;
459 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
460 		toh->status = 0;
461 		ancil_buf += sizeof (struct T_opthdr);
462 		bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
463 		ancil_buf += ipp->ipp_hopoptslen;
464 		ancil_size -= toh->len;
465 	}
466 	/*
467 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
468 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
469 	 * options that appear before a routing header.
470 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
471 	 */
472 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
473 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
474 		    (recv_ancillary.crb_ipv6_recvdstopts &&
475 		    recv_ancillary.crb_ipv6_recvrthdr)) {
476 			struct T_opthdr *toh;
477 
478 			toh = (struct T_opthdr *)ancil_buf;
479 			toh->level = IPPROTO_IPV6;
480 			toh->name = IPV6_DSTOPTS;
481 			toh->len = sizeof (struct T_opthdr) +
482 			    ipp->ipp_rthdrdstoptslen;
483 			toh->status = 0;
484 			ancil_buf += sizeof (struct T_opthdr);
485 			bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
486 			    ipp->ipp_rthdrdstoptslen);
487 			ancil_buf += ipp->ipp_rthdrdstoptslen;
488 			ancil_size -= toh->len;
489 		}
490 	}
491 	if (recv_ancillary.crb_ipv6_recvrthdr &&
492 	    (ipp->ipp_fields & IPPF_RTHDR)) {
493 		struct T_opthdr *toh;
494 
495 		toh = (struct T_opthdr *)ancil_buf;
496 		toh->level = IPPROTO_IPV6;
497 		toh->name = IPV6_RTHDR;
498 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
499 		toh->status = 0;
500 		ancil_buf += sizeof (struct T_opthdr);
501 		bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
502 		ancil_buf += ipp->ipp_rthdrlen;
503 		ancil_size -= toh->len;
504 	}
505 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
506 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
507 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
508 		struct T_opthdr *toh;
509 
510 		toh = (struct T_opthdr *)ancil_buf;
511 		toh->level = IPPROTO_IPV6;
512 		toh->name = IPV6_DSTOPTS;
513 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
514 		toh->status = 0;
515 		ancil_buf += sizeof (struct T_opthdr);
516 		bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
517 		ancil_buf += ipp->ipp_dstoptslen;
518 		ancil_size -= toh->len;
519 	}
520 
521 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
522 		struct T_opthdr *toh;
523 		cred_t		*rcr = connp->conn_cred;
524 
525 		toh = (struct T_opthdr *)ancil_buf;
526 		toh->level = SOL_SOCKET;
527 		toh->name = SCM_UCRED;
528 		toh->len = sizeof (struct T_opthdr) +
529 		    ucredminsize(ira->ira_cred);
530 		toh->status = 0;
531 		(void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
532 		ancil_buf += toh->len;
533 		ancil_size -= toh->len;
534 	}
535 	if (recv_ancillary.crb_timestamp) {
536 		struct	T_opthdr *toh;
537 
538 		toh = (struct T_opthdr *)ancil_buf;
539 		toh->level = SOL_SOCKET;
540 		toh->name = SCM_TIMESTAMP;
541 		toh->len = sizeof (struct T_opthdr) +
542 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
543 		toh->status = 0;
544 		ancil_buf += sizeof (struct T_opthdr);
545 		/* Align for gethrestime() */
546 		ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
547 		    sizeof (intptr_t));
548 		gethrestime((timestruc_t *)ancil_buf);
549 		ancil_buf = (uchar_t *)toh + toh->len;
550 		ancil_size -= toh->len;
551 	}
552 
553 	/*
554 	 * CAUTION:
555 	 * Due to aligment issues
556 	 * Processing of IP_RECVTTL option
557 	 * should always be the last. Adding
558 	 * any option processing after this will
559 	 * cause alignment panic.
560 	 */
561 	if (recv_ancillary.crb_recvttl &&
562 	    (ira->ira_flags & IRAF_IS_IPV4)) {
563 		struct	T_opthdr *toh;
564 		uint8_t	*dstptr;
565 
566 		toh = (struct T_opthdr *)ancil_buf;
567 		toh->level = IPPROTO_IP;
568 		toh->name = IP_RECVTTL;
569 		toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
570 		toh->status = 0;
571 		ancil_buf += sizeof (struct T_opthdr);
572 		dstptr = (uint8_t *)ancil_buf;
573 		*dstptr = ipp->ipp_hoplimit;
574 		ancil_buf += sizeof (uint8_t);
575 		ancil_size -= toh->len;
576 	}
577 
578 	/* Consumed all of allocated space */
579 	ASSERT(ancil_size == 0);
580 
581 }
582 
583 /*
584  * This routine retrieves the current status of socket options.
585  * It returns the size of the option retrieved, or -1.
586  */
587 int
588 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
589     uchar_t *ptr)
590 {
591 	int		*i1 = (int *)ptr;
592 	conn_t		*connp = coa->coa_connp;
593 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
594 	ip_pkt_t	*ipp = coa->coa_ipp;
595 	ip_stack_t	*ipst = ixa->ixa_ipst;
596 	uint_t		len;
597 
598 	ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
599 
600 	switch (level) {
601 	case SOL_SOCKET:
602 		switch (name) {
603 		case SO_DEBUG:
604 			*i1 = connp->conn_debug ? SO_DEBUG : 0;
605 			break;	/* goto sizeof (int) option return */
606 		case SO_KEEPALIVE:
607 			*i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
608 			break;
609 		case SO_LINGER:	{
610 			struct linger *lgr = (struct linger *)ptr;
611 
612 			lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
613 			lgr->l_linger = connp->conn_lingertime;
614 			}
615 			return (sizeof (struct linger));
616 
617 		case SO_OOBINLINE:
618 			*i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
619 			break;
620 		case SO_REUSEADDR:
621 			*i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
622 			break;	/* goto sizeof (int) option return */
623 		case SO_TYPE:
624 			*i1 = connp->conn_so_type;
625 			break;	/* goto sizeof (int) option return */
626 		case SO_DONTROUTE:
627 			*i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
628 			    SO_DONTROUTE : 0;
629 			break;	/* goto sizeof (int) option return */
630 		case SO_USELOOPBACK:
631 			*i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
632 			break;	/* goto sizeof (int) option return */
633 		case SO_BROADCAST:
634 			*i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
635 			break;	/* goto sizeof (int) option return */
636 
637 		case SO_SNDBUF:
638 			*i1 = connp->conn_sndbuf;
639 			break;	/* goto sizeof (int) option return */
640 		case SO_RCVBUF:
641 			*i1 = connp->conn_rcvbuf;
642 			break;	/* goto sizeof (int) option return */
643 		case SO_RCVTIMEO:
644 		case SO_SNDTIMEO:
645 			/*
646 			 * Pass these two options in order for third part
647 			 * protocol usage. Here just return directly.
648 			 */
649 			*i1 = 0;
650 			break;
651 		case SO_DGRAM_ERRIND:
652 			*i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
653 			break;	/* goto sizeof (int) option return */
654 		case SO_RECVUCRED:
655 			*i1 = connp->conn_recv_ancillary.crb_recvucred;
656 			break;	/* goto sizeof (int) option return */
657 		case SO_TIMESTAMP:
658 			*i1 = connp->conn_recv_ancillary.crb_timestamp;
659 			break;	/* goto sizeof (int) option return */
660 		case SO_VRRP:
661 			*i1 = connp->conn_isvrrp;
662 			break;	/* goto sizeof (int) option return */
663 		case SO_ANON_MLP:
664 			*i1 = connp->conn_anon_mlp;
665 			break;	/* goto sizeof (int) option return */
666 		case SO_MAC_EXEMPT:
667 			*i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
668 			break;	/* goto sizeof (int) option return */
669 		case SO_MAC_IMPLICIT:
670 			*i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
671 			break;	/* goto sizeof (int) option return */
672 		case SO_ALLZONES:
673 			*i1 = connp->conn_allzones;
674 			break;	/* goto sizeof (int) option return */
675 		case SO_EXCLBIND:
676 			*i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
677 			break;
678 		case SO_PROTOTYPE:
679 			*i1 = connp->conn_proto;
680 			break;
681 
682 		case SO_DOMAIN:
683 			*i1 = connp->conn_family;
684 			break;
685 		default:
686 			return (-1);
687 		}
688 		break;
689 	case IPPROTO_IP:
690 		if (connp->conn_family != AF_INET)
691 			return (-1);
692 		switch (name) {
693 		case IP_OPTIONS:
694 		case T_IP_OPTIONS:
695 			if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
696 				return (0);
697 
698 			len = ipp->ipp_ipv4_options_len;
699 			if (len > 0) {
700 				bcopy(ipp->ipp_ipv4_options, ptr, len);
701 			}
702 			return (len);
703 
704 		case IP_PKTINFO: {
705 			/*
706 			 * This also handles IP_RECVPKTINFO.
707 			 * IP_PKTINFO and IP_RECVPKTINFO have same value.
708 			 * Differentiation is based on the size of the
709 			 * argument passed in.
710 			 */
711 			struct in_pktinfo *pktinfo;
712 
713 #ifdef notdef
714 			/* optcom doesn't provide a length with "get" */
715 			if (inlen == sizeof (int)) {
716 				/* This is IP_RECVPKTINFO option. */
717 				*i1 = connp->conn_recv_ancillary.
718 				    crb_ip_recvpktinfo;
719 				return (sizeof (int));
720 			}
721 #endif
722 			/* XXX assumes that caller has room for max size! */
723 
724 			pktinfo = (struct in_pktinfo *)ptr;
725 			pktinfo->ipi_ifindex = ixa->ixa_ifindex;
726 			if (ipp->ipp_fields & IPPF_ADDR)
727 				pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
728 			else
729 				pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
730 			return (sizeof (struct in_pktinfo));
731 		}
732 		case IP_DONTFRAG:
733 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
734 			return (sizeof (int));
735 		case IP_TOS:
736 		case T_IP_TOS:
737 			*i1 = (int)ipp->ipp_type_of_service;
738 			break;	/* goto sizeof (int) option return */
739 		case IP_TTL:
740 			*i1 = (int)ipp->ipp_unicast_hops;
741 			break;	/* goto sizeof (int) option return */
742 		case IP_DHCPINIT_IF:
743 			return (-1);
744 		case IP_NEXTHOP:
745 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
746 				*(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
747 				return (sizeof (ipaddr_t));
748 			} else {
749 				return (0);
750 			}
751 
752 		case IP_MULTICAST_IF:
753 			/* 0 address if not set */
754 			*(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
755 			return (sizeof (ipaddr_t));
756 		case IP_MULTICAST_TTL:
757 			*(uchar_t *)ptr = ixa->ixa_multicast_ttl;
758 			return (sizeof (uchar_t));
759 		case IP_MULTICAST_LOOP:
760 			*ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
761 			return (sizeof (uint8_t));
762 		case IP_RECVOPTS:
763 			*i1 = connp->conn_recv_ancillary.crb_recvopts;
764 			break;	/* goto sizeof (int) option return */
765 		case IP_RECVDSTADDR:
766 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
767 			break;	/* goto sizeof (int) option return */
768 		case IP_RECVIF:
769 			*i1 = connp->conn_recv_ancillary.crb_recvif;
770 			break;	/* goto sizeof (int) option return */
771 		case IP_RECVSLLA:
772 			*i1 = connp->conn_recv_ancillary.crb_recvslla;
773 			break;	/* goto sizeof (int) option return */
774 		case IP_RECVTTL:
775 			*i1 = connp->conn_recv_ancillary.crb_recvttl;
776 			break;	/* goto sizeof (int) option return */
777 		case IP_ADD_MEMBERSHIP:
778 		case IP_DROP_MEMBERSHIP:
779 		case MCAST_JOIN_GROUP:
780 		case MCAST_LEAVE_GROUP:
781 		case IP_BLOCK_SOURCE:
782 		case IP_UNBLOCK_SOURCE:
783 		case IP_ADD_SOURCE_MEMBERSHIP:
784 		case IP_DROP_SOURCE_MEMBERSHIP:
785 		case MCAST_BLOCK_SOURCE:
786 		case MCAST_UNBLOCK_SOURCE:
787 		case MCAST_JOIN_SOURCE_GROUP:
788 		case MCAST_LEAVE_SOURCE_GROUP:
789 		case MRT_INIT:
790 		case MRT_DONE:
791 		case MRT_ADD_VIF:
792 		case MRT_DEL_VIF:
793 		case MRT_ADD_MFC:
794 		case MRT_DEL_MFC:
795 			/* cannot "get" the value for these */
796 			return (-1);
797 		case MRT_VERSION:
798 		case MRT_ASSERT:
799 			(void) ip_mrouter_get(name, connp, ptr);
800 			return (sizeof (int));
801 		case IP_SEC_OPT:
802 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
803 			    IPSEC_AF_V4));
804 		case IP_BOUND_IF:
805 			/* Zero if not set */
806 			*i1 = connp->conn_bound_if;
807 			break;	/* goto sizeof (int) option return */
808 		case IP_UNSPEC_SRC:
809 			*i1 = connp->conn_unspec_src;
810 			break;	/* goto sizeof (int) option return */
811 		case IP_BROADCAST_TTL:
812 			if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
813 				*(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
814 			else
815 				*(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
816 			return (sizeof (uchar_t));
817 		default:
818 			return (-1);
819 		}
820 		break;
821 	case IPPROTO_IPV6:
822 		if (connp->conn_family != AF_INET6)
823 			return (-1);
824 		switch (name) {
825 		case IPV6_UNICAST_HOPS:
826 			*i1 = (int)ipp->ipp_unicast_hops;
827 			break;	/* goto sizeof (int) option return */
828 		case IPV6_MULTICAST_IF:
829 			/* 0 index if not set */
830 			*i1 = ixa->ixa_multicast_ifindex;
831 			break;	/* goto sizeof (int) option return */
832 		case IPV6_MULTICAST_HOPS:
833 			*i1 = ixa->ixa_multicast_ttl;
834 			break;	/* goto sizeof (int) option return */
835 		case IPV6_MULTICAST_LOOP:
836 			*i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
837 			break;	/* goto sizeof (int) option return */
838 		case IPV6_JOIN_GROUP:
839 		case IPV6_LEAVE_GROUP:
840 		case MCAST_JOIN_GROUP:
841 		case MCAST_LEAVE_GROUP:
842 		case MCAST_BLOCK_SOURCE:
843 		case MCAST_UNBLOCK_SOURCE:
844 		case MCAST_JOIN_SOURCE_GROUP:
845 		case MCAST_LEAVE_SOURCE_GROUP:
846 			/* cannot "get" the value for these */
847 			return (-1);
848 		case IPV6_BOUND_IF:
849 			/* Zero if not set */
850 			*i1 = connp->conn_bound_if;
851 			break;	/* goto sizeof (int) option return */
852 		case IPV6_UNSPEC_SRC:
853 			*i1 = connp->conn_unspec_src;
854 			break;	/* goto sizeof (int) option return */
855 		case IPV6_RECVPKTINFO:
856 			*i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
857 			break;	/* goto sizeof (int) option return */
858 		case IPV6_RECVTCLASS:
859 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
860 			break;	/* goto sizeof (int) option return */
861 		case IPV6_RECVPATHMTU:
862 			*i1 = connp->conn_ipv6_recvpathmtu;
863 			break;	/* goto sizeof (int) option return */
864 		case IPV6_RECVHOPLIMIT:
865 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
866 			break;	/* goto sizeof (int) option return */
867 		case IPV6_RECVHOPOPTS:
868 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
869 			break;	/* goto sizeof (int) option return */
870 		case IPV6_RECVDSTOPTS:
871 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
872 			break;	/* goto sizeof (int) option return */
873 		case _OLD_IPV6_RECVDSTOPTS:
874 			*i1 =
875 			    connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
876 			break;	/* goto sizeof (int) option return */
877 		case IPV6_RECVRTHDRDSTOPTS:
878 			*i1 = connp->conn_recv_ancillary.
879 			    crb_ipv6_recvrthdrdstopts;
880 			break;	/* goto sizeof (int) option return */
881 		case IPV6_RECVRTHDR:
882 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
883 			break;	/* goto sizeof (int) option return */
884 		case IPV6_PKTINFO: {
885 			/* XXX assumes that caller has room for max size! */
886 			struct in6_pktinfo *pkti;
887 
888 			pkti = (struct in6_pktinfo *)ptr;
889 			pkti->ipi6_ifindex = ixa->ixa_ifindex;
890 			if (ipp->ipp_fields & IPPF_ADDR)
891 				pkti->ipi6_addr = ipp->ipp_addr;
892 			else
893 				pkti->ipi6_addr = ipv6_all_zeros;
894 			return (sizeof (struct in6_pktinfo));
895 		}
896 		case IPV6_TCLASS:
897 			*i1 = ipp->ipp_tclass;
898 			break;	/* goto sizeof (int) option return */
899 		case IPV6_NEXTHOP: {
900 			sin6_t *sin6 = (sin6_t *)ptr;
901 
902 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
903 				return (0);
904 
905 			*sin6 = sin6_null;
906 			sin6->sin6_family = AF_INET6;
907 			sin6->sin6_addr = ixa->ixa_nexthop_v6;
908 
909 			return (sizeof (sin6_t));
910 		}
911 		case IPV6_HOPOPTS:
912 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
913 				return (0);
914 			bcopy(ipp->ipp_hopopts, ptr,
915 			    ipp->ipp_hopoptslen);
916 			return (ipp->ipp_hopoptslen);
917 		case IPV6_RTHDRDSTOPTS:
918 			if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
919 				return (0);
920 			bcopy(ipp->ipp_rthdrdstopts, ptr,
921 			    ipp->ipp_rthdrdstoptslen);
922 			return (ipp->ipp_rthdrdstoptslen);
923 		case IPV6_RTHDR:
924 			if (!(ipp->ipp_fields & IPPF_RTHDR))
925 				return (0);
926 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
927 			return (ipp->ipp_rthdrlen);
928 		case IPV6_DSTOPTS:
929 			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
930 				return (0);
931 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
932 			return (ipp->ipp_dstoptslen);
933 		case IPV6_PATHMTU:
934 			return (ip_fill_mtuinfo(connp, ixa,
935 			    (struct ip6_mtuinfo *)ptr));
936 		case IPV6_SEC_OPT:
937 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
938 			    IPSEC_AF_V6));
939 		case IPV6_SRC_PREFERENCES:
940 			return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
941 		case IPV6_DONTFRAG:
942 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
943 			return (sizeof (int));
944 		case IPV6_USE_MIN_MTU:
945 			if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
946 				*i1 = ixa->ixa_use_min_mtu;
947 			else
948 				*i1 = IPV6_USE_MIN_MTU_MULTICAST;
949 			break;
950 		case IPV6_V6ONLY:
951 			*i1 = connp->conn_ipv6_v6only;
952 			return (sizeof (int));
953 		default:
954 			return (-1);
955 		}
956 		break;
957 	case IPPROTO_UDP:
958 		switch (name) {
959 		case UDP_ANONPRIVBIND:
960 			*i1 = connp->conn_anon_priv_bind;
961 			break;
962 		case UDP_EXCLBIND:
963 			*i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
964 			break;
965 		default:
966 			return (-1);
967 		}
968 		break;
969 	case IPPROTO_TCP:
970 		switch (name) {
971 		case TCP_RECVDSTADDR:
972 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
973 			break;
974 		case TCP_ANONPRIVBIND:
975 			*i1 = connp->conn_anon_priv_bind;
976 			break;
977 		case TCP_EXCLBIND:
978 			*i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
979 			break;
980 		default:
981 			return (-1);
982 		}
983 		break;
984 	default:
985 		return (-1);
986 	}
987 	return (sizeof (int));
988 }
989 
990 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
991     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
992 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
993     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
994 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
995     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
996 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
997     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
998 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
999     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1000 
1001 /*
1002  * This routine sets the most common socket options including some
1003  * that are transport/ULP specific.
1004  * It returns errno or zero.
1005  *
1006  * For fixed length options, there is no sanity check
1007  * of passed in length is done. It is assumed *_optcom_req()
1008  * routines do the right thing.
1009  */
1010 int
1011 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1012     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1013 {
1014 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1015 
1016 	/* We have different functions for different levels */
1017 	switch (level) {
1018 	case SOL_SOCKET:
1019 		return (conn_opt_set_socket(coa, name, inlen, invalp,
1020 		    checkonly, cr));
1021 	case IPPROTO_IP:
1022 		return (conn_opt_set_ip(coa, name, inlen, invalp,
1023 		    checkonly, cr));
1024 	case IPPROTO_IPV6:
1025 		return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1026 		    checkonly, cr));
1027 	case IPPROTO_UDP:
1028 		return (conn_opt_set_udp(coa, name, inlen, invalp,
1029 		    checkonly, cr));
1030 	case IPPROTO_TCP:
1031 		return (conn_opt_set_tcp(coa, name, inlen, invalp,
1032 		    checkonly, cr));
1033 	default:
1034 		return (0);
1035 	}
1036 }
1037 
1038 /*
1039  * Handle SOL_SOCKET
1040  * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1041  * it implement their own checks and setting of conn_proto.
1042  */
1043 /* ARGSUSED1 */
1044 static int
1045 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1046     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1047 {
1048 	conn_t		*connp = coa->coa_connp;
1049 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1050 	int		*i1 = (int *)invalp;
1051 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1052 
1053 	switch (name) {
1054 	case SO_ALLZONES:
1055 		if (IPCL_IS_BOUND(connp))
1056 			return (EINVAL);
1057 		break;
1058 	case SO_VRRP:
1059 		if (secpolicy_ip_config(cr, checkonly) != 0)
1060 			return (EACCES);
1061 		break;
1062 	case SO_MAC_EXEMPT:
1063 		if (secpolicy_net_mac_aware(cr) != 0)
1064 			return (EACCES);
1065 		if (IPCL_IS_BOUND(connp))
1066 			return (EINVAL);
1067 		break;
1068 	case SO_MAC_IMPLICIT:
1069 		if (secpolicy_net_mac_implicit(cr) != 0)
1070 			return (EACCES);
1071 		break;
1072 	}
1073 	if (checkonly)
1074 		return (0);
1075 
1076 	mutex_enter(&connp->conn_lock);
1077 	/* Here we set the actual option value */
1078 	switch (name) {
1079 	case SO_DEBUG:
1080 		connp->conn_debug = onoff;
1081 		break;
1082 	case SO_KEEPALIVE:
1083 		connp->conn_keepalive = onoff;
1084 		break;
1085 	case SO_LINGER: {
1086 		struct linger *lgr = (struct linger *)invalp;
1087 
1088 		if (lgr->l_onoff) {
1089 			connp->conn_linger = 1;
1090 			connp->conn_lingertime = lgr->l_linger;
1091 		} else {
1092 			connp->conn_linger = 0;
1093 			connp->conn_lingertime = 0;
1094 		}
1095 		break;
1096 	}
1097 	case SO_OOBINLINE:
1098 		connp->conn_oobinline = onoff;
1099 		coa->coa_changed |= COA_OOBINLINE_CHANGED;
1100 		break;
1101 	case SO_REUSEADDR:
1102 		connp->conn_reuseaddr = onoff;
1103 		break;
1104 	case SO_DONTROUTE:
1105 		if (onoff)
1106 			ixa->ixa_flags |= IXAF_DONTROUTE;
1107 		else
1108 			ixa->ixa_flags &= ~IXAF_DONTROUTE;
1109 		coa->coa_changed |= COA_ROUTE_CHANGED;
1110 		break;
1111 	case SO_USELOOPBACK:
1112 		connp->conn_useloopback = onoff;
1113 		break;
1114 	case SO_BROADCAST:
1115 		connp->conn_broadcast = onoff;
1116 		break;
1117 	case SO_SNDBUF:
1118 		/* ULP has range checked the value */
1119 		connp->conn_sndbuf = *i1;
1120 		coa->coa_changed |= COA_SNDBUF_CHANGED;
1121 		break;
1122 	case SO_RCVBUF:
1123 		/* ULP has range checked the value */
1124 		connp->conn_rcvbuf = *i1;
1125 		coa->coa_changed |= COA_RCVBUF_CHANGED;
1126 		break;
1127 	case SO_RCVTIMEO:
1128 	case SO_SNDTIMEO:
1129 		/*
1130 		 * Pass these two options in order for third part
1131 		 * protocol usage.
1132 		 */
1133 		break;
1134 	case SO_DGRAM_ERRIND:
1135 		connp->conn_dgram_errind = onoff;
1136 		break;
1137 	case SO_RECVUCRED:
1138 		connp->conn_recv_ancillary.crb_recvucred = onoff;
1139 		break;
1140 	case SO_ALLZONES:
1141 		connp->conn_allzones = onoff;
1142 		coa->coa_changed |= COA_ROUTE_CHANGED;
1143 		if (onoff)
1144 			ixa->ixa_zoneid = ALL_ZONES;
1145 		else
1146 			ixa->ixa_zoneid = connp->conn_zoneid;
1147 		break;
1148 	case SO_TIMESTAMP:
1149 		connp->conn_recv_ancillary.crb_timestamp = onoff;
1150 		break;
1151 	case SO_VRRP:
1152 		connp->conn_isvrrp = onoff;
1153 		break;
1154 	case SO_ANON_MLP:
1155 		connp->conn_anon_mlp = onoff;
1156 		break;
1157 	case SO_MAC_EXEMPT:
1158 		connp->conn_mac_mode = onoff ?
1159 		    CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1160 		break;
1161 	case SO_MAC_IMPLICIT:
1162 		connp->conn_mac_mode = onoff ?
1163 		    CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1164 		break;
1165 	case SO_EXCLBIND:
1166 		connp->conn_exclbind = onoff;
1167 		break;
1168 	}
1169 	mutex_exit(&connp->conn_lock);
1170 	return (0);
1171 }
1172 
1173 /* Handle IPPROTO_IP */
1174 static int
1175 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1176     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1177 {
1178 	conn_t		*connp = coa->coa_connp;
1179 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1180 	ip_pkt_t	*ipp = coa->coa_ipp;
1181 	int		*i1 = (int *)invalp;
1182 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1183 	ipaddr_t	addr = (ipaddr_t)*i1;
1184 	uint_t		ifindex;
1185 	zoneid_t	zoneid = IPCL_ZONEID(connp);
1186 	ipif_t		*ipif;
1187 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1188 	int		error;
1189 
1190 	if (connp->conn_family != AF_INET)
1191 		return (EINVAL);
1192 
1193 	switch (name) {
1194 	case IP_TTL:
1195 		/* Don't allow zero */
1196 		if (*i1 < 1 || *i1 > 255)
1197 			return (EINVAL);
1198 		break;
1199 	case IP_MULTICAST_IF:
1200 		if (addr == INADDR_ANY) {
1201 			/* Clear */
1202 			ifindex = 0;
1203 			break;
1204 		}
1205 		ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1206 		if (ipif == NULL)
1207 			return (EHOSTUNREACH);
1208 		/* not supported by the virtual network iface */
1209 		if (IS_VNI(ipif->ipif_ill)) {
1210 			ipif_refrele(ipif);
1211 			return (EINVAL);
1212 		}
1213 		ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1214 		ipif_refrele(ipif);
1215 		break;
1216 	case IP_NEXTHOP: {
1217 		ire_t	*ire;
1218 
1219 		if (addr == INADDR_ANY) {
1220 			/* Clear */
1221 			break;
1222 		}
1223 		/* Verify that the next-hop is on-link */
1224 		ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1225 		    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1226 		if (ire == NULL)
1227 			return (EHOSTUNREACH);
1228 		ire_refrele(ire);
1229 		break;
1230 	}
1231 	case IP_OPTIONS:
1232 	case T_IP_OPTIONS: {
1233 		uint_t newlen;
1234 
1235 		if (ipp->ipp_fields & IPPF_LABEL_V4)
1236 			newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1237 		else
1238 			newlen = inlen;
1239 		if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1240 			return (EINVAL);
1241 		}
1242 		break;
1243 	}
1244 	case IP_PKTINFO: {
1245 		struct in_pktinfo *pktinfo;
1246 
1247 		/* Two different valid lengths */
1248 		if (inlen != sizeof (int) &&
1249 		    inlen != sizeof (struct in_pktinfo))
1250 			return (EINVAL);
1251 		if (inlen == sizeof (int))
1252 			break;
1253 
1254 		pktinfo = (struct in_pktinfo *)invalp;
1255 		if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1256 			switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1257 			    zoneid, ipst, B_FALSE)) {
1258 			case IPVL_UNICAST_UP:
1259 			case IPVL_UNICAST_DOWN:
1260 				break;
1261 			default:
1262 				return (EADDRNOTAVAIL);
1263 			}
1264 		}
1265 		if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1266 		    B_FALSE, ipst))
1267 			return (ENXIO);
1268 		break;
1269 	}
1270 	case IP_BOUND_IF:
1271 		ifindex = *(uint_t *)i1;
1272 
1273 		/* Just check it is ok. */
1274 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1275 			return (ENXIO);
1276 		break;
1277 	}
1278 	if (checkonly)
1279 		return (0);
1280 
1281 	/* Here we set the actual option value */
1282 	/*
1283 	 * conn_lock protects the bitfields, and is used to
1284 	 * set the fields atomically. Not needed for ixa settings since
1285 	 * the caller has an exclusive copy of the ixa.
1286 	 * We can not hold conn_lock across the multicast options though.
1287 	 */
1288 	switch (name) {
1289 	case IP_OPTIONS:
1290 	case T_IP_OPTIONS:
1291 		/* Save options for use by IP. */
1292 		mutex_enter(&connp->conn_lock);
1293 		error = optcom_pkt_set(invalp, inlen,
1294 		    (uchar_t **)&ipp->ipp_ipv4_options,
1295 		    &ipp->ipp_ipv4_options_len);
1296 		if (error != 0) {
1297 			mutex_exit(&connp->conn_lock);
1298 			return (error);
1299 		}
1300 		if (ipp->ipp_ipv4_options_len == 0) {
1301 			ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1302 		} else {
1303 			ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1304 		}
1305 		mutex_exit(&connp->conn_lock);
1306 		coa->coa_changed |= COA_HEADER_CHANGED;
1307 		coa->coa_changed |= COA_WROFF_CHANGED;
1308 		break;
1309 
1310 	case IP_TTL:
1311 		mutex_enter(&connp->conn_lock);
1312 		ipp->ipp_unicast_hops = *i1;
1313 		mutex_exit(&connp->conn_lock);
1314 		coa->coa_changed |= COA_HEADER_CHANGED;
1315 		break;
1316 	case IP_TOS:
1317 	case T_IP_TOS:
1318 		mutex_enter(&connp->conn_lock);
1319 		if (*i1 == -1) {
1320 			ipp->ipp_type_of_service = 0;
1321 		} else {
1322 			ipp->ipp_type_of_service = *i1;
1323 		}
1324 		mutex_exit(&connp->conn_lock);
1325 		coa->coa_changed |= COA_HEADER_CHANGED;
1326 		break;
1327 	case IP_MULTICAST_IF:
1328 		ixa->ixa_multicast_ifindex = ifindex;
1329 		ixa->ixa_multicast_ifaddr = addr;
1330 		coa->coa_changed |= COA_ROUTE_CHANGED;
1331 		break;
1332 	case IP_MULTICAST_TTL:
1333 		ixa->ixa_multicast_ttl = *invalp;
1334 		/* Handled automatically by ip_output */
1335 		break;
1336 	case IP_MULTICAST_LOOP:
1337 		if (*invalp != 0)
1338 			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1339 		else
1340 			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1341 		/* Handled automatically by ip_output */
1342 		break;
1343 	case IP_RECVOPTS:
1344 		mutex_enter(&connp->conn_lock);
1345 		connp->conn_recv_ancillary.crb_recvopts = onoff;
1346 		mutex_exit(&connp->conn_lock);
1347 		break;
1348 	case IP_RECVDSTADDR:
1349 		mutex_enter(&connp->conn_lock);
1350 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1351 		mutex_exit(&connp->conn_lock);
1352 		break;
1353 	case IP_RECVIF:
1354 		mutex_enter(&connp->conn_lock);
1355 		connp->conn_recv_ancillary.crb_recvif = onoff;
1356 		mutex_exit(&connp->conn_lock);
1357 		break;
1358 	case IP_RECVSLLA:
1359 		mutex_enter(&connp->conn_lock);
1360 		connp->conn_recv_ancillary.crb_recvslla = onoff;
1361 		mutex_exit(&connp->conn_lock);
1362 		break;
1363 	case IP_RECVTTL:
1364 		mutex_enter(&connp->conn_lock);
1365 		connp->conn_recv_ancillary.crb_recvttl = onoff;
1366 		mutex_exit(&connp->conn_lock);
1367 		break;
1368 	case IP_PKTINFO: {
1369 		/*
1370 		 * This also handles IP_RECVPKTINFO.
1371 		 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1372 		 * Differentiation is based on the size of the
1373 		 * argument passed in.
1374 		 */
1375 		struct in_pktinfo *pktinfo;
1376 
1377 		if (inlen == sizeof (int)) {
1378 			/* This is IP_RECVPKTINFO option. */
1379 			mutex_enter(&connp->conn_lock);
1380 			connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1381 			    onoff;
1382 			mutex_exit(&connp->conn_lock);
1383 			break;
1384 		}
1385 
1386 		/* This is IP_PKTINFO option. */
1387 		mutex_enter(&connp->conn_lock);
1388 		pktinfo = (struct in_pktinfo *)invalp;
1389 		if (ipp->ipp_addr_v4 != INADDR_ANY) {
1390 			ipp->ipp_fields |= IPPF_ADDR;
1391 			IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1392 			    &ipp->ipp_addr);
1393 		} else {
1394 			ipp->ipp_fields &= ~IPPF_ADDR;
1395 			ipp->ipp_addr = ipv6_all_zeros;
1396 		}
1397 		mutex_exit(&connp->conn_lock);
1398 		ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1399 		coa->coa_changed |= COA_ROUTE_CHANGED;
1400 		coa->coa_changed |= COA_HEADER_CHANGED;
1401 		break;
1402 	}
1403 	case IP_DONTFRAG:
1404 		if (onoff) {
1405 			ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1406 			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1407 		} else {
1408 			ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1409 			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1410 		}
1411 		/* Need to redo ip_attr_connect */
1412 		coa->coa_changed |= COA_ROUTE_CHANGED;
1413 		break;
1414 	case IP_ADD_MEMBERSHIP:
1415 	case IP_DROP_MEMBERSHIP:
1416 	case MCAST_JOIN_GROUP:
1417 	case MCAST_LEAVE_GROUP:
1418 		return (ip_opt_set_multicast_group(connp, name,
1419 		    invalp, B_FALSE, checkonly));
1420 
1421 	case IP_BLOCK_SOURCE:
1422 	case IP_UNBLOCK_SOURCE:
1423 	case IP_ADD_SOURCE_MEMBERSHIP:
1424 	case IP_DROP_SOURCE_MEMBERSHIP:
1425 	case MCAST_BLOCK_SOURCE:
1426 	case MCAST_UNBLOCK_SOURCE:
1427 	case MCAST_JOIN_SOURCE_GROUP:
1428 	case MCAST_LEAVE_SOURCE_GROUP:
1429 		return (ip_opt_set_multicast_sources(connp, name,
1430 		    invalp, B_FALSE, checkonly));
1431 
1432 	case IP_SEC_OPT:
1433 		mutex_enter(&connp->conn_lock);
1434 		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1435 		mutex_exit(&connp->conn_lock);
1436 		if (error != 0) {
1437 			return (error);
1438 		}
1439 		/* This is an IPsec policy change - redo ip_attr_connect */
1440 		coa->coa_changed |= COA_ROUTE_CHANGED;
1441 		break;
1442 	case IP_NEXTHOP:
1443 		ixa->ixa_nexthop_v4 = addr;
1444 		if (addr != INADDR_ANY)
1445 			ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1446 		else
1447 			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1448 		coa->coa_changed |= COA_ROUTE_CHANGED;
1449 		break;
1450 
1451 	case IP_BOUND_IF:
1452 		ixa->ixa_ifindex = ifindex;		/* Send */
1453 		mutex_enter(&connp->conn_lock);
1454 		connp->conn_incoming_ifindex = ifindex;	/* Receive */
1455 		connp->conn_bound_if = ifindex;		/* getsockopt */
1456 		mutex_exit(&connp->conn_lock);
1457 		coa->coa_changed |= COA_ROUTE_CHANGED;
1458 		break;
1459 	case IP_UNSPEC_SRC:
1460 		mutex_enter(&connp->conn_lock);
1461 		connp->conn_unspec_src = onoff;
1462 		if (onoff)
1463 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1464 		else
1465 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1466 
1467 		mutex_exit(&connp->conn_lock);
1468 		break;
1469 	case IP_BROADCAST_TTL:
1470 		ixa->ixa_broadcast_ttl = *invalp;
1471 		ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1472 		/* Handled automatically by ip_output */
1473 		break;
1474 	case MRT_INIT:
1475 	case MRT_DONE:
1476 	case MRT_ADD_VIF:
1477 	case MRT_DEL_VIF:
1478 	case MRT_ADD_MFC:
1479 	case MRT_DEL_MFC:
1480 	case MRT_ASSERT:
1481 		if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1482 			return (error);
1483 		}
1484 		error = ip_mrouter_set((int)name, connp, checkonly,
1485 		    (uchar_t *)invalp, inlen);
1486 		if (error) {
1487 			return (error);
1488 		}
1489 		return (0);
1490 
1491 	}
1492 	return (0);
1493 }
1494 
1495 /* Handle IPPROTO_IPV6 */
1496 static int
1497 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1498     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1499 {
1500 	conn_t		*connp = coa->coa_connp;
1501 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1502 	ip_pkt_t	*ipp = coa->coa_ipp;
1503 	int		*i1 = (int *)invalp;
1504 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1505 	uint_t		ifindex;
1506 	zoneid_t	zoneid = IPCL_ZONEID(connp);
1507 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1508 	int		error;
1509 
1510 	if (connp->conn_family != AF_INET6)
1511 		return (EINVAL);
1512 
1513 	switch (name) {
1514 	case IPV6_MULTICAST_IF:
1515 		/*
1516 		 * The only possible error is EINVAL.
1517 		 * We call this option on both V4 and V6
1518 		 * If both fail, then this call returns
1519 		 * EINVAL. If at least one of them succeeds we
1520 		 * return success.
1521 		 */
1522 		ifindex = *(uint_t *)i1;
1523 
1524 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1525 		    !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1526 			return (EINVAL);
1527 		break;
1528 	case IPV6_UNICAST_HOPS:
1529 		/* Don't allow zero. -1 means to use default */
1530 		if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1531 			return (EINVAL);
1532 		break;
1533 	case IPV6_MULTICAST_HOPS:
1534 		/* -1 means use default */
1535 		if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1536 			return (EINVAL);
1537 		break;
1538 	case IPV6_MULTICAST_LOOP:
1539 		if (*i1 != 0 && *i1 != 1)
1540 			return (EINVAL);
1541 		break;
1542 	case IPV6_BOUND_IF:
1543 		ifindex = *(uint_t *)i1;
1544 
1545 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1546 			return (ENXIO);
1547 		break;
1548 	case IPV6_PKTINFO: {
1549 		struct in6_pktinfo *pkti;
1550 		boolean_t isv6;
1551 
1552 		if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1553 			return (EINVAL);
1554 		if (inlen == 0)
1555 			break;	/* Clear values below */
1556 
1557 		/*
1558 		 * Verify the source address and ifindex. Privileged users
1559 		 * can use any source address.
1560 		 */
1561 		pkti = (struct in6_pktinfo *)invalp;
1562 
1563 		/*
1564 		 * For link-local addresses we use the ipi6_ifindex when
1565 		 * we verify the local address.
1566 		 * If net_rawaccess then any source address can be used.
1567 		 */
1568 		if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1569 		    secpolicy_net_rawaccess(cr) != 0) {
1570 			uint_t scopeid = 0;
1571 			in6_addr_t *v6src = &pkti->ipi6_addr;
1572 			ipaddr_t v4src;
1573 			ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1574 
1575 			if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1576 				IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1577 				if (v4src != INADDR_ANY) {
1578 					laddr_type = ip_laddr_verify_v4(v4src,
1579 					    zoneid, ipst, B_FALSE);
1580 				}
1581 			} else {
1582 				if (IN6_IS_ADDR_LINKSCOPE(v6src))
1583 					scopeid = pkti->ipi6_ifindex;
1584 
1585 				laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1586 				    ipst, B_FALSE, scopeid);
1587 			}
1588 			switch (laddr_type) {
1589 			case IPVL_UNICAST_UP:
1590 			case IPVL_UNICAST_DOWN:
1591 				break;
1592 			default:
1593 				return (EADDRNOTAVAIL);
1594 			}
1595 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1596 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1597 			/* Allow any source */
1598 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1599 		}
1600 		isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1601 		if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1602 		    ipst))
1603 			return (ENXIO);
1604 		break;
1605 	}
1606 	case IPV6_HOPLIMIT:
1607 		/* It is only allowed as ancilary data */
1608 		if (!coa->coa_ancillary)
1609 			return (EINVAL);
1610 
1611 		if (inlen != 0 && inlen != sizeof (int))
1612 			return (EINVAL);
1613 		if (inlen == sizeof (int)) {
1614 			if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1615 				return (EINVAL);
1616 		}
1617 		break;
1618 	case IPV6_TCLASS:
1619 		if (inlen != 0 && inlen != sizeof (int))
1620 			return (EINVAL);
1621 		if (inlen == sizeof (int)) {
1622 			if (*i1 > 255 || *i1 < -1)
1623 				return (EINVAL);
1624 		}
1625 		break;
1626 	case IPV6_NEXTHOP:
1627 		if (inlen != 0 && inlen != sizeof (sin6_t))
1628 			return (EINVAL);
1629 		if (inlen == sizeof (sin6_t)) {
1630 			sin6_t *sin6 = (sin6_t *)invalp;
1631 			ire_t	*ire;
1632 
1633 			if (sin6->sin6_family != AF_INET6)
1634 				return (EAFNOSUPPORT);
1635 			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1636 				return (EADDRNOTAVAIL);
1637 
1638 			/* Verify that the next-hop is on-link */
1639 			ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1640 			    0, 0, IRE_ONLINK, NULL, zoneid,
1641 			    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1642 			if (ire == NULL)
1643 				return (EHOSTUNREACH);
1644 			ire_refrele(ire);
1645 			break;
1646 		}
1647 		break;
1648 	case IPV6_RTHDR:
1649 	case IPV6_DSTOPTS:
1650 	case IPV6_RTHDRDSTOPTS:
1651 	case IPV6_HOPOPTS: {
1652 		/* All have the length field in the same place */
1653 		ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1654 		/*
1655 		 * Sanity checks - minimum size, size a multiple of
1656 		 * eight bytes, and matching size passed in.
1657 		 */
1658 		if (inlen != 0 &&
1659 		    inlen != (8 * (hopts->ip6h_len + 1)))
1660 			return (EINVAL);
1661 		break;
1662 	}
1663 	case IPV6_PATHMTU:
1664 		/* Can't be set */
1665 		return (EINVAL);
1666 
1667 	case IPV6_USE_MIN_MTU:
1668 		if (inlen != sizeof (int))
1669 			return (EINVAL);
1670 		if (*i1 < -1 || *i1 > 1)
1671 			return (EINVAL);
1672 		break;
1673 	case IPV6_SRC_PREFERENCES:
1674 		if (inlen != sizeof (uint32_t))
1675 			return (EINVAL);
1676 		break;
1677 	case IPV6_V6ONLY:
1678 		if (*i1 < 0 || *i1 > 1) {
1679 			return (EINVAL);
1680 		}
1681 		break;
1682 	}
1683 	if (checkonly)
1684 		return (0);
1685 
1686 	/* Here we set the actual option value */
1687 	/*
1688 	 * conn_lock protects the bitfields, and is used to
1689 	 * set the fields atomically. Not needed for ixa settings since
1690 	 * the caller has an exclusive copy of the ixa.
1691 	 * We can not hold conn_lock across the multicast options though.
1692 	 */
1693 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1694 	switch (name) {
1695 	case IPV6_MULTICAST_IF:
1696 		ixa->ixa_multicast_ifindex = ifindex;
1697 		/* Need to redo ip_attr_connect */
1698 		coa->coa_changed |= COA_ROUTE_CHANGED;
1699 		break;
1700 	case IPV6_UNICAST_HOPS:
1701 		/* -1 means use default */
1702 		mutex_enter(&connp->conn_lock);
1703 		if (*i1 == -1) {
1704 			ipp->ipp_unicast_hops = connp->conn_default_ttl;
1705 		} else {
1706 			ipp->ipp_unicast_hops = (uint8_t)*i1;
1707 		}
1708 		mutex_exit(&connp->conn_lock);
1709 		coa->coa_changed |= COA_HEADER_CHANGED;
1710 		break;
1711 	case IPV6_MULTICAST_HOPS:
1712 		/* -1 means use default */
1713 		if (*i1 == -1) {
1714 			ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1715 		} else {
1716 			ixa->ixa_multicast_ttl = (uint8_t)*i1;
1717 		}
1718 		/* Handled automatically by ip_output */
1719 		break;
1720 	case IPV6_MULTICAST_LOOP:
1721 		if (*i1 != 0)
1722 			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1723 		else
1724 			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1725 		/* Handled automatically by ip_output */
1726 		break;
1727 	case IPV6_JOIN_GROUP:
1728 	case IPV6_LEAVE_GROUP:
1729 	case MCAST_JOIN_GROUP:
1730 	case MCAST_LEAVE_GROUP:
1731 		return (ip_opt_set_multicast_group(connp, name,
1732 		    invalp, B_TRUE, checkonly));
1733 
1734 	case MCAST_BLOCK_SOURCE:
1735 	case MCAST_UNBLOCK_SOURCE:
1736 	case MCAST_JOIN_SOURCE_GROUP:
1737 	case MCAST_LEAVE_SOURCE_GROUP:
1738 		return (ip_opt_set_multicast_sources(connp, name,
1739 		    invalp, B_TRUE, checkonly));
1740 
1741 	case IPV6_BOUND_IF:
1742 		ixa->ixa_ifindex = ifindex;		/* Send */
1743 		mutex_enter(&connp->conn_lock);
1744 		connp->conn_incoming_ifindex = ifindex;	/* Receive */
1745 		connp->conn_bound_if = ifindex;		/* getsockopt */
1746 		mutex_exit(&connp->conn_lock);
1747 		coa->coa_changed |= COA_ROUTE_CHANGED;
1748 		break;
1749 	case IPV6_UNSPEC_SRC:
1750 		mutex_enter(&connp->conn_lock);
1751 		connp->conn_unspec_src = onoff;
1752 		if (onoff)
1753 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1754 		else
1755 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1756 		mutex_exit(&connp->conn_lock);
1757 		break;
1758 	case IPV6_RECVPKTINFO:
1759 		mutex_enter(&connp->conn_lock);
1760 		connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1761 		mutex_exit(&connp->conn_lock);
1762 		break;
1763 	case IPV6_RECVTCLASS:
1764 		mutex_enter(&connp->conn_lock);
1765 		connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1766 		mutex_exit(&connp->conn_lock);
1767 		break;
1768 	case IPV6_RECVPATHMTU:
1769 		mutex_enter(&connp->conn_lock);
1770 		connp->conn_ipv6_recvpathmtu = onoff;
1771 		mutex_exit(&connp->conn_lock);
1772 		break;
1773 	case IPV6_RECVHOPLIMIT:
1774 		mutex_enter(&connp->conn_lock);
1775 		connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1776 		    onoff;
1777 		mutex_exit(&connp->conn_lock);
1778 		break;
1779 	case IPV6_RECVHOPOPTS:
1780 		mutex_enter(&connp->conn_lock);
1781 		connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1782 		mutex_exit(&connp->conn_lock);
1783 		break;
1784 	case IPV6_RECVDSTOPTS:
1785 		mutex_enter(&connp->conn_lock);
1786 		connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1787 		mutex_exit(&connp->conn_lock);
1788 		break;
1789 	case _OLD_IPV6_RECVDSTOPTS:
1790 		mutex_enter(&connp->conn_lock);
1791 		connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1792 		    onoff;
1793 		mutex_exit(&connp->conn_lock);
1794 		break;
1795 	case IPV6_RECVRTHDRDSTOPTS:
1796 		mutex_enter(&connp->conn_lock);
1797 		connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1798 		    onoff;
1799 		mutex_exit(&connp->conn_lock);
1800 		break;
1801 	case IPV6_RECVRTHDR:
1802 		mutex_enter(&connp->conn_lock);
1803 		connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1804 		mutex_exit(&connp->conn_lock);
1805 		break;
1806 	case IPV6_PKTINFO:
1807 		mutex_enter(&connp->conn_lock);
1808 		if (inlen == 0) {
1809 			ipp->ipp_fields &= ~IPPF_ADDR;
1810 			ipp->ipp_addr = ipv6_all_zeros;
1811 			ixa->ixa_ifindex = 0;
1812 		} else {
1813 			struct in6_pktinfo *pkti;
1814 
1815 			pkti = (struct in6_pktinfo *)invalp;
1816 			ipp->ipp_addr = pkti->ipi6_addr;
1817 			if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1818 				ipp->ipp_fields |= IPPF_ADDR;
1819 			else
1820 				ipp->ipp_fields &= ~IPPF_ADDR;
1821 			ixa->ixa_ifindex = pkti->ipi6_ifindex;
1822 		}
1823 		mutex_exit(&connp->conn_lock);
1824 		/* Source and ifindex might have changed */
1825 		coa->coa_changed |= COA_HEADER_CHANGED;
1826 		coa->coa_changed |= COA_ROUTE_CHANGED;
1827 		break;
1828 	case IPV6_HOPLIMIT:
1829 		mutex_enter(&connp->conn_lock);
1830 		if (inlen == 0 || *i1 == -1) {
1831 			/* Revert to default */
1832 			ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1833 			ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1834 		} else {
1835 			ipp->ipp_hoplimit = *i1;
1836 			ipp->ipp_fields |= IPPF_HOPLIMIT;
1837 			/* Ensure that it sticks for multicast packets */
1838 			ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1839 		}
1840 		mutex_exit(&connp->conn_lock);
1841 		coa->coa_changed |= COA_HEADER_CHANGED;
1842 		break;
1843 	case IPV6_TCLASS:
1844 		/*
1845 		 * IPV6_TCLASS accepts -1 as use kernel default
1846 		 * and [0, 255] as the actualy traffic class.
1847 		 */
1848 		mutex_enter(&connp->conn_lock);
1849 		if (inlen == 0 || *i1 == -1) {
1850 			ipp->ipp_tclass = 0;
1851 			ipp->ipp_fields &= ~IPPF_TCLASS;
1852 		} else {
1853 			ipp->ipp_tclass = *i1;
1854 			ipp->ipp_fields |= IPPF_TCLASS;
1855 		}
1856 		mutex_exit(&connp->conn_lock);
1857 		coa->coa_changed |= COA_HEADER_CHANGED;
1858 		break;
1859 	case IPV6_NEXTHOP:
1860 		if (inlen == 0) {
1861 			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1862 		} else {
1863 			sin6_t *sin6 = (sin6_t *)invalp;
1864 
1865 			ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1866 			if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1867 				ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1868 			else
1869 				ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1870 		}
1871 		coa->coa_changed |= COA_ROUTE_CHANGED;
1872 		break;
1873 	case IPV6_HOPOPTS:
1874 		mutex_enter(&connp->conn_lock);
1875 		error = optcom_pkt_set(invalp, inlen,
1876 		    (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1877 		if (error != 0) {
1878 			mutex_exit(&connp->conn_lock);
1879 			return (error);
1880 		}
1881 		if (ipp->ipp_hopoptslen == 0) {
1882 			ipp->ipp_fields &= ~IPPF_HOPOPTS;
1883 		} else {
1884 			ipp->ipp_fields |= IPPF_HOPOPTS;
1885 		}
1886 		mutex_exit(&connp->conn_lock);
1887 		coa->coa_changed |= COA_HEADER_CHANGED;
1888 		coa->coa_changed |= COA_WROFF_CHANGED;
1889 		break;
1890 	case IPV6_RTHDRDSTOPTS:
1891 		mutex_enter(&connp->conn_lock);
1892 		error = optcom_pkt_set(invalp, inlen,
1893 		    (uchar_t **)&ipp->ipp_rthdrdstopts,
1894 		    &ipp->ipp_rthdrdstoptslen);
1895 		if (error != 0) {
1896 			mutex_exit(&connp->conn_lock);
1897 			return (error);
1898 		}
1899 		if (ipp->ipp_rthdrdstoptslen == 0) {
1900 			ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1901 		} else {
1902 			ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1903 		}
1904 		mutex_exit(&connp->conn_lock);
1905 		coa->coa_changed |= COA_HEADER_CHANGED;
1906 		coa->coa_changed |= COA_WROFF_CHANGED;
1907 		break;
1908 	case IPV6_DSTOPTS:
1909 		mutex_enter(&connp->conn_lock);
1910 		error = optcom_pkt_set(invalp, inlen,
1911 		    (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1912 		if (error != 0) {
1913 			mutex_exit(&connp->conn_lock);
1914 			return (error);
1915 		}
1916 		if (ipp->ipp_dstoptslen == 0) {
1917 			ipp->ipp_fields &= ~IPPF_DSTOPTS;
1918 		} else {
1919 			ipp->ipp_fields |= IPPF_DSTOPTS;
1920 		}
1921 		mutex_exit(&connp->conn_lock);
1922 		coa->coa_changed |= COA_HEADER_CHANGED;
1923 		coa->coa_changed |= COA_WROFF_CHANGED;
1924 		break;
1925 	case IPV6_RTHDR:
1926 		mutex_enter(&connp->conn_lock);
1927 		error = optcom_pkt_set(invalp, inlen,
1928 		    (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1929 		if (error != 0) {
1930 			mutex_exit(&connp->conn_lock);
1931 			return (error);
1932 		}
1933 		if (ipp->ipp_rthdrlen == 0) {
1934 			ipp->ipp_fields &= ~IPPF_RTHDR;
1935 		} else {
1936 			ipp->ipp_fields |= IPPF_RTHDR;
1937 		}
1938 		mutex_exit(&connp->conn_lock);
1939 		coa->coa_changed |= COA_HEADER_CHANGED;
1940 		coa->coa_changed |= COA_WROFF_CHANGED;
1941 		break;
1942 
1943 	case IPV6_DONTFRAG:
1944 		if (onoff) {
1945 			ixa->ixa_flags |= IXAF_DONTFRAG;
1946 			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1947 		} else {
1948 			ixa->ixa_flags &= ~IXAF_DONTFRAG;
1949 			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1950 		}
1951 		/* Need to redo ip_attr_connect */
1952 		coa->coa_changed |= COA_ROUTE_CHANGED;
1953 		break;
1954 
1955 	case IPV6_USE_MIN_MTU:
1956 		ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1957 		ixa->ixa_use_min_mtu = *i1;
1958 		/* Need to redo ip_attr_connect */
1959 		coa->coa_changed |= COA_ROUTE_CHANGED;
1960 		break;
1961 
1962 	case IPV6_SEC_OPT:
1963 		mutex_enter(&connp->conn_lock);
1964 		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1965 		mutex_exit(&connp->conn_lock);
1966 		if (error != 0) {
1967 			return (error);
1968 		}
1969 		/* This is an IPsec policy change - redo ip_attr_connect */
1970 		coa->coa_changed |= COA_ROUTE_CHANGED;
1971 		break;
1972 	case IPV6_SRC_PREFERENCES:
1973 		/*
1974 		 * This socket option only affects connected
1975 		 * sockets that haven't already bound to a specific
1976 		 * IPv6 address.  In other words, sockets that
1977 		 * don't call bind() with an address other than the
1978 		 * unspecified address and that call connect().
1979 		 * ip_set_destination_v6() passes these preferences
1980 		 * to the ipif_select_source_v6() function.
1981 		 */
1982 		mutex_enter(&connp->conn_lock);
1983 		error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
1984 		mutex_exit(&connp->conn_lock);
1985 		if (error != 0) {
1986 			return (error);
1987 		}
1988 		break;
1989 	case IPV6_V6ONLY:
1990 		mutex_enter(&connp->conn_lock);
1991 		connp->conn_ipv6_v6only = onoff;
1992 		mutex_exit(&connp->conn_lock);
1993 		break;
1994 	}
1995 	return (0);
1996 }
1997 
1998 /* Handle IPPROTO_UDP */
1999 /* ARGSUSED1 */
2000 static int
2001 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2002     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2003 {
2004 	conn_t		*connp = coa->coa_connp;
2005 	int		*i1 = (int *)invalp;
2006 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2007 	int		error;
2008 
2009 	switch (name) {
2010 	case UDP_ANONPRIVBIND:
2011 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2012 			return (error);
2013 		}
2014 		break;
2015 	}
2016 	if (checkonly)
2017 		return (0);
2018 
2019 	/* Here we set the actual option value */
2020 	mutex_enter(&connp->conn_lock);
2021 	switch (name) {
2022 	case UDP_ANONPRIVBIND:
2023 		connp->conn_anon_priv_bind = onoff;
2024 		break;
2025 	case UDP_EXCLBIND:
2026 		connp->conn_exclbind = onoff;
2027 		break;
2028 	}
2029 	mutex_exit(&connp->conn_lock);
2030 	return (0);
2031 }
2032 
2033 /* Handle IPPROTO_TCP */
2034 /* ARGSUSED1 */
2035 static int
2036 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2037     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2038 {
2039 	conn_t		*connp = coa->coa_connp;
2040 	int		*i1 = (int *)invalp;
2041 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2042 	int		error;
2043 
2044 	switch (name) {
2045 	case TCP_ANONPRIVBIND:
2046 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2047 			return (error);
2048 		}
2049 		break;
2050 	}
2051 	if (checkonly)
2052 		return (0);
2053 
2054 	/* Here we set the actual option value */
2055 	mutex_enter(&connp->conn_lock);
2056 	switch (name) {
2057 	case TCP_ANONPRIVBIND:
2058 		connp->conn_anon_priv_bind = onoff;
2059 		break;
2060 	case TCP_EXCLBIND:
2061 		connp->conn_exclbind = onoff;
2062 		break;
2063 	case TCP_RECVDSTADDR:
2064 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2065 		break;
2066 	}
2067 	mutex_exit(&connp->conn_lock);
2068 	return (0);
2069 }
2070 
2071 int
2072 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2073 {
2074 	sin_t		*sin;
2075 	sin6_t		*sin6;
2076 
2077 	if (connp->conn_family == AF_INET) {
2078 		if (*salenp < sizeof (sin_t))
2079 			return (EINVAL);
2080 
2081 		*salenp = sizeof (sin_t);
2082 		/* Fill zeroes and then initialize non-zero fields */
2083 		sin = (sin_t *)sa;
2084 		*sin = sin_null;
2085 		sin->sin_family = AF_INET;
2086 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2087 		    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2088 			sin->sin_addr.s_addr = connp->conn_saddr_v4;
2089 		} else {
2090 			/*
2091 			 * INADDR_ANY
2092 			 * conn_saddr is not set, we might be bound to
2093 			 * broadcast/multicast. Use conn_bound_addr as
2094 			 * local address instead (that could
2095 			 * also still be INADDR_ANY)
2096 			 */
2097 			sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2098 		}
2099 		sin->sin_port = connp->conn_lport;
2100 	} else {
2101 		if (*salenp < sizeof (sin6_t))
2102 			return (EINVAL);
2103 
2104 		*salenp = sizeof (sin6_t);
2105 		/* Fill zeroes and then initialize non-zero fields */
2106 		sin6 = (sin6_t *)sa;
2107 		*sin6 = sin6_null;
2108 		sin6->sin6_family = AF_INET6;
2109 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2110 			sin6->sin6_addr = connp->conn_saddr_v6;
2111 		} else {
2112 			/*
2113 			 * conn_saddr is not set, we might be bound to
2114 			 * broadcast/multicast. Use conn_bound_addr as
2115 			 * local address instead (which could
2116 			 * also still be unspecified)
2117 			 */
2118 			sin6->sin6_addr = connp->conn_bound_addr_v6;
2119 		}
2120 		sin6->sin6_port = connp->conn_lport;
2121 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2122 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2123 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2124 	}
2125 	return (0);
2126 }
2127 
2128 int
2129 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2130 {
2131 	struct sockaddr_in	*sin;
2132 	struct sockaddr_in6	*sin6;
2133 
2134 	if (connp->conn_family == AF_INET) {
2135 		if (*salenp < sizeof (sin_t))
2136 			return (EINVAL);
2137 
2138 		*salenp = sizeof (sin_t);
2139 		/* initialize */
2140 		sin = (sin_t *)sa;
2141 		*sin = sin_null;
2142 		sin->sin_family = AF_INET;
2143 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
2144 		sin->sin_port = connp->conn_fport;
2145 	} else {
2146 		if (*salenp < sizeof (sin6_t))
2147 			return (EINVAL);
2148 
2149 		*salenp = sizeof (sin6_t);
2150 		/* initialize */
2151 		sin6 = (sin6_t *)sa;
2152 		*sin6 = sin6_null;
2153 		sin6->sin6_family = AF_INET6;
2154 		sin6->sin6_addr = connp->conn_faddr_v6;
2155 		sin6->sin6_port =  connp->conn_fport;
2156 		sin6->sin6_flowinfo = connp->conn_flowinfo;
2157 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2158 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2159 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2160 	}
2161 	return (0);
2162 }
2163 
2164 static uint32_t	cksum_massage_options_v4(ipha_t *, netstack_t *);
2165 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2166 
2167 /*
2168  * Allocate and fill in conn_ht_iphc based on the current information
2169  * in the conn.
2170  * Normally used when we bind() and connect().
2171  * Returns failure if can't allocate memory, or if there is a problem
2172  * with a routing header/option.
2173  *
2174  * We allocate space for the transport header (ulp_hdr_len + extra) and
2175  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2176  * The extra is there for transports that want some spare room for future
2177  * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2178  * excludes the extra part.
2179  *
2180  * We massage an routing option/header and store the ckecksum difference
2181  * in conn_sum.
2182  *
2183  * Caller needs to update conn_wroff if desired.
2184  */
2185 int
2186 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2187     const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2188 {
2189 	ip_xmit_attr_t	*ixa = connp->conn_ixa;
2190 	ip_pkt_t	*ipp = &connp->conn_xmit_ipp;
2191 	uint_t		ip_hdr_length;
2192 	uchar_t		*hdrs;
2193 	uint_t		hdrs_len;
2194 
2195 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2196 
2197 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2198 		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2199 		/* In case of TX label and IP options it can be too much */
2200 		if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2201 			/* Preserves existing TX errno for this */
2202 			return (EHOSTUNREACH);
2203 		}
2204 	} else {
2205 		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2206 	}
2207 	ixa->ixa_ip_hdr_length = ip_hdr_length;
2208 	hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2209 	ASSERT(hdrs_len != 0);
2210 
2211 	if (hdrs_len != connp->conn_ht_iphc_allocated) {
2212 		/* Allocate new before we free any old */
2213 		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2214 		if (hdrs == NULL)
2215 			return (ENOMEM);
2216 
2217 		if (connp->conn_ht_iphc != NULL) {
2218 			kmem_free(connp->conn_ht_iphc,
2219 			    connp->conn_ht_iphc_allocated);
2220 		}
2221 		connp->conn_ht_iphc = hdrs;
2222 		connp->conn_ht_iphc_allocated = hdrs_len;
2223 	} else {
2224 		hdrs = connp->conn_ht_iphc;
2225 	}
2226 	hdrs_len -= extra;
2227 	connp->conn_ht_iphc_len = hdrs_len;
2228 
2229 	connp->conn_ht_ulp = hdrs + ip_hdr_length;
2230 	connp->conn_ht_ulp_len = ulp_hdr_length;
2231 
2232 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2233 		ipha_t	*ipha = (ipha_t *)hdrs;
2234 
2235 		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2236 		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2237 		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2238 		ipha->ipha_length = htons(hdrs_len);
2239 		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2240 			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2241 		else
2242 			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2243 
2244 		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2245 			connp->conn_sum = cksum_massage_options_v4(ipha,
2246 			    connp->conn_netstack);
2247 		} else {
2248 			connp->conn_sum = 0;
2249 		}
2250 	} else {
2251 		ip6_t	*ip6h = (ip6_t *)hdrs;
2252 
2253 		ip6h->ip6_src = *v6src;
2254 		ip6h->ip6_dst = *v6dst;
2255 		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2256 		    flowinfo);
2257 		ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2258 
2259 		if (ipp->ipp_fields & IPPF_RTHDR) {
2260 			connp->conn_sum = cksum_massage_options_v6(ip6h,
2261 			    ip_hdr_length, connp->conn_netstack);
2262 
2263 			/*
2264 			 * Verify that the first hop isn't a mapped address.
2265 			 * Routers along the path need to do this verification
2266 			 * for subsequent hops.
2267 			 */
2268 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2269 				return (EADDRNOTAVAIL);
2270 
2271 		} else {
2272 			connp->conn_sum = 0;
2273 		}
2274 	}
2275 	return (0);
2276 }
2277 
2278 /*
2279  * Prepend a header template to data_mp based on the ip_pkt_t
2280  * and the passed in source, destination and protocol.
2281  *
2282  * Returns failure if can't allocate memory, in which case data_mp is freed.
2283  * We allocate space for the transport header (ulp_hdr_len) and
2284  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2285  *
2286  * We massage an routing option/header and return the ckecksum difference
2287  * in *sump. This is in host byte order.
2288  *
2289  * Caller needs to update conn_wroff if desired.
2290  */
2291 mblk_t *
2292 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2293     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2294     uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2295     uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2296 {
2297 	uint_t		ip_hdr_length;
2298 	uchar_t		*hdrs;
2299 	uint_t		hdrs_len;
2300 	mblk_t		*mp;
2301 
2302 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2303 		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2304 		ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2305 	} else {
2306 		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2307 	}
2308 	hdrs_len = ip_hdr_length + ulp_hdr_length;
2309 	ASSERT(hdrs_len != 0);
2310 
2311 	ixa->ixa_ip_hdr_length = ip_hdr_length;
2312 
2313 	/* Can we prepend to data_mp? */
2314 	if (data_mp != NULL &&
2315 	    data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2316 	    data_mp->b_datap->db_ref == 1) {
2317 		hdrs = data_mp->b_rptr - hdrs_len;
2318 		data_mp->b_rptr = hdrs;
2319 		mp = data_mp;
2320 	} else {
2321 		mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2322 		if (mp == NULL) {
2323 			freemsg(data_mp);
2324 			*errorp = ENOMEM;
2325 			return (NULL);
2326 		}
2327 		mp->b_wptr = mp->b_datap->db_lim;
2328 		hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2329 		mp->b_cont = data_mp;
2330 	}
2331 
2332 	/*
2333 	 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2334 	 * if PKTINFO (aka IPPF_ADDR) was set.
2335 	 */
2336 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2337 		ipha_t *ipha = (ipha_t *)hdrs;
2338 
2339 		ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2340 		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2341 		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2342 		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2343 		ipha->ipha_length = htons(hdrs_len + data_length);
2344 		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2345 			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2346 		else
2347 			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2348 
2349 		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2350 			*sump = cksum_massage_options_v4(ipha,
2351 			    ixa->ixa_ipst->ips_netstack);
2352 		} else {
2353 			*sump = 0;
2354 		}
2355 	} else {
2356 		ip6_t *ip6h = (ip6_t *)hdrs;
2357 
2358 		ip6h->ip6_src = *v6src;
2359 		ip6h->ip6_dst = *v6dst;
2360 		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2361 		ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2362 
2363 		if (ipp->ipp_fields & IPPF_RTHDR) {
2364 			*sump = cksum_massage_options_v6(ip6h,
2365 			    ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2366 
2367 			/*
2368 			 * Verify that the first hop isn't a mapped address.
2369 			 * Routers along the path need to do this verification
2370 			 * for subsequent hops.
2371 			 */
2372 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2373 				*errorp = EADDRNOTAVAIL;
2374 				freemsg(mp);
2375 				return (NULL);
2376 			}
2377 		} else {
2378 			*sump = 0;
2379 		}
2380 	}
2381 	return (mp);
2382 }
2383 
2384 /*
2385  * Massage a source route if any putting the first hop
2386  * in ipha_dst. Compute a starting value for the checksum which
2387  * takes into account that the original ipha_dst should be
2388  * included in the checksum but that IP will include the
2389  * first hop from the source route in the tcp checksum.
2390  */
2391 static uint32_t
2392 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2393 {
2394 	in_addr_t	dst;
2395 	uint32_t	cksum;
2396 
2397 	/* Get last hop then diff against first hop */
2398 	cksum = ip_massage_options(ipha, ns);
2399 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2400 	dst = ipha->ipha_dst;
2401 	cksum -= ((dst >> 16) + (dst & 0xffff));
2402 	if ((int)cksum < 0)
2403 		cksum--;
2404 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2405 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2406 	ASSERT(cksum < 0x10000);
2407 	return (ntohs(cksum));
2408 }
2409 
2410 static uint32_t
2411 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2412 {
2413 	uint8_t		*end;
2414 	ip6_rthdr_t	*rth;
2415 	uint32_t	cksum;
2416 
2417 	end = (uint8_t *)ip6h + ip_hdr_len;
2418 	rth = ip_find_rthdr_v6(ip6h, end);
2419 	if (rth == NULL)
2420 		return (0);
2421 
2422 	cksum = ip_massage_options_v6(ip6h, rth, ns);
2423 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2424 	ASSERT(cksum < 0x10000);
2425 	return (ntohs(cksum));
2426 }
2427 
2428 /*
2429  * ULPs that change the destination address need to call this for each
2430  * change to discard any state about a previous destination that might
2431  * have been multicast or multirt.
2432  */
2433 void
2434 ip_attr_newdst(ip_xmit_attr_t *ixa)
2435 {
2436 	ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2437 	    IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2438 	    IXAF_NO_LOOP_ZONEID_SET);
2439 }
2440 
2441 /*
2442  * Determine the nexthop which will be used.
2443  * Normally this is just the destination, but if a IPv4 source route, or
2444  * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2445  * there.
2446  */
2447 void
2448 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2449     const in6_addr_t *dst, in6_addr_t *nexthop)
2450 {
2451 	if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2452 		*nexthop = *dst;
2453 		return;
2454 	}
2455 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2456 		ipaddr_t v4dst;
2457 		ipaddr_t v4nexthop;
2458 
2459 		IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2460 		v4nexthop = ip_pkt_source_route_v4(ipp);
2461 		if (v4nexthop == INADDR_ANY)
2462 			v4nexthop = v4dst;
2463 
2464 		IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2465 	} else {
2466 		const in6_addr_t *v6nexthop;
2467 
2468 		v6nexthop = ip_pkt_source_route_v6(ipp);
2469 		if (v6nexthop == NULL)
2470 			v6nexthop = dst;
2471 
2472 		*nexthop = *v6nexthop;
2473 	}
2474 }
2475 
2476 /*
2477  * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2478  * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2479  * case (connected latching is done in conn_connect).
2480  * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2481  * set, but doesn't otherwise use the conn_t.
2482  *
2483  * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2484  * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2485  *
2486  * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2487  * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2488  *
2489  * Updates laddrp and uinfo if they are non-NULL.
2490  *
2491  * TSOL notes: The callers if ip_attr_connect must check if the destination
2492  * is different than before and in that case redo conn_update_label.
2493  * The callers of conn_connect do not need that since conn_connect
2494  * performs the conn_update_label.
2495  */
2496 int
2497 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2498     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2499     const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2500     iulp_t *uinfo, uint32_t flags)
2501 {
2502 	in6_addr_t		laddr = *v6src;
2503 	int			error;
2504 
2505 	ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2506 
2507 	if (connp->conn_zone_is_global)
2508 		flags |= IPDF_ZONE_IS_GLOBAL;
2509 	else
2510 		flags &= ~IPDF_ZONE_IS_GLOBAL;
2511 
2512 	/*
2513 	 * Lookup the route to determine a source address and the uinfo.
2514 	 * If the ULP has a source route option then the caller will
2515 	 * have set v6nexthop to be the first hop.
2516 	 */
2517 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2518 		ipaddr_t v4dst;
2519 		ipaddr_t v4src, v4nexthop;
2520 
2521 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2522 		IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2523 		IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2524 
2525 		if (connp->conn_unspec_src || v4src != INADDR_ANY)
2526 			flags &= ~IPDF_SELECT_SRC;
2527 		else
2528 			flags |= IPDF_SELECT_SRC;
2529 
2530 		error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2531 		    uinfo, flags, connp->conn_mac_mode);
2532 		IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2533 	} else {
2534 		if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2535 			flags &= ~IPDF_SELECT_SRC;
2536 		else
2537 			flags |= IPDF_SELECT_SRC;
2538 
2539 		error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2540 		    uinfo, flags, connp->conn_mac_mode);
2541 	}
2542 	/* Pass out some address even if we hit a RTF_REJECT etc */
2543 	if (laddrp != NULL)
2544 		*laddrp = laddr;
2545 
2546 	if (error != 0)
2547 		return (error);
2548 
2549 	if (flags & IPDF_IPSEC) {
2550 		/*
2551 		 * Set any IPsec policy in ixa. Routine also looks at ULP
2552 		 * ports.
2553 		 */
2554 		ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2555 	}
2556 	return (0);
2557 }
2558 
2559 /*
2560  * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2561  * Assumes that conn_faddr and conn_fport are already set. As such it is not
2562  * usable for SCTP, since SCTP has multiple faddrs.
2563  *
2564  * Caller must hold conn_lock to provide atomic constency between the
2565  * conn_t's addresses and the ixa.
2566  * NOTE: this function drops and reaquires conn_lock since it can't be
2567  * held across ip_attr_connect/ip_set_destination.
2568  *
2569  * The caller needs to handle inserting in the receive-side fanout when
2570  * appropriate after conn_connect returns.
2571  */
2572 int
2573 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2574 {
2575 	ip_xmit_attr_t	*ixa = connp->conn_ixa;
2576 	in6_addr_t	nexthop;
2577 	in6_addr_t	saddr, faddr;
2578 	in_port_t	fport;
2579 	int		error;
2580 
2581 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2582 
2583 	if (connp->conn_ipversion == IPV4_VERSION)
2584 		ixa->ixa_flags |= IXAF_IS_IPV4;
2585 	else
2586 		ixa->ixa_flags &= ~IXAF_IS_IPV4;
2587 
2588 	/* We do IPsec latching below - hence no caching in ip_attr_connect */
2589 	flags &= ~IPDF_IPSEC;
2590 
2591 	/* In case we had previously done an ip_attr_connect */
2592 	ip_attr_newdst(ixa);
2593 
2594 	/*
2595 	 * Determine the nexthop and copy the addresses before dropping
2596 	 * conn_lock.
2597 	 */
2598 	ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2599 	    &connp->conn_faddr_v6, &nexthop);
2600 	saddr = connp->conn_saddr_v6;
2601 	faddr = connp->conn_faddr_v6;
2602 	fport = connp->conn_fport;
2603 
2604 	mutex_exit(&connp->conn_lock);
2605 	error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2606 	    &saddr, uinfo, flags | IPDF_VERIFY_DST);
2607 	mutex_enter(&connp->conn_lock);
2608 
2609 	/* Could have changed even if an error */
2610 	connp->conn_saddr_v6 = saddr;
2611 	if (error != 0)
2612 		return (error);
2613 
2614 	/*
2615 	 * Check whether Trusted Solaris policy allows communication with this
2616 	 * host, and pretend that the destination is unreachable if not.
2617 	 * Compute any needed label and place it in ipp_label_v4/v6.
2618 	 *
2619 	 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2620 	 * the packet.
2621 	 *
2622 	 * TSOL Note: Any concurrent threads would pick a different ixa
2623 	 * (and ipp if they are to change the ipp)  so we
2624 	 * don't have to worry about concurrent threads.
2625 	 */
2626 	if (is_system_labeled()) {
2627 		if (connp->conn_mlp_type != mlptSingle)
2628 			return (ECONNREFUSED);
2629 
2630 		/*
2631 		 * conn_update_label will set ipp_label* which will later
2632 		 * be used by conn_build_hdr_template.
2633 		 */
2634 		error = conn_update_label(connp, ixa,
2635 		    &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2636 		if (error != 0)
2637 			return (error);
2638 	}
2639 
2640 	/*
2641 	 * Ensure that we match on the selected local address.
2642 	 * This overrides conn_laddr in the case we had earlier bound to a
2643 	 * multicast or broadcast address.
2644 	 */
2645 	connp->conn_laddr_v6 = connp->conn_saddr_v6;
2646 
2647 	/*
2648 	 * Allow setting new policies.
2649 	 * The addresses/ports are already set, thus the IPsec policy calls
2650 	 * can handle their passed-in conn's.
2651 	 */
2652 	connp->conn_policy_cached = B_FALSE;
2653 
2654 	/*
2655 	 * Cache IPsec policy in this conn.  If we have per-socket policy,
2656 	 * we'll cache that.  If we don't, we'll inherit global policy.
2657 	 *
2658 	 * This is done before the caller inserts in the receive-side fanout.
2659 	 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2660 	 * for connections where we don't have a policy. This is to prevent
2661 	 * global policy lookups in the inbound path.
2662 	 *
2663 	 * If we insert before we set conn_policy_cached,
2664 	 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2665 	 * because global policy cound be non-empty. We normally call
2666 	 * ipsec_check_policy() for conn_policy_cached connections only if
2667 	 * conn_in_enforce_policy is set. But in this case,
2668 	 * conn_policy_cached can get set anytime since we made the
2669 	 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2670 	 * called, which will make the above assumption false.  Thus, we
2671 	 * need to insert after we set conn_policy_cached.
2672 	 */
2673 	error = ipsec_conn_cache_policy(connp,
2674 	    connp->conn_ipversion == IPV4_VERSION);
2675 	if (error != 0)
2676 		return (error);
2677 
2678 	/*
2679 	 * We defer to do LSO check until here since now we have better idea
2680 	 * whether IPsec is present. If the underlying ill is LSO capable,
2681 	 * copy its capability in so the ULP can decide whether to enable LSO
2682 	 * on this connection. So far, only TCP/IPv4 is implemented, so won't
2683 	 * claim LSO for IPv6.
2684 	 *
2685 	 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2686 	 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2687 	 */
2688 	ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2689 
2690 	ASSERT(ixa->ixa_ire != NULL);
2691 	if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2692 	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2693 	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2694 	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2695 	    (ixa->ixa_nce != NULL) &&
2696 	    ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2697 	    ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2698 	    ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2699 		ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2700 		ixa->ixa_flags |= IXAF_LSO_CAPAB;
2701 	}
2702 
2703 	/* Check whether ZEROCOPY capability is usable for this connection. */
2704 	ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2705 
2706 	if ((flags & IPDF_ZCOPY) &&
2707 	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2708 	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2709 	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2710 	    (ixa->ixa_nce != NULL) &&
2711 	    ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2712 		ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2713 	}
2714 	return (0);
2715 }
2716 
2717 /*
2718  * Predicates to check if the addresses match conn_last*
2719  */
2720 
2721 /*
2722  * Compare the conn against an address.
2723  * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2724  */
2725 boolean_t
2726 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2727 {
2728 	ASSERT(connp->conn_family == AF_INET);
2729 	return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2730 	    sin->sin_port == connp->conn_lastdstport);
2731 }
2732 
2733 /*
2734  * Compare, including for mapped addresses
2735  */
2736 boolean_t
2737 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2738 {
2739 	return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2740 	    sin6->sin6_port == connp->conn_lastdstport &&
2741 	    sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2742 	    sin6->sin6_scope_id == connp->conn_lastscopeid);
2743 }
2744 
2745 /*
2746  * Compute a label and place it in the ip_packet_t.
2747  * Handles IPv4 and IPv6.
2748  * The caller should have a correct ixa_tsl and ixa_zoneid and have
2749  * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2750  * has been called.
2751  */
2752 int
2753 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2754     const in6_addr_t *v6dst, ip_pkt_t *ipp)
2755 {
2756 	int		err;
2757 	ipaddr_t	v4dst;
2758 
2759 	if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2760 		uchar_t		opt_storage[IP_MAX_OPT_LENGTH];
2761 
2762 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2763 
2764 		err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2765 		    v4dst, opt_storage, ixa->ixa_ipst);
2766 		if (err == 0) {
2767 			/* Length contained in opt_storage[IPOPT_OLEN] */
2768 			err = optcom_pkt_set(opt_storage,
2769 			    opt_storage[IPOPT_OLEN],
2770 			    (uchar_t **)&ipp->ipp_label_v4,
2771 			    &ipp->ipp_label_len_v4);
2772 		}
2773 		if (err != 0) {
2774 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2775 			    char *, "conn(1) failed to update options(2) "
2776 			    "on ixa(3)",
2777 			    conn_t *, connp, char *, opt_storage,
2778 			    ip_xmit_attr_t *, ixa);
2779 		}
2780 		if (ipp->ipp_label_len_v4 != 0)
2781 			ipp->ipp_fields |= IPPF_LABEL_V4;
2782 		else
2783 			ipp->ipp_fields &= ~IPPF_LABEL_V4;
2784 	} else {
2785 		uchar_t		opt_storage[TSOL_MAX_IPV6_OPTION];
2786 		uint_t		optlen;
2787 
2788 		err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2789 		    v6dst, opt_storage, ixa->ixa_ipst);
2790 		if (err == 0) {
2791 			/*
2792 			 * Note that ipp_label_v6 is just the option - not
2793 			 * the hopopts extension header.
2794 			 *
2795 			 * Length contained in opt_storage[IPOPT_OLEN], but
2796 			 * that doesn't include the two byte options header.
2797 			 */
2798 			optlen = opt_storage[IPOPT_OLEN];
2799 			if (optlen != 0)
2800 				optlen += 2;
2801 
2802 			err = optcom_pkt_set(opt_storage, optlen,
2803 			    (uchar_t **)&ipp->ipp_label_v6,
2804 			    &ipp->ipp_label_len_v6);
2805 		}
2806 		if (err != 0) {
2807 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2808 			    char *, "conn(1) failed to update options(2) "
2809 			    "on ixa(3)",
2810 			    conn_t *, connp, char *, opt_storage,
2811 			    ip_xmit_attr_t *, ixa);
2812 		}
2813 		if (ipp->ipp_label_len_v6 != 0)
2814 			ipp->ipp_fields |= IPPF_LABEL_V6;
2815 		else
2816 			ipp->ipp_fields &= ~IPPF_LABEL_V6;
2817 	}
2818 	return (err);
2819 }
2820 
2821 /*
2822  * Inherit all options settings from the parent/listener to the eager.
2823  * Returns zero on success; ENOMEM if memory allocation failed.
2824  *
2825  * We assume that the eager has not had any work done i.e., the conn_ixa
2826  * and conn_xmit_ipp are all zero.
2827  * Furthermore we assume that no other thread can access the eager (because
2828  * it isn't inserted in any fanout list).
2829  */
2830 int
2831 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2832 {
2833 	cred_t	*credp;
2834 	int	err;
2835 	void	*notify_cookie;
2836 	uint32_t xmit_hint;
2837 
2838 	econnp->conn_family = lconnp->conn_family;
2839 	econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2840 	econnp->conn_wq = lconnp->conn_wq;
2841 	econnp->conn_rq = lconnp->conn_rq;
2842 
2843 	/*
2844 	 * Make a safe copy of the transmit attributes.
2845 	 * conn_connect will later be used by the caller to setup the ire etc.
2846 	 */
2847 	ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2848 	ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2849 	ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2850 	ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2851 
2852 	/* Preserve ixa_notify_cookie and xmit_hint */
2853 	notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2854 	xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2855 	ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2856 	econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2857 	econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2858 
2859 	econnp->conn_bound_if = lconnp->conn_bound_if;
2860 	econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2861 
2862 	/* Inherit all RECV options */
2863 	econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2864 
2865 	err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2866 	    KM_NOSLEEP);
2867 	if (err != 0)
2868 		return (err);
2869 
2870 	econnp->conn_zoneid = lconnp->conn_zoneid;
2871 	econnp->conn_allzones = lconnp->conn_allzones;
2872 
2873 	/* This is odd. Pick a flowlabel for each connection instead? */
2874 	econnp->conn_flowinfo = lconnp->conn_flowinfo;
2875 
2876 	econnp->conn_default_ttl = lconnp->conn_default_ttl;
2877 
2878 	/*
2879 	 * TSOL: tsol_input_proc() needs the eager's cred before the
2880 	 * eager is accepted
2881 	 */
2882 	ASSERT(lconnp->conn_cred != NULL);
2883 	econnp->conn_cred = credp = lconnp->conn_cred;
2884 	crhold(credp);
2885 	econnp->conn_cpid = lconnp->conn_cpid;
2886 	econnp->conn_open_time = ddi_get_lbolt64();
2887 
2888 	/*
2889 	 * Cache things in the ixa without any refhold.
2890 	 * Listener might not have set up ixa_cred
2891 	 */
2892 	ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2893 	econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2894 	econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2895 	if (is_system_labeled())
2896 		econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2897 
2898 	/*
2899 	 * If the caller has the process-wide flag set, then default to MAC
2900 	 * exempt mode.  This allows read-down to unlabeled hosts.
2901 	 */
2902 	if (getpflags(NET_MAC_AWARE, credp) != 0)
2903 		econnp->conn_mac_mode = CONN_MAC_AWARE;
2904 
2905 	econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2906 
2907 	/*
2908 	 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2909 	 * via soaccept()->soinheritoptions() which essentially applies
2910 	 * all the listener options to the new connection. The options that we
2911 	 * need to take care of are:
2912 	 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2913 	 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2914 	 * SO_SNDBUF, SO_RCVBUF.
2915 	 *
2916 	 * SO_RCVBUF:	conn_rcvbuf is set.
2917 	 * SO_SNDBUF:	conn_sndbuf is set.
2918 	 */
2919 
2920 	/* Could we define a struct and use a struct copy for this? */
2921 	econnp->conn_sndbuf = lconnp->conn_sndbuf;
2922 	econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2923 	econnp->conn_sndlowat = lconnp->conn_sndlowat;
2924 	econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2925 	econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2926 	econnp->conn_oobinline = lconnp->conn_oobinline;
2927 	econnp->conn_debug = lconnp->conn_debug;
2928 	econnp->conn_keepalive = lconnp->conn_keepalive;
2929 	econnp->conn_linger = lconnp->conn_linger;
2930 	econnp->conn_lingertime = lconnp->conn_lingertime;
2931 
2932 	/* Set the IP options */
2933 	econnp->conn_broadcast = lconnp->conn_broadcast;
2934 	econnp->conn_useloopback = lconnp->conn_useloopback;
2935 	econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2936 	return (0);
2937 }
2938