xref: /titanic_51/usr/src/uts/common/inet/ip/conn_opt.c (revision 96d9f183facd90dbbc2268c9a51689be0b6a0b46)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /* Copyright (c) 1990 Mentat Inc. */
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/strsun.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/xti_inet.h>
34 #include <sys/ucred.h>
35 #include <sys/zone.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/atomic.h>
41 #include <sys/policy.h>
42 
43 #include <sys/systm.h>
44 #include <sys/param.h>
45 #include <sys/kmem.h>
46 #include <sys/sdt.h>
47 #include <sys/socket.h>
48 #include <sys/ethernet.h>
49 #include <sys/mac.h>
50 #include <net/if.h>
51 #include <net/if_types.h>
52 #include <net/if_arp.h>
53 #include <net/route.h>
54 #include <sys/sockio.h>
55 #include <netinet/in.h>
56 #include <net/if_dl.h>
57 
58 #include <inet/common.h>
59 #include <inet/mi.h>
60 #include <inet/mib2.h>
61 #include <inet/nd.h>
62 #include <inet/arp.h>
63 #include <inet/snmpcom.h>
64 #include <inet/kstatcom.h>
65 
66 #include <netinet/igmp_var.h>
67 #include <netinet/ip6.h>
68 #include <netinet/icmp6.h>
69 #include <netinet/sctp.h>
70 
71 #include <inet/ip.h>
72 #include <inet/ip_impl.h>
73 #include <inet/ip6.h>
74 #include <inet/ip6_asp.h>
75 #include <inet/tcp.h>
76 #include <inet/ip_multi.h>
77 #include <inet/ip_if.h>
78 #include <inet/ip_ire.h>
79 #include <inet/ip_ftable.h>
80 #include <inet/ip_rts.h>
81 #include <inet/optcom.h>
82 #include <inet/ip_ndp.h>
83 #include <inet/ip_listutils.h>
84 #include <netinet/igmp.h>
85 #include <netinet/ip_mroute.h>
86 #include <netinet/udp.h>
87 #include <inet/ipp_common.h>
88 
89 #include <net/pfkeyv2.h>
90 #include <inet/sadb.h>
91 #include <inet/ipsec_impl.h>
92 #include <inet/ipdrop.h>
93 #include <inet/ip_netinfo.h>
94 
95 #include <inet/ipclassifier.h>
96 #include <inet/sctp_ip.h>
97 #include <inet/sctp/sctp_impl.h>
98 #include <inet/udp_impl.h>
99 #include <sys/sunddi.h>
100 
101 #include <sys/tsol/label.h>
102 #include <sys/tsol/tnet.h>
103 
104 static	sin_t	sin_null;	/* Zero address for quick clears */
105 static	sin6_t	sin6_null;	/* Zero address for quick clears */
106 
107 /*
108  * Return how much size is needed for the different ancillary data items
109  */
110 uint_t
111 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
112     ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
113 {
114 	uint_t		ancil_size;
115 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
116 
117 	/*
118 	 * If IP_RECVDSTADDR is set we include the destination IP
119 	 * address as an option. With IP_RECVOPTS we include all
120 	 * the IP options.
121 	 */
122 	ancil_size = 0;
123 	if (recv_ancillary.crb_recvdstaddr &&
124 	    (ira->ira_flags & IRAF_IS_IPV4)) {
125 		ancil_size += sizeof (struct T_opthdr) +
126 		    sizeof (struct in_addr);
127 		IP_STAT(ipst, conn_in_recvdstaddr);
128 	}
129 
130 	/*
131 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
132 	 * are different
133 	 */
134 	if (recv_ancillary.crb_ip_recvpktinfo &&
135 	    connp->conn_family == AF_INET) {
136 		ancil_size += sizeof (struct T_opthdr) +
137 		    sizeof (struct in_pktinfo);
138 		IP_STAT(ipst, conn_in_recvpktinfo);
139 	}
140 
141 	if ((recv_ancillary.crb_recvopts) &&
142 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
143 		ancil_size += sizeof (struct T_opthdr) +
144 		    ipp->ipp_ipv4_options_len;
145 		IP_STAT(ipst, conn_in_recvopts);
146 	}
147 
148 	if (recv_ancillary.crb_recvslla) {
149 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
150 		ill_t *ill;
151 
152 		/* Make sure ira_l2src is setup if not already */
153 		if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
154 			ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
155 			    ipst);
156 			if (ill != NULL) {
157 				ip_setl2src(mp, ira, ill);
158 				ill_refrele(ill);
159 			}
160 		}
161 		ancil_size += sizeof (struct T_opthdr) +
162 		    sizeof (struct sockaddr_dl);
163 		IP_STAT(ipst, conn_in_recvslla);
164 	}
165 
166 	if (recv_ancillary.crb_recvif) {
167 		ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
168 		IP_STAT(ipst, conn_in_recvif);
169 	}
170 
171 	/*
172 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
173 	 * are different
174 	 */
175 	if (recv_ancillary.crb_ip_recvpktinfo &&
176 	    connp->conn_family == AF_INET6) {
177 		ancil_size += sizeof (struct T_opthdr) +
178 		    sizeof (struct in6_pktinfo);
179 		IP_STAT(ipst, conn_in_recvpktinfo);
180 	}
181 
182 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
183 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
184 		IP_STAT(ipst, conn_in_recvhoplimit);
185 	}
186 
187 	if (recv_ancillary.crb_ipv6_recvtclass) {
188 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
189 		IP_STAT(ipst, conn_in_recvtclass);
190 	}
191 
192 	if (recv_ancillary.crb_ipv6_recvhopopts &&
193 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
194 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
195 		IP_STAT(ipst, conn_in_recvhopopts);
196 	}
197 	/*
198 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
199 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
200 	 * options that appear before a routing header.
201 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
202 	 */
203 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
204 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
205 		    (recv_ancillary.crb_ipv6_recvdstopts &&
206 		    recv_ancillary.crb_ipv6_recvrthdr)) {
207 			ancil_size += sizeof (struct T_opthdr) +
208 			    ipp->ipp_rthdrdstoptslen;
209 			IP_STAT(ipst, conn_in_recvrthdrdstopts);
210 		}
211 	}
212 	if ((recv_ancillary.crb_ipv6_recvrthdr) &&
213 	    (ipp->ipp_fields & IPPF_RTHDR)) {
214 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
215 		IP_STAT(ipst, conn_in_recvrthdr);
216 	}
217 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
218 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
219 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
220 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
221 		IP_STAT(ipst, conn_in_recvdstopts);
222 	}
223 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
224 		ancil_size += sizeof (struct T_opthdr) +
225 		    ucredminsize(ira->ira_cred);
226 		IP_STAT(ipst, conn_in_recvucred);
227 	}
228 
229 	/*
230 	 * If SO_TIMESTAMP is set allocate the appropriate sized
231 	 * buffer. Since gethrestime() expects a pointer aligned
232 	 * argument, we allocate space necessary for extra
233 	 * alignment (even though it might not be used).
234 	 */
235 	if (recv_ancillary.crb_timestamp) {
236 		ancil_size += sizeof (struct T_opthdr) +
237 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
238 		IP_STAT(ipst, conn_in_timestamp);
239 	}
240 
241 	/*
242 	 * If IP_RECVTTL is set allocate the appropriate sized buffer
243 	 */
244 	if (recv_ancillary.crb_recvttl &&
245 	    (ira->ira_flags & IRAF_IS_IPV4)) {
246 		ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
247 		IP_STAT(ipst, conn_in_recvttl);
248 	}
249 
250 	return (ancil_size);
251 }
252 
253 /*
254  * Lay down the ancillary data items at "ancil_buf".
255  * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
256  * large buffer - ancil_size.
257  */
258 void
259 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
260     ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
261 {
262 	/*
263 	 * Copy in destination address before options to avoid
264 	 * any padding issues.
265 	 */
266 	if (recv_ancillary.crb_recvdstaddr &&
267 	    (ira->ira_flags & IRAF_IS_IPV4)) {
268 		struct T_opthdr *toh;
269 		ipaddr_t *dstptr;
270 
271 		toh = (struct T_opthdr *)ancil_buf;
272 		toh->level = IPPROTO_IP;
273 		toh->name = IP_RECVDSTADDR;
274 		toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
275 		toh->status = 0;
276 		ancil_buf += sizeof (struct T_opthdr);
277 		dstptr = (ipaddr_t *)ancil_buf;
278 		*dstptr = ipp->ipp_addr_v4;
279 		ancil_buf += sizeof (ipaddr_t);
280 		ancil_size -= toh->len;
281 	}
282 
283 	/*
284 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
285 	 * are different
286 	 */
287 	if (recv_ancillary.crb_ip_recvpktinfo &&
288 	    connp->conn_family == AF_INET) {
289 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
290 		struct T_opthdr *toh;
291 		struct in_pktinfo *pktinfop;
292 		ill_t *ill;
293 		ipif_t *ipif;
294 
295 		toh = (struct T_opthdr *)ancil_buf;
296 		toh->level = IPPROTO_IP;
297 		toh->name = IP_PKTINFO;
298 		toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
299 		toh->status = 0;
300 		ancil_buf += sizeof (struct T_opthdr);
301 		pktinfop = (struct in_pktinfo *)ancil_buf;
302 
303 		pktinfop->ipi_ifindex = ira->ira_ruifindex;
304 		pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
305 
306 		/* Find a good address to report */
307 		ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
308 		if (ill != NULL) {
309 			ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
310 			if (ipif != NULL) {
311 				pktinfop->ipi_spec_dst.s_addr =
312 				    ipif->ipif_lcl_addr;
313 				ipif_refrele(ipif);
314 			}
315 			ill_refrele(ill);
316 		}
317 		pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
318 		ancil_buf += sizeof (struct in_pktinfo);
319 		ancil_size -= toh->len;
320 	}
321 
322 	if ((recv_ancillary.crb_recvopts) &&
323 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
324 		struct T_opthdr *toh;
325 
326 		toh = (struct T_opthdr *)ancil_buf;
327 		toh->level = IPPROTO_IP;
328 		toh->name = IP_RECVOPTS;
329 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
330 		toh->status = 0;
331 		ancil_buf += sizeof (struct T_opthdr);
332 		bcopy(ipp->ipp_ipv4_options, ancil_buf,
333 		    ipp->ipp_ipv4_options_len);
334 		ancil_buf += ipp->ipp_ipv4_options_len;
335 		ancil_size -= toh->len;
336 	}
337 
338 	if (recv_ancillary.crb_recvslla) {
339 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
340 		struct T_opthdr *toh;
341 		struct sockaddr_dl *dstptr;
342 		ill_t *ill;
343 		int alen = 0;
344 
345 		ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
346 		if (ill != NULL)
347 			alen = ill->ill_phys_addr_length;
348 
349 		/*
350 		 * For loopback multicast and broadcast the packet arrives
351 		 * with ira_ruifdex being the physical interface, but
352 		 * ira_l2src is all zero since ip_postfrag_loopback doesn't
353 		 * know our l2src. We don't report the address in that case.
354 		 */
355 		if (ira->ira_flags & IRAF_LOOPBACK)
356 			alen = 0;
357 
358 		toh = (struct T_opthdr *)ancil_buf;
359 		toh->level = IPPROTO_IP;
360 		toh->name = IP_RECVSLLA;
361 		toh->len = sizeof (struct T_opthdr) +
362 		    sizeof (struct sockaddr_dl);
363 		toh->status = 0;
364 		ancil_buf += sizeof (struct T_opthdr);
365 		dstptr = (struct sockaddr_dl *)ancil_buf;
366 		dstptr->sdl_family = AF_LINK;
367 		dstptr->sdl_index = ira->ira_ruifindex;
368 		if (ill != NULL)
369 			dstptr->sdl_type = ill->ill_type;
370 		else
371 			dstptr->sdl_type = 0;
372 		dstptr->sdl_nlen = 0;
373 		dstptr->sdl_alen = alen;
374 		dstptr->sdl_slen = 0;
375 		bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
376 		ancil_buf += sizeof (struct sockaddr_dl);
377 		ancil_size -= toh->len;
378 		if (ill != NULL)
379 			ill_refrele(ill);
380 	}
381 
382 	if (recv_ancillary.crb_recvif) {
383 		struct T_opthdr *toh;
384 		uint_t		*dstptr;
385 
386 		toh = (struct T_opthdr *)ancil_buf;
387 		toh->level = IPPROTO_IP;
388 		toh->name = IP_RECVIF;
389 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
390 		toh->status = 0;
391 		ancil_buf += sizeof (struct T_opthdr);
392 		dstptr = (uint_t *)ancil_buf;
393 		*dstptr = ira->ira_ruifindex;
394 		ancil_buf += sizeof (uint_t);
395 		ancil_size -= toh->len;
396 	}
397 
398 	/*
399 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
400 	 * are different
401 	 */
402 	if (recv_ancillary.crb_ip_recvpktinfo &&
403 	    connp->conn_family == AF_INET6) {
404 		struct T_opthdr *toh;
405 		struct in6_pktinfo *pkti;
406 
407 		toh = (struct T_opthdr *)ancil_buf;
408 		toh->level = IPPROTO_IPV6;
409 		toh->name = IPV6_PKTINFO;
410 		toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
411 		toh->status = 0;
412 		ancil_buf += sizeof (struct T_opthdr);
413 		pkti = (struct in6_pktinfo *)ancil_buf;
414 		if (ira->ira_flags & IRAF_IS_IPV4) {
415 			IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
416 			    &pkti->ipi6_addr);
417 		} else {
418 			pkti->ipi6_addr = ipp->ipp_addr;
419 		}
420 		pkti->ipi6_ifindex = ira->ira_ruifindex;
421 
422 		ancil_buf += sizeof (*pkti);
423 		ancil_size -= toh->len;
424 	}
425 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
426 		struct T_opthdr *toh;
427 
428 		toh = (struct T_opthdr *)ancil_buf;
429 		toh->level = IPPROTO_IPV6;
430 		toh->name = IPV6_HOPLIMIT;
431 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
432 		toh->status = 0;
433 		ancil_buf += sizeof (struct T_opthdr);
434 		*(uint_t *)ancil_buf = ipp->ipp_hoplimit;
435 		ancil_buf += sizeof (uint_t);
436 		ancil_size -= toh->len;
437 	}
438 	if (recv_ancillary.crb_ipv6_recvtclass) {
439 		struct T_opthdr *toh;
440 
441 		toh = (struct T_opthdr *)ancil_buf;
442 		toh->level = IPPROTO_IPV6;
443 		toh->name = IPV6_TCLASS;
444 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
445 		toh->status = 0;
446 		ancil_buf += sizeof (struct T_opthdr);
447 
448 		if (ira->ira_flags & IRAF_IS_IPV4)
449 			*(uint_t *)ancil_buf = ipp->ipp_type_of_service;
450 		else
451 			*(uint_t *)ancil_buf = ipp->ipp_tclass;
452 		ancil_buf += sizeof (uint_t);
453 		ancil_size -= toh->len;
454 	}
455 	if (recv_ancillary.crb_ipv6_recvhopopts &&
456 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
457 		struct T_opthdr *toh;
458 
459 		toh = (struct T_opthdr *)ancil_buf;
460 		toh->level = IPPROTO_IPV6;
461 		toh->name = IPV6_HOPOPTS;
462 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
463 		toh->status = 0;
464 		ancil_buf += sizeof (struct T_opthdr);
465 		bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
466 		ancil_buf += ipp->ipp_hopoptslen;
467 		ancil_size -= toh->len;
468 	}
469 	/*
470 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
471 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
472 	 * options that appear before a routing header.
473 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
474 	 */
475 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
476 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
477 		    (recv_ancillary.crb_ipv6_recvdstopts &&
478 		    recv_ancillary.crb_ipv6_recvrthdr)) {
479 			struct T_opthdr *toh;
480 
481 			toh = (struct T_opthdr *)ancil_buf;
482 			toh->level = IPPROTO_IPV6;
483 			toh->name = IPV6_DSTOPTS;
484 			toh->len = sizeof (struct T_opthdr) +
485 			    ipp->ipp_rthdrdstoptslen;
486 			toh->status = 0;
487 			ancil_buf += sizeof (struct T_opthdr);
488 			bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
489 			    ipp->ipp_rthdrdstoptslen);
490 			ancil_buf += ipp->ipp_rthdrdstoptslen;
491 			ancil_size -= toh->len;
492 		}
493 	}
494 	if (recv_ancillary.crb_ipv6_recvrthdr &&
495 	    (ipp->ipp_fields & IPPF_RTHDR)) {
496 		struct T_opthdr *toh;
497 
498 		toh = (struct T_opthdr *)ancil_buf;
499 		toh->level = IPPROTO_IPV6;
500 		toh->name = IPV6_RTHDR;
501 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
502 		toh->status = 0;
503 		ancil_buf += sizeof (struct T_opthdr);
504 		bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
505 		ancil_buf += ipp->ipp_rthdrlen;
506 		ancil_size -= toh->len;
507 	}
508 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
509 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
510 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
511 		struct T_opthdr *toh;
512 
513 		toh = (struct T_opthdr *)ancil_buf;
514 		toh->level = IPPROTO_IPV6;
515 		toh->name = IPV6_DSTOPTS;
516 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
517 		toh->status = 0;
518 		ancil_buf += sizeof (struct T_opthdr);
519 		bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
520 		ancil_buf += ipp->ipp_dstoptslen;
521 		ancil_size -= toh->len;
522 	}
523 
524 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
525 		struct T_opthdr *toh;
526 		cred_t		*rcr = connp->conn_cred;
527 
528 		toh = (struct T_opthdr *)ancil_buf;
529 		toh->level = SOL_SOCKET;
530 		toh->name = SCM_UCRED;
531 		toh->len = sizeof (struct T_opthdr) +
532 		    ucredminsize(ira->ira_cred);
533 		toh->status = 0;
534 		(void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
535 		ancil_buf += toh->len;
536 		ancil_size -= toh->len;
537 	}
538 	if (recv_ancillary.crb_timestamp) {
539 		struct	T_opthdr *toh;
540 
541 		toh = (struct T_opthdr *)ancil_buf;
542 		toh->level = SOL_SOCKET;
543 		toh->name = SCM_TIMESTAMP;
544 		toh->len = sizeof (struct T_opthdr) +
545 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
546 		toh->status = 0;
547 		ancil_buf += sizeof (struct T_opthdr);
548 		/* Align for gethrestime() */
549 		ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
550 		    sizeof (intptr_t));
551 		gethrestime((timestruc_t *)ancil_buf);
552 		ancil_buf = (uchar_t *)toh + toh->len;
553 		ancil_size -= toh->len;
554 	}
555 
556 	/*
557 	 * CAUTION:
558 	 * Due to aligment issues
559 	 * Processing of IP_RECVTTL option
560 	 * should always be the last. Adding
561 	 * any option processing after this will
562 	 * cause alignment panic.
563 	 */
564 	if (recv_ancillary.crb_recvttl &&
565 	    (ira->ira_flags & IRAF_IS_IPV4)) {
566 		struct	T_opthdr *toh;
567 		uint8_t	*dstptr;
568 
569 		toh = (struct T_opthdr *)ancil_buf;
570 		toh->level = IPPROTO_IP;
571 		toh->name = IP_RECVTTL;
572 		toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
573 		toh->status = 0;
574 		ancil_buf += sizeof (struct T_opthdr);
575 		dstptr = (uint8_t *)ancil_buf;
576 		*dstptr = ipp->ipp_hoplimit;
577 		ancil_buf += sizeof (uint8_t);
578 		ancil_size -= toh->len;
579 	}
580 
581 	/* Consumed all of allocated space */
582 	ASSERT(ancil_size == 0);
583 
584 }
585 
586 /*
587  * This routine retrieves the current status of socket options.
588  * It returns the size of the option retrieved, or -1.
589  */
590 int
591 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
592     uchar_t *ptr)
593 {
594 	int		*i1 = (int *)ptr;
595 	conn_t		*connp = coa->coa_connp;
596 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
597 	ip_pkt_t	*ipp = coa->coa_ipp;
598 	ip_stack_t	*ipst = ixa->ixa_ipst;
599 	uint_t		len;
600 
601 	ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
602 
603 	switch (level) {
604 	case SOL_SOCKET:
605 		switch (name) {
606 		case SO_DEBUG:
607 			*i1 = connp->conn_debug ? SO_DEBUG : 0;
608 			break;	/* goto sizeof (int) option return */
609 		case SO_KEEPALIVE:
610 			*i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
611 			break;
612 		case SO_LINGER:	{
613 			struct linger *lgr = (struct linger *)ptr;
614 
615 			lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
616 			lgr->l_linger = connp->conn_lingertime;
617 			}
618 			return (sizeof (struct linger));
619 
620 		case SO_OOBINLINE:
621 			*i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
622 			break;
623 		case SO_REUSEADDR:
624 			*i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
625 			break;	/* goto sizeof (int) option return */
626 		case SO_TYPE:
627 			*i1 = connp->conn_so_type;
628 			break;	/* goto sizeof (int) option return */
629 		case SO_DONTROUTE:
630 			*i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
631 			    SO_DONTROUTE : 0;
632 			break;	/* goto sizeof (int) option return */
633 		case SO_USELOOPBACK:
634 			*i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
635 			break;	/* goto sizeof (int) option return */
636 		case SO_BROADCAST:
637 			*i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
638 			break;	/* goto sizeof (int) option return */
639 
640 		case SO_SNDBUF:
641 			*i1 = connp->conn_sndbuf;
642 			break;	/* goto sizeof (int) option return */
643 		case SO_RCVBUF:
644 			*i1 = connp->conn_rcvbuf;
645 			break;	/* goto sizeof (int) option return */
646 		case SO_RCVTIMEO:
647 		case SO_SNDTIMEO:
648 			/*
649 			 * Pass these two options in order for third part
650 			 * protocol usage. Here just return directly.
651 			 */
652 			*i1 = 0;
653 			break;
654 		case SO_DGRAM_ERRIND:
655 			*i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
656 			break;	/* goto sizeof (int) option return */
657 		case SO_RECVUCRED:
658 			*i1 = connp->conn_recv_ancillary.crb_recvucred;
659 			break;	/* goto sizeof (int) option return */
660 		case SO_TIMESTAMP:
661 			*i1 = connp->conn_recv_ancillary.crb_timestamp;
662 			break;	/* goto sizeof (int) option return */
663 		case SO_VRRP:
664 			*i1 = connp->conn_isvrrp;
665 			break;	/* goto sizeof (int) option return */
666 		case SO_ANON_MLP:
667 			*i1 = connp->conn_anon_mlp;
668 			break;	/* goto sizeof (int) option return */
669 		case SO_MAC_EXEMPT:
670 			*i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
671 			break;	/* goto sizeof (int) option return */
672 		case SO_MAC_IMPLICIT:
673 			*i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
674 			break;	/* goto sizeof (int) option return */
675 		case SO_ALLZONES:
676 			*i1 = connp->conn_allzones;
677 			break;	/* goto sizeof (int) option return */
678 		case SO_EXCLBIND:
679 			*i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
680 			break;
681 		case SO_PROTOTYPE:
682 			*i1 = connp->conn_proto;
683 			break;
684 
685 		case SO_DOMAIN:
686 			*i1 = connp->conn_family;
687 			break;
688 		default:
689 			return (-1);
690 		}
691 		break;
692 	case IPPROTO_IP:
693 		if (connp->conn_family != AF_INET)
694 			return (-1);
695 		switch (name) {
696 		case IP_OPTIONS:
697 		case T_IP_OPTIONS:
698 			if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
699 				return (0);
700 
701 			len = ipp->ipp_ipv4_options_len;
702 			if (len > 0) {
703 				bcopy(ipp->ipp_ipv4_options, ptr, len);
704 			}
705 			return (len);
706 
707 		case IP_PKTINFO: {
708 			/*
709 			 * This also handles IP_RECVPKTINFO.
710 			 * IP_PKTINFO and IP_RECVPKTINFO have same value.
711 			 * Differentiation is based on the size of the
712 			 * argument passed in.
713 			 */
714 			struct in_pktinfo *pktinfo;
715 
716 #ifdef notdef
717 			/* optcom doesn't provide a length with "get" */
718 			if (inlen == sizeof (int)) {
719 				/* This is IP_RECVPKTINFO option. */
720 				*i1 = connp->conn_recv_ancillary.
721 				    crb_ip_recvpktinfo;
722 				return (sizeof (int));
723 			}
724 #endif
725 			/* XXX assumes that caller has room for max size! */
726 
727 			pktinfo = (struct in_pktinfo *)ptr;
728 			pktinfo->ipi_ifindex = ixa->ixa_ifindex;
729 			if (ipp->ipp_fields & IPPF_ADDR)
730 				pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
731 			else
732 				pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
733 			return (sizeof (struct in_pktinfo));
734 		}
735 		case IP_DONTFRAG:
736 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
737 			return (sizeof (int));
738 		case IP_TOS:
739 		case T_IP_TOS:
740 			*i1 = (int)ipp->ipp_type_of_service;
741 			break;	/* goto sizeof (int) option return */
742 		case IP_TTL:
743 			*i1 = (int)ipp->ipp_unicast_hops;
744 			break;	/* goto sizeof (int) option return */
745 		case IP_DHCPINIT_IF:
746 			return (-1);
747 		case IP_NEXTHOP:
748 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
749 				*(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
750 				return (sizeof (ipaddr_t));
751 			} else {
752 				return (0);
753 			}
754 
755 		case IP_MULTICAST_IF:
756 			/* 0 address if not set */
757 			*(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
758 			return (sizeof (ipaddr_t));
759 		case IP_MULTICAST_TTL:
760 			*(uchar_t *)ptr = ixa->ixa_multicast_ttl;
761 			return (sizeof (uchar_t));
762 		case IP_MULTICAST_LOOP:
763 			*ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
764 			return (sizeof (uint8_t));
765 		case IP_RECVOPTS:
766 			*i1 = connp->conn_recv_ancillary.crb_recvopts;
767 			break;	/* goto sizeof (int) option return */
768 		case IP_RECVDSTADDR:
769 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
770 			break;	/* goto sizeof (int) option return */
771 		case IP_RECVIF:
772 			*i1 = connp->conn_recv_ancillary.crb_recvif;
773 			break;	/* goto sizeof (int) option return */
774 		case IP_RECVSLLA:
775 			*i1 = connp->conn_recv_ancillary.crb_recvslla;
776 			break;	/* goto sizeof (int) option return */
777 		case IP_RECVTTL:
778 			*i1 = connp->conn_recv_ancillary.crb_recvttl;
779 			break;	/* goto sizeof (int) option return */
780 		case IP_ADD_MEMBERSHIP:
781 		case IP_DROP_MEMBERSHIP:
782 		case MCAST_JOIN_GROUP:
783 		case MCAST_LEAVE_GROUP:
784 		case IP_BLOCK_SOURCE:
785 		case IP_UNBLOCK_SOURCE:
786 		case IP_ADD_SOURCE_MEMBERSHIP:
787 		case IP_DROP_SOURCE_MEMBERSHIP:
788 		case MCAST_BLOCK_SOURCE:
789 		case MCAST_UNBLOCK_SOURCE:
790 		case MCAST_JOIN_SOURCE_GROUP:
791 		case MCAST_LEAVE_SOURCE_GROUP:
792 		case MRT_INIT:
793 		case MRT_DONE:
794 		case MRT_ADD_VIF:
795 		case MRT_DEL_VIF:
796 		case MRT_ADD_MFC:
797 		case MRT_DEL_MFC:
798 			/* cannot "get" the value for these */
799 			return (-1);
800 		case MRT_VERSION:
801 		case MRT_ASSERT:
802 			(void) ip_mrouter_get(name, connp, ptr);
803 			return (sizeof (int));
804 		case IP_SEC_OPT:
805 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
806 			    IPSEC_AF_V4));
807 		case IP_BOUND_IF:
808 			/* Zero if not set */
809 			*i1 = connp->conn_bound_if;
810 			break;	/* goto sizeof (int) option return */
811 		case IP_UNSPEC_SRC:
812 			*i1 = connp->conn_unspec_src;
813 			break;	/* goto sizeof (int) option return */
814 		case IP_BROADCAST_TTL:
815 			if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
816 				*(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
817 			else
818 				*(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
819 			return (sizeof (uchar_t));
820 		default:
821 			return (-1);
822 		}
823 		break;
824 	case IPPROTO_IPV6:
825 		if (connp->conn_family != AF_INET6)
826 			return (-1);
827 		switch (name) {
828 		case IPV6_UNICAST_HOPS:
829 			*i1 = (int)ipp->ipp_unicast_hops;
830 			break;	/* goto sizeof (int) option return */
831 		case IPV6_MULTICAST_IF:
832 			/* 0 index if not set */
833 			*i1 = ixa->ixa_multicast_ifindex;
834 			break;	/* goto sizeof (int) option return */
835 		case IPV6_MULTICAST_HOPS:
836 			*i1 = ixa->ixa_multicast_ttl;
837 			break;	/* goto sizeof (int) option return */
838 		case IPV6_MULTICAST_LOOP:
839 			*i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
840 			break;	/* goto sizeof (int) option return */
841 		case IPV6_JOIN_GROUP:
842 		case IPV6_LEAVE_GROUP:
843 		case MCAST_JOIN_GROUP:
844 		case MCAST_LEAVE_GROUP:
845 		case MCAST_BLOCK_SOURCE:
846 		case MCAST_UNBLOCK_SOURCE:
847 		case MCAST_JOIN_SOURCE_GROUP:
848 		case MCAST_LEAVE_SOURCE_GROUP:
849 			/* cannot "get" the value for these */
850 			return (-1);
851 		case IPV6_BOUND_IF:
852 			/* Zero if not set */
853 			*i1 = connp->conn_bound_if;
854 			break;	/* goto sizeof (int) option return */
855 		case IPV6_UNSPEC_SRC:
856 			*i1 = connp->conn_unspec_src;
857 			break;	/* goto sizeof (int) option return */
858 		case IPV6_RECVPKTINFO:
859 			*i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
860 			break;	/* goto sizeof (int) option return */
861 		case IPV6_RECVTCLASS:
862 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
863 			break;	/* goto sizeof (int) option return */
864 		case IPV6_RECVPATHMTU:
865 			*i1 = connp->conn_ipv6_recvpathmtu;
866 			break;	/* goto sizeof (int) option return */
867 		case IPV6_RECVHOPLIMIT:
868 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
869 			break;	/* goto sizeof (int) option return */
870 		case IPV6_RECVHOPOPTS:
871 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
872 			break;	/* goto sizeof (int) option return */
873 		case IPV6_RECVDSTOPTS:
874 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
875 			break;	/* goto sizeof (int) option return */
876 		case _OLD_IPV6_RECVDSTOPTS:
877 			*i1 =
878 			    connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
879 			break;	/* goto sizeof (int) option return */
880 		case IPV6_RECVRTHDRDSTOPTS:
881 			*i1 = connp->conn_recv_ancillary.
882 			    crb_ipv6_recvrthdrdstopts;
883 			break;	/* goto sizeof (int) option return */
884 		case IPV6_RECVRTHDR:
885 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
886 			break;	/* goto sizeof (int) option return */
887 		case IPV6_PKTINFO: {
888 			/* XXX assumes that caller has room for max size! */
889 			struct in6_pktinfo *pkti;
890 
891 			pkti = (struct in6_pktinfo *)ptr;
892 			pkti->ipi6_ifindex = ixa->ixa_ifindex;
893 			if (ipp->ipp_fields & IPPF_ADDR)
894 				pkti->ipi6_addr = ipp->ipp_addr;
895 			else
896 				pkti->ipi6_addr = ipv6_all_zeros;
897 			return (sizeof (struct in6_pktinfo));
898 		}
899 		case IPV6_TCLASS:
900 			*i1 = ipp->ipp_tclass;
901 			break;	/* goto sizeof (int) option return */
902 		case IPV6_NEXTHOP: {
903 			sin6_t *sin6 = (sin6_t *)ptr;
904 
905 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
906 				return (0);
907 
908 			*sin6 = sin6_null;
909 			sin6->sin6_family = AF_INET6;
910 			sin6->sin6_addr = ixa->ixa_nexthop_v6;
911 
912 			return (sizeof (sin6_t));
913 		}
914 		case IPV6_HOPOPTS:
915 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
916 				return (0);
917 			bcopy(ipp->ipp_hopopts, ptr,
918 			    ipp->ipp_hopoptslen);
919 			return (ipp->ipp_hopoptslen);
920 		case IPV6_RTHDRDSTOPTS:
921 			if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
922 				return (0);
923 			bcopy(ipp->ipp_rthdrdstopts, ptr,
924 			    ipp->ipp_rthdrdstoptslen);
925 			return (ipp->ipp_rthdrdstoptslen);
926 		case IPV6_RTHDR:
927 			if (!(ipp->ipp_fields & IPPF_RTHDR))
928 				return (0);
929 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
930 			return (ipp->ipp_rthdrlen);
931 		case IPV6_DSTOPTS:
932 			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
933 				return (0);
934 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
935 			return (ipp->ipp_dstoptslen);
936 		case IPV6_PATHMTU:
937 			return (ip_fill_mtuinfo(connp, ixa,
938 			    (struct ip6_mtuinfo *)ptr));
939 		case IPV6_SEC_OPT:
940 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
941 			    IPSEC_AF_V6));
942 		case IPV6_SRC_PREFERENCES:
943 			return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
944 		case IPV6_DONTFRAG:
945 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
946 			return (sizeof (int));
947 		case IPV6_USE_MIN_MTU:
948 			if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
949 				*i1 = ixa->ixa_use_min_mtu;
950 			else
951 				*i1 = IPV6_USE_MIN_MTU_MULTICAST;
952 			break;
953 		case IPV6_V6ONLY:
954 			*i1 = connp->conn_ipv6_v6only;
955 			return (sizeof (int));
956 		default:
957 			return (-1);
958 		}
959 		break;
960 	case IPPROTO_UDP:
961 		switch (name) {
962 		case UDP_ANONPRIVBIND:
963 			*i1 = connp->conn_anon_priv_bind;
964 			break;
965 		case UDP_EXCLBIND:
966 			*i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
967 			break;
968 		default:
969 			return (-1);
970 		}
971 		break;
972 	case IPPROTO_TCP:
973 		switch (name) {
974 		case TCP_RECVDSTADDR:
975 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
976 			break;
977 		case TCP_ANONPRIVBIND:
978 			*i1 = connp->conn_anon_priv_bind;
979 			break;
980 		case TCP_EXCLBIND:
981 			*i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
982 			break;
983 		default:
984 			return (-1);
985 		}
986 		break;
987 	default:
988 		return (-1);
989 	}
990 	return (sizeof (int));
991 }
992 
/*
 * Forward declarations of the per-level option setters. conn_opt_set()
 * selects one of them based on the option level and passes on all of its
 * remaining arguments unchanged.
 */
static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1003 
1004 /*
1005  * This routine sets the most common socket options including some
1006  * that are transport/ULP specific.
1007  * It returns errno or zero.
1008  *
1009  * For fixed length options, there is no sanity check
1010  * of passed in length is done. It is assumed *_optcom_req()
1011  * routines do the right thing.
1012  */
1013 int
1014 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1015     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1016 {
1017 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1018 
1019 	/* We have different functions for different levels */
1020 	switch (level) {
1021 	case SOL_SOCKET:
1022 		return (conn_opt_set_socket(coa, name, inlen, invalp,
1023 		    checkonly, cr));
1024 	case IPPROTO_IP:
1025 		return (conn_opt_set_ip(coa, name, inlen, invalp,
1026 		    checkonly, cr));
1027 	case IPPROTO_IPV6:
1028 		return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1029 		    checkonly, cr));
1030 	case IPPROTO_UDP:
1031 		return (conn_opt_set_udp(coa, name, inlen, invalp,
1032 		    checkonly, cr));
1033 	case IPPROTO_TCP:
1034 		return (conn_opt_set_tcp(coa, name, inlen, invalp,
1035 		    checkonly, cr));
1036 	default:
1037 		return (0);
1038 	}
1039 }
1040 
1041 /*
1042  * Handle SOL_SOCKET
1043  * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1044  * it implement their own checks and setting of conn_proto.
1045  */
1046 /* ARGSUSED1 */
1047 static int
1048 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1049     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1050 {
1051 	conn_t		*connp = coa->coa_connp;
1052 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1053 	int		*i1 = (int *)invalp;
1054 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1055 
1056 	switch (name) {
1057 	case SO_ALLZONES:
1058 		if (IPCL_IS_BOUND(connp))
1059 			return (EINVAL);
1060 		break;
1061 	case SO_VRRP:
1062 		if (secpolicy_ip_config(cr, checkonly) != 0)
1063 			return (EACCES);
1064 		break;
1065 	case SO_MAC_EXEMPT:
1066 		if (secpolicy_net_mac_aware(cr) != 0)
1067 			return (EACCES);
1068 		if (IPCL_IS_BOUND(connp))
1069 			return (EINVAL);
1070 		break;
1071 	case SO_MAC_IMPLICIT:
1072 		if (secpolicy_net_mac_implicit(cr) != 0)
1073 			return (EACCES);
1074 		break;
1075 	}
1076 	if (checkonly)
1077 		return (0);
1078 
1079 	mutex_enter(&connp->conn_lock);
1080 	/* Here we set the actual option value */
1081 	switch (name) {
1082 	case SO_DEBUG:
1083 		connp->conn_debug = onoff;
1084 		break;
1085 	case SO_KEEPALIVE:
1086 		connp->conn_keepalive = onoff;
1087 		break;
1088 	case SO_LINGER: {
1089 		struct linger *lgr = (struct linger *)invalp;
1090 
1091 		if (lgr->l_onoff) {
1092 			connp->conn_linger = 1;
1093 			connp->conn_lingertime = lgr->l_linger;
1094 		} else {
1095 			connp->conn_linger = 0;
1096 			connp->conn_lingertime = 0;
1097 		}
1098 		break;
1099 	}
1100 	case SO_OOBINLINE:
1101 		connp->conn_oobinline = onoff;
1102 		coa->coa_changed |= COA_OOBINLINE_CHANGED;
1103 		break;
1104 	case SO_REUSEADDR:
1105 		connp->conn_reuseaddr = onoff;
1106 		break;
1107 	case SO_DONTROUTE:
1108 		if (onoff)
1109 			ixa->ixa_flags |= IXAF_DONTROUTE;
1110 		else
1111 			ixa->ixa_flags &= ~IXAF_DONTROUTE;
1112 		coa->coa_changed |= COA_ROUTE_CHANGED;
1113 		break;
1114 	case SO_USELOOPBACK:
1115 		connp->conn_useloopback = onoff;
1116 		break;
1117 	case SO_BROADCAST:
1118 		connp->conn_broadcast = onoff;
1119 		break;
1120 	case SO_SNDBUF:
1121 		/* ULP has range checked the value */
1122 		connp->conn_sndbuf = *i1;
1123 		coa->coa_changed |= COA_SNDBUF_CHANGED;
1124 		break;
1125 	case SO_RCVBUF:
1126 		/* ULP has range checked the value */
1127 		connp->conn_rcvbuf = *i1;
1128 		coa->coa_changed |= COA_RCVBUF_CHANGED;
1129 		break;
1130 	case SO_RCVTIMEO:
1131 	case SO_SNDTIMEO:
1132 		/*
1133 		 * Pass these two options in order for third part
1134 		 * protocol usage.
1135 		 */
1136 		break;
1137 	case SO_DGRAM_ERRIND:
1138 		connp->conn_dgram_errind = onoff;
1139 		break;
1140 	case SO_RECVUCRED:
1141 		connp->conn_recv_ancillary.crb_recvucred = onoff;
1142 		break;
1143 	case SO_ALLZONES:
1144 		connp->conn_allzones = onoff;
1145 		coa->coa_changed |= COA_ROUTE_CHANGED;
1146 		if (onoff)
1147 			ixa->ixa_zoneid = ALL_ZONES;
1148 		else
1149 			ixa->ixa_zoneid = connp->conn_zoneid;
1150 		break;
1151 	case SO_TIMESTAMP:
1152 		connp->conn_recv_ancillary.crb_timestamp = onoff;
1153 		break;
1154 	case SO_VRRP:
1155 		connp->conn_isvrrp = onoff;
1156 		break;
1157 	case SO_ANON_MLP:
1158 		connp->conn_anon_mlp = onoff;
1159 		break;
1160 	case SO_MAC_EXEMPT:
1161 		connp->conn_mac_mode = onoff ?
1162 		    CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1163 		break;
1164 	case SO_MAC_IMPLICIT:
1165 		connp->conn_mac_mode = onoff ?
1166 		    CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1167 		break;
1168 	case SO_EXCLBIND:
1169 		connp->conn_exclbind = onoff;
1170 		break;
1171 	}
1172 	mutex_exit(&connp->conn_lock);
1173 	return (0);
1174 }
1175 
1176 /* Handle IPPROTO_IP */
1177 static int
1178 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1179     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1180 {
1181 	conn_t		*connp = coa->coa_connp;
1182 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1183 	ip_pkt_t	*ipp = coa->coa_ipp;
1184 	int		*i1 = (int *)invalp;
1185 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1186 	ipaddr_t	addr = (ipaddr_t)*i1;
1187 	uint_t		ifindex;
1188 	zoneid_t	zoneid = IPCL_ZONEID(connp);
1189 	ipif_t		*ipif;
1190 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1191 	int		error;
1192 
1193 	if (connp->conn_family != AF_INET)
1194 		return (EINVAL);
1195 
1196 	switch (name) {
1197 	case IP_TTL:
1198 		/* Don't allow zero */
1199 		if (*i1 < 1 || *i1 > 255)
1200 			return (EINVAL);
1201 		break;
1202 	case IP_MULTICAST_IF:
1203 		if (addr == INADDR_ANY) {
1204 			/* Clear */
1205 			ifindex = 0;
1206 			break;
1207 		}
1208 		ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1209 		if (ipif == NULL)
1210 			return (EHOSTUNREACH);
1211 		/* not supported by the virtual network iface */
1212 		if (IS_VNI(ipif->ipif_ill)) {
1213 			ipif_refrele(ipif);
1214 			return (EINVAL);
1215 		}
1216 		ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1217 		ipif_refrele(ipif);
1218 		break;
1219 	case IP_NEXTHOP: {
1220 		ire_t	*ire;
1221 
1222 		if (addr == INADDR_ANY) {
1223 			/* Clear */
1224 			break;
1225 		}
1226 		/* Verify that the next-hop is on-link */
1227 		ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1228 		    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1229 		if (ire == NULL)
1230 			return (EHOSTUNREACH);
1231 		ire_refrele(ire);
1232 		break;
1233 	}
1234 	case IP_OPTIONS:
1235 	case T_IP_OPTIONS: {
1236 		uint_t newlen;
1237 
1238 		if (ipp->ipp_fields & IPPF_LABEL_V4)
1239 			newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1240 		else
1241 			newlen = inlen;
1242 		if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1243 			return (EINVAL);
1244 		}
1245 		break;
1246 	}
1247 	case IP_PKTINFO: {
1248 		struct in_pktinfo *pktinfo;
1249 
1250 		/* Two different valid lengths */
1251 		if (inlen != sizeof (int) &&
1252 		    inlen != sizeof (struct in_pktinfo))
1253 			return (EINVAL);
1254 		if (inlen == sizeof (int))
1255 			break;
1256 
1257 		pktinfo = (struct in_pktinfo *)invalp;
1258 		if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1259 			switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1260 			    zoneid, ipst, B_FALSE)) {
1261 			case IPVL_UNICAST_UP:
1262 			case IPVL_UNICAST_DOWN:
1263 				break;
1264 			default:
1265 				return (EADDRNOTAVAIL);
1266 			}
1267 		}
1268 		if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1269 		    B_FALSE, ipst))
1270 			return (ENXIO);
1271 		break;
1272 	}
1273 	case IP_BOUND_IF:
1274 		ifindex = *(uint_t *)i1;
1275 
1276 		/* Just check it is ok. */
1277 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1278 			return (ENXIO);
1279 		break;
1280 	}
1281 	if (checkonly)
1282 		return (0);
1283 
1284 	/* Here we set the actual option value */
1285 	/*
1286 	 * conn_lock protects the bitfields, and is used to
1287 	 * set the fields atomically. Not needed for ixa settings since
1288 	 * the caller has an exclusive copy of the ixa.
1289 	 * We can not hold conn_lock across the multicast options though.
1290 	 */
1291 	switch (name) {
1292 	case IP_OPTIONS:
1293 	case T_IP_OPTIONS:
1294 		/* Save options for use by IP. */
1295 		mutex_enter(&connp->conn_lock);
1296 		error = optcom_pkt_set(invalp, inlen,
1297 		    (uchar_t **)&ipp->ipp_ipv4_options,
1298 		    &ipp->ipp_ipv4_options_len);
1299 		if (error != 0) {
1300 			mutex_exit(&connp->conn_lock);
1301 			return (error);
1302 		}
1303 		if (ipp->ipp_ipv4_options_len == 0) {
1304 			ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1305 		} else {
1306 			ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1307 		}
1308 		mutex_exit(&connp->conn_lock);
1309 		coa->coa_changed |= COA_HEADER_CHANGED;
1310 		coa->coa_changed |= COA_WROFF_CHANGED;
1311 		break;
1312 
1313 	case IP_TTL:
1314 		mutex_enter(&connp->conn_lock);
1315 		ipp->ipp_unicast_hops = *i1;
1316 		mutex_exit(&connp->conn_lock);
1317 		coa->coa_changed |= COA_HEADER_CHANGED;
1318 		break;
1319 	case IP_TOS:
1320 	case T_IP_TOS:
1321 		mutex_enter(&connp->conn_lock);
1322 		if (*i1 == -1) {
1323 			ipp->ipp_type_of_service = 0;
1324 		} else {
1325 			ipp->ipp_type_of_service = *i1;
1326 		}
1327 		mutex_exit(&connp->conn_lock);
1328 		coa->coa_changed |= COA_HEADER_CHANGED;
1329 		break;
1330 	case IP_MULTICAST_IF:
1331 		ixa->ixa_multicast_ifindex = ifindex;
1332 		ixa->ixa_multicast_ifaddr = addr;
1333 		coa->coa_changed |= COA_ROUTE_CHANGED;
1334 		break;
1335 	case IP_MULTICAST_TTL:
1336 		ixa->ixa_multicast_ttl = *invalp;
1337 		/* Handled automatically by ip_output */
1338 		break;
1339 	case IP_MULTICAST_LOOP:
1340 		if (*invalp != 0)
1341 			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1342 		else
1343 			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1344 		/* Handled automatically by ip_output */
1345 		break;
1346 	case IP_RECVOPTS:
1347 		mutex_enter(&connp->conn_lock);
1348 		connp->conn_recv_ancillary.crb_recvopts = onoff;
1349 		mutex_exit(&connp->conn_lock);
1350 		break;
1351 	case IP_RECVDSTADDR:
1352 		mutex_enter(&connp->conn_lock);
1353 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1354 		mutex_exit(&connp->conn_lock);
1355 		break;
1356 	case IP_RECVIF:
1357 		mutex_enter(&connp->conn_lock);
1358 		connp->conn_recv_ancillary.crb_recvif = onoff;
1359 		mutex_exit(&connp->conn_lock);
1360 		break;
1361 	case IP_RECVSLLA:
1362 		mutex_enter(&connp->conn_lock);
1363 		connp->conn_recv_ancillary.crb_recvslla = onoff;
1364 		mutex_exit(&connp->conn_lock);
1365 		break;
1366 	case IP_RECVTTL:
1367 		mutex_enter(&connp->conn_lock);
1368 		connp->conn_recv_ancillary.crb_recvttl = onoff;
1369 		mutex_exit(&connp->conn_lock);
1370 		break;
1371 	case IP_PKTINFO: {
1372 		/*
1373 		 * This also handles IP_RECVPKTINFO.
1374 		 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1375 		 * Differentiation is based on the size of the
1376 		 * argument passed in.
1377 		 */
1378 		struct in_pktinfo *pktinfo;
1379 
1380 		if (inlen == sizeof (int)) {
1381 			/* This is IP_RECVPKTINFO option. */
1382 			mutex_enter(&connp->conn_lock);
1383 			connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1384 			    onoff;
1385 			mutex_exit(&connp->conn_lock);
1386 			break;
1387 		}
1388 
1389 		/* This is IP_PKTINFO option. */
1390 		mutex_enter(&connp->conn_lock);
1391 		pktinfo = (struct in_pktinfo *)invalp;
1392 		if (ipp->ipp_addr_v4 != INADDR_ANY) {
1393 			ipp->ipp_fields |= IPPF_ADDR;
1394 			IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1395 			    &ipp->ipp_addr);
1396 		} else {
1397 			ipp->ipp_fields &= ~IPPF_ADDR;
1398 			ipp->ipp_addr = ipv6_all_zeros;
1399 		}
1400 		mutex_exit(&connp->conn_lock);
1401 		ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1402 		coa->coa_changed |= COA_ROUTE_CHANGED;
1403 		coa->coa_changed |= COA_HEADER_CHANGED;
1404 		break;
1405 	}
1406 	case IP_DONTFRAG:
1407 		if (onoff) {
1408 			ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1409 			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1410 		} else {
1411 			ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1412 			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1413 		}
1414 		/* Need to redo ip_attr_connect */
1415 		coa->coa_changed |= COA_ROUTE_CHANGED;
1416 		break;
1417 	case IP_ADD_MEMBERSHIP:
1418 	case IP_DROP_MEMBERSHIP:
1419 	case MCAST_JOIN_GROUP:
1420 	case MCAST_LEAVE_GROUP:
1421 		return (ip_opt_set_multicast_group(connp, name,
1422 		    invalp, B_FALSE, checkonly));
1423 
1424 	case IP_BLOCK_SOURCE:
1425 	case IP_UNBLOCK_SOURCE:
1426 	case IP_ADD_SOURCE_MEMBERSHIP:
1427 	case IP_DROP_SOURCE_MEMBERSHIP:
1428 	case MCAST_BLOCK_SOURCE:
1429 	case MCAST_UNBLOCK_SOURCE:
1430 	case MCAST_JOIN_SOURCE_GROUP:
1431 	case MCAST_LEAVE_SOURCE_GROUP:
1432 		return (ip_opt_set_multicast_sources(connp, name,
1433 		    invalp, B_FALSE, checkonly));
1434 
1435 	case IP_SEC_OPT:
1436 		mutex_enter(&connp->conn_lock);
1437 		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1438 		mutex_exit(&connp->conn_lock);
1439 		if (error != 0) {
1440 			return (error);
1441 		}
1442 		/* This is an IPsec policy change - redo ip_attr_connect */
1443 		coa->coa_changed |= COA_ROUTE_CHANGED;
1444 		break;
1445 	case IP_NEXTHOP:
1446 		ixa->ixa_nexthop_v4 = addr;
1447 		if (addr != INADDR_ANY)
1448 			ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1449 		else
1450 			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1451 		coa->coa_changed |= COA_ROUTE_CHANGED;
1452 		break;
1453 
1454 	case IP_BOUND_IF:
1455 		ixa->ixa_ifindex = ifindex;		/* Send */
1456 		mutex_enter(&connp->conn_lock);
1457 		connp->conn_incoming_ifindex = ifindex;	/* Receive */
1458 		connp->conn_bound_if = ifindex;		/* getsockopt */
1459 		mutex_exit(&connp->conn_lock);
1460 		coa->coa_changed |= COA_ROUTE_CHANGED;
1461 		break;
1462 	case IP_UNSPEC_SRC:
1463 		mutex_enter(&connp->conn_lock);
1464 		connp->conn_unspec_src = onoff;
1465 		if (onoff)
1466 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1467 		else
1468 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1469 
1470 		mutex_exit(&connp->conn_lock);
1471 		break;
1472 	case IP_BROADCAST_TTL:
1473 		ixa->ixa_broadcast_ttl = *invalp;
1474 		ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1475 		/* Handled automatically by ip_output */
1476 		break;
1477 	case MRT_INIT:
1478 	case MRT_DONE:
1479 	case MRT_ADD_VIF:
1480 	case MRT_DEL_VIF:
1481 	case MRT_ADD_MFC:
1482 	case MRT_DEL_MFC:
1483 	case MRT_ASSERT:
1484 		if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1485 			return (error);
1486 		}
1487 		error = ip_mrouter_set((int)name, connp, checkonly,
1488 		    (uchar_t *)invalp, inlen);
1489 		if (error) {
1490 			return (error);
1491 		}
1492 		return (0);
1493 
1494 	}
1495 	return (0);
1496 }
1497 
/*
 * Handle IPPROTO_IPV6
 *
 * Fails with EINVAL unless the conn is an AF_INET6 one.
 *
 * The first switch validates the option value and length and, for
 * IPV6_MULTICAST_IF and IPV6_BOUND_IF, picks up the ifindex that the
 * second switch stores. If checkonly is set we return after the
 * validation.
 * The second switch stores the value. ipp and conn_t fields are updated
 * under conn_lock; coa_changed is updated to tell the caller what has to
 * be redone (header, route, write offset).
 * An option name without a case in a switch passes through it untouched.
 *
 * Returns zero or an errno.
 */
static int
conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
    uchar_t *invalp, boolean_t checkonly, cred_t *cr)
{
	conn_t		*connp = coa->coa_connp;
	ip_xmit_attr_t	*ixa = coa->coa_ixa;
	ip_pkt_t	*ipp = coa->coa_ipp;
	int		*i1 = (int *)invalp;
	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
	uint_t		ifindex;
	zoneid_t	zoneid = IPCL_ZONEID(connp);
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	int		error;

	if (connp->conn_family != AF_INET6)
		return (EINVAL);

	switch (name) {
	case IPV6_MULTICAST_IF:
		/*
		 * The only possible error is EINVAL.
		 * We call this option on both V4 and V6
		 * If both fail, then this call returns
		 * EINVAL. If at least one of them succeeds we
		 * return success.
		 */
		ifindex = *(uint_t *)i1;

		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
		    !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
			return (EINVAL);
		break;
	case IPV6_UNICAST_HOPS:
		/* Don't allow zero. -1 means to use default */
		if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
			return (EINVAL);
		break;
	case IPV6_MULTICAST_HOPS:
		/* -1 means use default */
		if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
			return (EINVAL);
		break;
	case IPV6_MULTICAST_LOOP:
		/* Strictly zero or one */
		if (*i1 != 0 && *i1 != 1)
			return (EINVAL);
		break;
	case IPV6_BOUND_IF:
		ifindex = *(uint_t *)i1;

		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
			return (ENXIO);
		break;
	case IPV6_PKTINFO: {
		struct in6_pktinfo *pkti;
		boolean_t isv6;

		if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
			return (EINVAL);
		if (inlen == 0)
			break;	/* Clear values below */

		/*
		 * Verify the source address and ifindex. Privileged users
		 * can use any source address.
		 */
		pkti = (struct in6_pktinfo *)invalp;

		/*
		 * For link-local addresses we use the ipi6_ifindex when
		 * we verify the local address.
		 * If net_rawaccess then any source address can be used.
		 *
		 * Note that IXAF_VERIFY_SOURCE in ixa_flags is updated
		 * here, i.e. before the checkonly test further down.
		 */
		if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
		    secpolicy_net_rawaccess(cr) != 0) {
			uint_t scopeid = 0;
			in6_addr_t *v6src = &pkti->ipi6_addr;
			ipaddr_t v4src;
			ip_laddr_t laddr_type = IPVL_UNICAST_UP;

			if (IN6_IS_ADDR_V4MAPPED(v6src)) {
				/* A mapped INADDR_ANY is not verified */
				IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
				if (v4src != INADDR_ANY) {
					laddr_type = ip_laddr_verify_v4(v4src,
					    zoneid, ipst, B_FALSE);
				}
			} else {
				if (IN6_IS_ADDR_LINKSCOPE(v6src))
					scopeid = pkti->ipi6_ifindex;

				laddr_type = ip_laddr_verify_v6(v6src, zoneid,
				    ipst, B_FALSE, scopeid);
			}
			switch (laddr_type) {
			case IPVL_UNICAST_UP:
			case IPVL_UNICAST_DOWN:
				break;
			default:
				return (EADDRNOTAVAIL);
			}
			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
			/* Allow any source */
			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
		}
		/* The ifindex is checked as IPv4 for a v4-mapped source */
		isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
		if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
		    ipst))
			return (ENXIO);
		break;
	}
	case IPV6_HOPLIMIT:
		/* It is only allowed as ancillary data */
		if (!coa->coa_ancillary)
			return (EINVAL);

		/* A zero length is accepted; it reverts to the default */
		if (inlen != 0 && inlen != sizeof (int))
			return (EINVAL);
		if (inlen == sizeof (int)) {
			if (*i1 > 255 || *i1 < -1 || *i1 == 0)
				return (EINVAL);
		}
		break;
	case IPV6_TCLASS:
		/* A zero length is accepted; it clears the traffic class */
		if (inlen != 0 && inlen != sizeof (int))
			return (EINVAL);
		if (inlen == sizeof (int)) {
			if (*i1 > 255 || *i1 < -1)
				return (EINVAL);
		}
		break;
	case IPV6_NEXTHOP:
		/* A zero length is accepted; it clears the next-hop */
		if (inlen != 0 && inlen != sizeof (sin6_t))
			return (EINVAL);
		if (inlen == sizeof (sin6_t)) {
			sin6_t *sin6 = (sin6_t *)invalp;
			ire_t	*ire;

			if (sin6->sin6_family != AF_INET6)
				return (EAFNOSUPPORT);
			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
				return (EADDRNOTAVAIL);

			/* Verify that the next-hop is on-link */
			ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
			    0, 0, IRE_ONLINK, NULL, zoneid,
			    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
			if (ire == NULL)
				return (EHOSTUNREACH);
			ire_refrele(ire);
			break;
		}
		break;
	case IPV6_RTHDR:
	case IPV6_DSTOPTS:
	case IPV6_RTHDRDSTOPTS:
	case IPV6_HOPOPTS: {
		/* All have the length field in the same place */
		ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
		/*
		 * Sanity checks - minimum size, size a multiple of
		 * eight bytes, and matching size passed in.
		 * A zero length is accepted without looking at the header.
		 */
		if (inlen != 0 &&
		    inlen != (8 * (hopts->ip6h_len + 1)))
			return (EINVAL);
		break;
	}
	case IPV6_PATHMTU:
		/* Can't be set */
		return (EINVAL);

	case IPV6_USE_MIN_MTU:
		if (inlen != sizeof (int))
			return (EINVAL);
		/* Only -1, 0 and 1 are accepted */
		if (*i1 < -1 || *i1 > 1)
			return (EINVAL);
		break;
	case IPV6_SRC_PREFERENCES:
		if (inlen != sizeof (uint32_t))
			return (EINVAL);
		break;
	case IPV6_V6ONLY:
		if (*i1 < 0 || *i1 > 1) {
			return (EINVAL);
		}
		break;
	}
	if (checkonly)
		return (0);

	/* Here we set the actual option value */
	/*
	 * conn_lock protects the bitfields, and is used to
	 * set the fields atomically. Not needed for ixa settings since
	 * the caller has an exclusive copy of the ixa.
	 * We can not hold conn_lock across the multicast options though.
	 */
	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
	switch (name) {
	case IPV6_MULTICAST_IF:
		/* ifindex was picked up by the validation above */
		ixa->ixa_multicast_ifindex = ifindex;
		/* Need to redo ip_attr_connect */
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IPV6_UNICAST_HOPS:
		/* -1 means use default */
		mutex_enter(&connp->conn_lock);
		if (*i1 == -1) {
			ipp->ipp_unicast_hops = connp->conn_default_ttl;
		} else {
			ipp->ipp_unicast_hops = (uint8_t)*i1;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		break;
	case IPV6_MULTICAST_HOPS:
		/* -1 means use default */
		if (*i1 == -1) {
			ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
		} else {
			ixa->ixa_multicast_ttl = (uint8_t)*i1;
		}
		/* Handled automatically by ip_output */
		break;
	case IPV6_MULTICAST_LOOP:
		if (*i1 != 0)
			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
		else
			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
		/* Handled automatically by ip_output */
		break;
	case IPV6_JOIN_GROUP:
	case IPV6_LEAVE_GROUP:
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
		/* checkonly is always zero once we get here */
		return (ip_opt_set_multicast_group(connp, name,
		    invalp, B_TRUE, checkonly));

	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
		/* checkonly is always zero once we get here */
		return (ip_opt_set_multicast_sources(connp, name,
		    invalp, B_TRUE, checkonly));

	case IPV6_BOUND_IF:
		ixa->ixa_ifindex = ifindex;		/* Send */
		mutex_enter(&connp->conn_lock);
		connp->conn_incoming_ifindex = ifindex;	/* Receive */
		connp->conn_bound_if = ifindex;		/* getsockopt */
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IPV6_UNSPEC_SRC:
		mutex_enter(&connp->conn_lock);
		connp->conn_unspec_src = onoff;
		/* The source is not verified while the option is on */
		if (onoff)
			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
		else
			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVPKTINFO:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVTCLASS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVPATHMTU:
		mutex_enter(&connp->conn_lock);
		connp->conn_ipv6_recvpathmtu = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVHOPLIMIT:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
		    onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVHOPOPTS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVDSTOPTS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case _OLD_IPV6_RECVDSTOPTS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
		    onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVRTHDRDSTOPTS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
		    onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVRTHDR:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_PKTINFO:
		mutex_enter(&connp->conn_lock);
		if (inlen == 0) {
			/* Clear both the source address and the ifindex */
			ipp->ipp_fields &= ~IPPF_ADDR;
			ipp->ipp_addr = ipv6_all_zeros;
			ixa->ixa_ifindex = 0;
		} else {
			struct in6_pktinfo *pkti;

			pkti = (struct in6_pktinfo *)invalp;
			/* IPPF_ADDR is set only for a specified address */
			ipp->ipp_addr = pkti->ipi6_addr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
				ipp->ipp_fields |= IPPF_ADDR;
			else
				ipp->ipp_fields &= ~IPPF_ADDR;
			ixa->ixa_ifindex = pkti->ipi6_ifindex;
		}
		mutex_exit(&connp->conn_lock);
		/* Source and ifindex might have changed */
		coa->coa_changed |= COA_HEADER_CHANGED;
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IPV6_HOPLIMIT:
		mutex_enter(&connp->conn_lock);
		if (inlen == 0 || *i1 == -1) {
			/* Revert to default */
			ipp->ipp_fields &= ~IPPF_HOPLIMIT;
			ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
		} else {
			ipp->ipp_hoplimit = *i1;
			ipp->ipp_fields |= IPPF_HOPLIMIT;
			/* Ensure that it sticks for multicast packets */
			ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		break;
	case IPV6_TCLASS:
		/*
		 * IPV6_TCLASS accepts -1 as use kernel default
		 * and [0, 255] as the actual traffic class.
		 */
		mutex_enter(&connp->conn_lock);
		if (inlen == 0 || *i1 == -1) {
			ipp->ipp_tclass = 0;
			ipp->ipp_fields &= ~IPPF_TCLASS;
		} else {
			ipp->ipp_tclass = *i1;
			ipp->ipp_fields |= IPPF_TCLASS;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		break;
	case IPV6_NEXTHOP:
		/* A zero length or an unspecified address clears it */
		if (inlen == 0) {
			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
		} else {
			sin6_t *sin6 = (sin6_t *)invalp;

			ixa->ixa_nexthop_v6 = sin6->sin6_addr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
				ixa->ixa_flags |= IXAF_NEXTHOP_SET;
			else
				ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
		}
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	/*
	 * The four extension header options below are handled alike:
	 * optcom_pkt_set() stores the passed in header in the ipp, and the
	 * matching IPPF_ flag is set or cleared depending on whether the
	 * resulting length is non-zero.
	 */
	case IPV6_HOPOPTS:
		mutex_enter(&connp->conn_lock);
		error = optcom_pkt_set(invalp, inlen,
		    (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
		if (error != 0) {
			mutex_exit(&connp->conn_lock);
			return (error);
		}
		if (ipp->ipp_hopoptslen == 0) {
			ipp->ipp_fields &= ~IPPF_HOPOPTS;
		} else {
			ipp->ipp_fields |= IPPF_HOPOPTS;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		coa->coa_changed |= COA_WROFF_CHANGED;
		break;
	case IPV6_RTHDRDSTOPTS:
		mutex_enter(&connp->conn_lock);
		error = optcom_pkt_set(invalp, inlen,
		    (uchar_t **)&ipp->ipp_rthdrdstopts,
		    &ipp->ipp_rthdrdstoptslen);
		if (error != 0) {
			mutex_exit(&connp->conn_lock);
			return (error);
		}
		if (ipp->ipp_rthdrdstoptslen == 0) {
			ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
		} else {
			ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		coa->coa_changed |= COA_WROFF_CHANGED;
		break;
	case IPV6_DSTOPTS:
		mutex_enter(&connp->conn_lock);
		error = optcom_pkt_set(invalp, inlen,
		    (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
		if (error != 0) {
			mutex_exit(&connp->conn_lock);
			return (error);
		}
		if (ipp->ipp_dstoptslen == 0) {
			ipp->ipp_fields &= ~IPPF_DSTOPTS;
		} else {
			ipp->ipp_fields |= IPPF_DSTOPTS;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		coa->coa_changed |= COA_WROFF_CHANGED;
		break;
	case IPV6_RTHDR:
		mutex_enter(&connp->conn_lock);
		error = optcom_pkt_set(invalp, inlen,
		    (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
		if (error != 0) {
			mutex_exit(&connp->conn_lock);
			return (error);
		}
		if (ipp->ipp_rthdrlen == 0) {
			ipp->ipp_fields &= ~IPPF_RTHDR;
		} else {
			ipp->ipp_fields |= IPPF_RTHDR;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		coa->coa_changed |= COA_WROFF_CHANGED;
		break;

	case IPV6_DONTFRAG:
		/* IXAF_PMTU_DISCOVERY is always the inverse of DONTFRAG */
		if (onoff) {
			ixa->ixa_flags |= IXAF_DONTFRAG;
			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
		} else {
			ixa->ixa_flags &= ~IXAF_DONTFRAG;
			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
		}
		/* Need to redo ip_attr_connect */
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;

	case IPV6_USE_MIN_MTU:
		/* The flag records that a value was explicitly set */
		ixa->ixa_flags |= IXAF_USE_MIN_MTU;
		ixa->ixa_use_min_mtu = *i1;
		/* Need to redo ip_attr_connect */
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;

	case IPV6_SEC_OPT:
		mutex_enter(&connp->conn_lock);
		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
		mutex_exit(&connp->conn_lock);
		if (error != 0) {
			return (error);
		}
		/* This is an IPsec policy change - redo ip_attr_connect */
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IPV6_SRC_PREFERENCES:
		/*
		 * This socket option only affects connected
		 * sockets that haven't already bound to a specific
		 * IPv6 address.  In other words, sockets that
		 * don't call bind() with an address other than the
		 * unspecified address and that call connect().
		 * ip_set_destination_v6() passes these preferences
		 * to the ipif_select_source_v6() function.
		 */
		mutex_enter(&connp->conn_lock);
		error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
		mutex_exit(&connp->conn_lock);
		if (error != 0) {
			return (error);
		}
		break;
	case IPV6_V6ONLY:
		mutex_enter(&connp->conn_lock);
		connp->conn_ipv6_v6only = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	}
	return (0);
}
2000 
2001 /* Handle IPPROTO_UDP */
2002 /* ARGSUSED1 */
2003 static int
2004 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2005     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2006 {
2007 	conn_t		*connp = coa->coa_connp;
2008 	int		*i1 = (int *)invalp;
2009 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2010 	int		error;
2011 
2012 	switch (name) {
2013 	case UDP_ANONPRIVBIND:
2014 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2015 			return (error);
2016 		}
2017 		break;
2018 	}
2019 	if (checkonly)
2020 		return (0);
2021 
2022 	/* Here we set the actual option value */
2023 	mutex_enter(&connp->conn_lock);
2024 	switch (name) {
2025 	case UDP_ANONPRIVBIND:
2026 		connp->conn_anon_priv_bind = onoff;
2027 		break;
2028 	case UDP_EXCLBIND:
2029 		connp->conn_exclbind = onoff;
2030 		break;
2031 	}
2032 	mutex_exit(&connp->conn_lock);
2033 	return (0);
2034 }
2035 
2036 /* Handle IPPROTO_TCP */
2037 /* ARGSUSED1 */
2038 static int
2039 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2040     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2041 {
2042 	conn_t		*connp = coa->coa_connp;
2043 	int		*i1 = (int *)invalp;
2044 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2045 	int		error;
2046 
2047 	switch (name) {
2048 	case TCP_ANONPRIVBIND:
2049 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2050 			return (error);
2051 		}
2052 		break;
2053 	}
2054 	if (checkonly)
2055 		return (0);
2056 
2057 	/* Here we set the actual option value */
2058 	mutex_enter(&connp->conn_lock);
2059 	switch (name) {
2060 	case TCP_ANONPRIVBIND:
2061 		connp->conn_anon_priv_bind = onoff;
2062 		break;
2063 	case TCP_EXCLBIND:
2064 		connp->conn_exclbind = onoff;
2065 		break;
2066 	case TCP_RECVDSTADDR:
2067 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2068 		break;
2069 	}
2070 	mutex_exit(&connp->conn_lock);
2071 	return (0);
2072 }
2073 
2074 int
2075 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2076 {
2077 	sin_t		*sin;
2078 	sin6_t		*sin6;
2079 
2080 	if (connp->conn_family == AF_INET) {
2081 		if (*salenp < sizeof (sin_t))
2082 			return (EINVAL);
2083 
2084 		*salenp = sizeof (sin_t);
2085 		/* Fill zeroes and then initialize non-zero fields */
2086 		sin = (sin_t *)sa;
2087 		*sin = sin_null;
2088 		sin->sin_family = AF_INET;
2089 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2090 		    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2091 			sin->sin_addr.s_addr = connp->conn_saddr_v4;
2092 		} else {
2093 			/*
2094 			 * INADDR_ANY
2095 			 * conn_saddr is not set, we might be bound to
2096 			 * broadcast/multicast. Use conn_bound_addr as
2097 			 * local address instead (that could
2098 			 * also still be INADDR_ANY)
2099 			 */
2100 			sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2101 		}
2102 		sin->sin_port = connp->conn_lport;
2103 	} else {
2104 		if (*salenp < sizeof (sin6_t))
2105 			return (EINVAL);
2106 
2107 		*salenp = sizeof (sin6_t);
2108 		/* Fill zeroes and then initialize non-zero fields */
2109 		sin6 = (sin6_t *)sa;
2110 		*sin6 = sin6_null;
2111 		sin6->sin6_family = AF_INET6;
2112 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2113 			sin6->sin6_addr = connp->conn_saddr_v6;
2114 		} else {
2115 			/*
2116 			 * conn_saddr is not set, we might be bound to
2117 			 * broadcast/multicast. Use conn_bound_addr as
2118 			 * local address instead (which could
2119 			 * also still be unspecified)
2120 			 */
2121 			sin6->sin6_addr = connp->conn_bound_addr_v6;
2122 		}
2123 		sin6->sin6_port = connp->conn_lport;
2124 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2125 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2126 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2127 	}
2128 	return (0);
2129 }
2130 
2131 int
2132 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2133 {
2134 	struct sockaddr_in	*sin;
2135 	struct sockaddr_in6	*sin6;
2136 
2137 	if (connp->conn_family == AF_INET) {
2138 		if (*salenp < sizeof (sin_t))
2139 			return (EINVAL);
2140 
2141 		*salenp = sizeof (sin_t);
2142 		/* initialize */
2143 		sin = (sin_t *)sa;
2144 		*sin = sin_null;
2145 		sin->sin_family = AF_INET;
2146 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
2147 		sin->sin_port = connp->conn_fport;
2148 	} else {
2149 		if (*salenp < sizeof (sin6_t))
2150 			return (EINVAL);
2151 
2152 		*salenp = sizeof (sin6_t);
2153 		/* initialize */
2154 		sin6 = (sin6_t *)sa;
2155 		*sin6 = sin6_null;
2156 		sin6->sin6_family = AF_INET6;
2157 		sin6->sin6_addr = connp->conn_faddr_v6;
2158 		sin6->sin6_port =  connp->conn_fport;
2159 		sin6->sin6_flowinfo = connp->conn_flowinfo;
2160 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2161 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2162 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2163 	}
2164 	return (0);
2165 }
2166 
/*
 * Forward declarations of the checksum helpers defined later in this file;
 * they are used by conn_build_hdr_template() and conn_prepend_hdr().
 */
static uint32_t	cksum_massage_options_v4(ipha_t *, netstack_t *);
static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2169 
/*
 * Allocate and fill in conn_ht_iphc based on the current information
 * in the conn.
 * Normally used when we bind() and connect().
 * Returns failure if can't allocate memory, or if there is a problem
 * with a routing header/option.
 *
 * We allocate space for the transport header (ulp_hdr_len + extra) and
 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
 * The extra is there for transports that want some spare room for future
 * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
 * excludes the extra part.
 *
 * We massage a routing option/header and store the checksum difference
 * in conn_sum.
 *
 * Caller must hold conn_lock.
 * Caller needs to update conn_wroff if desired.
 *
 * Return values:
 *	0		success
 *	EHOSTUNREACH	the IPv4 header plus options exceeds IP_MAX_HDR_LENGTH
 *	ENOMEM		the template buffer could not be allocated
 *	EADDRNOTAVAIL	IPv6 routing header whose first hop is a v4-mapped
 *			address
 *
 * Note that ixa_ip_hdr_length is updated before the allocation is
 * attempted, and that on the EADDRNOTAVAIL return the template and
 * conn_sum have already been written.
 */
int
conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
    const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
{
	ip_xmit_attr_t	*ixa = connp->conn_ixa;
	ip_pkt_t	*ipp = &connp->conn_xmit_ipp;
	uint_t		ip_hdr_length;
	uchar_t		*hdrs;
	uint_t		hdrs_len;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	if (ixa->ixa_flags & IXAF_IS_IPV4) {
		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
		/* In case of TX label and IP options it can be too much */
		if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
			/* Preserves existing TX errno for this */
			return (EHOSTUNREACH);
		}
	} else {
		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
	}
	ixa->ixa_ip_hdr_length = ip_hdr_length;
	hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
	ASSERT(hdrs_len != 0);

	/* Reuse the existing buffer when the required size is unchanged */
	if (hdrs_len != connp->conn_ht_iphc_allocated) {
		/* Allocate new before we free any old */
		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
		if (hdrs == NULL)
			return (ENOMEM);

		if (connp->conn_ht_iphc != NULL) {
			kmem_free(connp->conn_ht_iphc,
			    connp->conn_ht_iphc_allocated);
		}
		connp->conn_ht_iphc = hdrs;
		connp->conn_ht_iphc_allocated = hdrs_len;
	} else {
		hdrs = connp->conn_ht_iphc;
	}
	/* From here on hdrs_len is the template length without the extra */
	hdrs_len -= extra;
	connp->conn_ht_iphc_len = hdrs_len;

	/* The transport header follows the IP header(s) in the template */
	connp->conn_ht_ulp = hdrs + ip_hdr_length;
	connp->conn_ht_ulp_len = ulp_hdr_length;

	if (ixa->ixa_flags & IXAF_IS_IPV4) {
		ipha_t	*ipha = (ipha_t *)hdrs;

		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
		ipha->ipha_length = htons(hdrs_len);
		/* Make the DF bit in the template track IXAF_PMTU_IPV4_DF */
		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
		else
			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;

		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
			connp->conn_sum = cksum_massage_options_v4(ipha,
			    connp->conn_netstack);
		} else {
			connp->conn_sum = 0;
		}
	} else {
		ip6_t	*ip6h = (ip6_t *)hdrs;

		ip6h->ip6_src = *v6src;
		ip6h->ip6_dst = *v6dst;
		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
		    flowinfo);
		/* The payload length does not include the fixed IPv6 header */
		ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);

		if (ipp->ipp_fields & IPPF_RTHDR) {
			connp->conn_sum = cksum_massage_options_v6(ip6h,
			    ip_hdr_length, connp->conn_netstack);

			/*
			 * Verify that the first hop isn't a mapped address.
			 * Routers along the path need to do this verification
			 * for subsequent hops.
			 */
			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
				return (EADDRNOTAVAIL);

		} else {
			connp->conn_sum = 0;
		}
	}
	return (0);
}
2280 
/*
 * Prepend a header template to data_mp based on the ip_pkt_t
 * and the passed in source, destination and protocol.
 *
 * Returns the mblk that starts with the headers, or NULL on failure with
 * *errorp set (ENOMEM if memory can't be allocated, EADDRNOTAVAIL if an
 * IPv6 routing header has a v4-mapped first hop). On failure data_mp has
 * been freed. *errorp is not written on success.
 *
 * We allocate space for the transport header (ulp_hdr_len) and
 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
 *
 * We massage a routing option/header and return the checksum difference
 * in *sump. This is in host byte order.
 *
 * Caller needs to update conn_wroff if desired.
 */
mblk_t *
conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
    const in6_addr_t *v6src, const in6_addr_t *v6dst,
    uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
    uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
{
	uint_t		ip_hdr_length;
	uchar_t		*hdrs;
	uint_t		hdrs_len;
	mblk_t		*mp;

	if (ixa->ixa_flags & IXAF_IS_IPV4) {
		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
		ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
	} else {
		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
	}
	hdrs_len = ip_hdr_length + ulp_hdr_length;
	ASSERT(hdrs_len != 0);

	ixa->ixa_ip_hdr_length = ip_hdr_length;

	/*
	 * Can we prepend to data_mp? That requires enough room in front of
	 * b_rptr and that the data block is not shared (db_ref == 1).
	 */
	if (data_mp != NULL &&
	    data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
	    data_mp->b_datap->db_ref == 1) {
		hdrs = data_mp->b_rptr - hdrs_len;
		data_mp->b_rptr = hdrs;
		mp = data_mp;
	} else {
		mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
		if (mp == NULL) {
			freemsg(data_mp);
			*errorp = ENOMEM;
			return (NULL);
		}
		/* Place the headers at the very end of the new buffer */
		mp->b_wptr = mp->b_datap->db_lim;
		hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
		mp->b_cont = data_mp;
	}

	/*
	 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
	 * if PKTINFO (aka IPPF_ADDR) was set.
	 */
	if (ixa->ixa_flags & IXAF_IS_IPV4) {
		ipha_t *ipha = (ipha_t *)hdrs;

		ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
		ipha->ipha_length = htons(hdrs_len + data_length);
		/* Make the DF bit track IXAF_PMTU_IPV4_DF */
		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
		else
			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;

		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
			*sump = cksum_massage_options_v4(ipha,
			    ixa->ixa_ipst->ips_netstack);
		} else {
			*sump = 0;
		}
	} else {
		ip6_t *ip6h = (ip6_t *)hdrs;

		ip6h->ip6_src = *v6src;
		ip6h->ip6_dst = *v6dst;
		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
		/* The payload length does not include the fixed IPv6 header */
		ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);

		if (ipp->ipp_fields & IPPF_RTHDR) {
			*sump = cksum_massage_options_v6(ip6h,
			    ip_hdr_length, ixa->ixa_ipst->ips_netstack);

			/*
			 * Verify that the first hop isn't a mapped address.
			 * Routers along the path need to do this verification
			 * for subsequent hops.
			 */
			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
				*errorp = EADDRNOTAVAIL;
				/*
				 * mp is either data_mp itself or has data_mp
				 * linked via b_cont, so this frees data_mp too.
				 */
				freemsg(mp);
				return (NULL);
			}
		} else {
			*sump = 0;
		}
	}
	return (mp);
}
2386 
/*
 * Massage a source route if any putting the first hop
 * in ipha_dst. Compute a starting value for the checksum which
 * takes into account that the original ipha_dst should be
 * included in the checksum but that IP will include the
 * first hop from the source route in the tcp checksum.
 *
 * The result is folded to 16 bits and passed through ntohs() before
 * being returned.
 */
static uint32_t
cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
{
	in_addr_t	dst;
	uint32_t	cksum;

	/* Get last hop then diff against first hop */
	cksum = ip_massage_options(ipha, ns);
	/* Fold the 32-bit value into the sum of its two 16-bit halves */
	cksum = (cksum & 0xFFFF) + (cksum >> 16);
	/* ipha_dst is read after ip_massage_options() has run */
	dst = ipha->ipha_dst;
	cksum -= ((dst >> 16) + (dst & 0xffff));
	/*
	 * NOTE(review): the extra decrement when the subtraction went
	 * negative looks like the end-around borrow of one's-complement
	 * arithmetic - confirm.
	 */
	if ((int)cksum < 0)
		cksum--;
	/* Fold twice so that any carry from the first fold is absorbed */
	cksum = (cksum & 0xFFFF) + (cksum >> 16);
	cksum = (cksum & 0xFFFF) + (cksum >> 16);
	ASSERT(cksum < 0x10000);
	return (ntohs(cksum));
}
2412 
2413 static uint32_t
2414 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2415 {
2416 	uint8_t		*end;
2417 	ip6_rthdr_t	*rth;
2418 	uint32_t	cksum;
2419 
2420 	end = (uint8_t *)ip6h + ip_hdr_len;
2421 	rth = ip_find_rthdr_v6(ip6h, end);
2422 	if (rth == NULL)
2423 		return (0);
2424 
2425 	cksum = ip_massage_options_v6(ip6h, rth, ns);
2426 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2427 	ASSERT(cksum < 0x10000);
2428 	return (ntohs(cksum));
2429 }
2430 
2431 /*
2432  * ULPs that change the destination address need to call this for each
2433  * change to discard any state about a previous destination that might
2434  * have been multicast or multirt.
2435  */
2436 void
2437 ip_attr_newdst(ip_xmit_attr_t *ixa)
2438 {
2439 	ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2440 	    IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2441 	    IXAF_NO_LOOP_ZONEID_SET);
2442 }
2443 
2444 /*
2445  * Determine the nexthop which will be used.
2446  * Normally this is just the destination, but if a IPv4 source route, or
2447  * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2448  * there.
2449  */
2450 void
2451 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2452     const in6_addr_t *dst, in6_addr_t *nexthop)
2453 {
2454 	if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2455 		*nexthop = *dst;
2456 		return;
2457 	}
2458 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2459 		ipaddr_t v4dst;
2460 		ipaddr_t v4nexthop;
2461 
2462 		IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2463 		v4nexthop = ip_pkt_source_route_v4(ipp);
2464 		if (v4nexthop == INADDR_ANY)
2465 			v4nexthop = v4dst;
2466 
2467 		IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2468 	} else {
2469 		const in6_addr_t *v6nexthop;
2470 
2471 		v6nexthop = ip_pkt_source_route_v6(ipp);
2472 		if (v6nexthop == NULL)
2473 			v6nexthop = dst;
2474 
2475 		*nexthop = *v6nexthop;
2476 	}
2477 }
2478 
2479 /*
2480  * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2481  * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2482  * case (connected latching is done in conn_connect).
2483  * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2484  * set, but doesn't otherwise use the conn_t.
2485  *
2486  * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2487  * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2488  *
2489  * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2490  * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2491  *
2492  * Updates laddrp and uinfo if they are non-NULL.
2493  *
2494  * TSOL notes: The callers if ip_attr_connect must check if the destination
2495  * is different than before and in that case redo conn_update_label.
2496  * The callers of conn_connect do not need that since conn_connect
2497  * performs the conn_update_label.
2498  */
2499 int
2500 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2501     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2502     const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2503     iulp_t *uinfo, uint32_t flags)
2504 {
2505 	in6_addr_t		laddr = *v6src;
2506 	int			error;
2507 
2508 	ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2509 
2510 	if (connp->conn_zone_is_global)
2511 		flags |= IPDF_ZONE_IS_GLOBAL;
2512 	else
2513 		flags &= ~IPDF_ZONE_IS_GLOBAL;
2514 
2515 	/*
2516 	 * Lookup the route to determine a source address and the uinfo.
2517 	 * If the ULP has a source route option then the caller will
2518 	 * have set v6nexthop to be the first hop.
2519 	 */
2520 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2521 		ipaddr_t v4dst;
2522 		ipaddr_t v4src, v4nexthop;
2523 
2524 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2525 		IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2526 		IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2527 
2528 		if (connp->conn_unspec_src || v4src != INADDR_ANY)
2529 			flags &= ~IPDF_SELECT_SRC;
2530 		else
2531 			flags |= IPDF_SELECT_SRC;
2532 
2533 		error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2534 		    uinfo, flags, connp->conn_mac_mode);
2535 		IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2536 	} else {
2537 		if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2538 			flags &= ~IPDF_SELECT_SRC;
2539 		else
2540 			flags |= IPDF_SELECT_SRC;
2541 
2542 		error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2543 		    uinfo, flags, connp->conn_mac_mode);
2544 	}
2545 	/* Pass out some address even if we hit a RTF_REJECT etc */
2546 	if (laddrp != NULL)
2547 		*laddrp = laddr;
2548 
2549 	if (error != 0)
2550 		return (error);
2551 
2552 	if (flags & IPDF_IPSEC) {
2553 		/*
2554 		 * Set any IPsec policy in ixa. Routine also looks at ULP
2555 		 * ports.
2556 		 */
2557 		ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2558 	}
2559 	return (0);
2560 }
2561 
/*
 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
 * Assumes that conn_faddr and conn_fport are already set. As such it is not
 * usable for SCTP, since SCTP has multiple faddrs.
 *
 * Caller must hold conn_lock to provide atomic consistency between the
 * conn_t's addresses and the ixa.
 * NOTE: this function drops and reacquires conn_lock since it can't be
 * held across ip_attr_connect/ip_set_destination.
 *
 * The caller needs to handle inserting in the receive-side fanout when
 * appropriate after conn_connect returns.
 *
 * Returns zero on success, otherwise the error from ip_attr_connect(),
 * conn_update_label() or ipsec_conn_cache_policy(), or ECONNREFUSED on a
 * labeled system when conn_mlp_type is not mlptSingle. conn_saddr_v6 is
 * updated even when ip_attr_connect() fails.
 */
int
conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
{
	ip_xmit_attr_t	*ixa = connp->conn_ixa;
	in6_addr_t	nexthop;
	in6_addr_t	saddr, faddr;
	in_port_t	fport;
	int		error;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	if (connp->conn_ipversion == IPV4_VERSION)
		ixa->ixa_flags |= IXAF_IS_IPV4;
	else
		ixa->ixa_flags &= ~IXAF_IS_IPV4;

	/* We do IPsec latching below - hence no caching in ip_attr_connect */
	flags &= ~IPDF_IPSEC;

	/* In case we had previously done an ip_attr_connect */
	ip_attr_newdst(ixa);

	/*
	 * Determine the nexthop and copy the addresses before dropping
	 * conn_lock.
	 */
	ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
	    &connp->conn_faddr_v6, &nexthop);
	saddr = connp->conn_saddr_v6;
	faddr = connp->conn_faddr_v6;
	fport = connp->conn_fport;

	/*
	 * saddr is passed both as the source and as laddrp, so on return it
	 * holds the local address chosen by ip_attr_connect.
	 */
	mutex_exit(&connp->conn_lock);
	error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
	    &saddr, uinfo, flags | IPDF_VERIFY_DST);
	mutex_enter(&connp->conn_lock);

	/* Could have changed even if an error */
	connp->conn_saddr_v6 = saddr;
	if (error != 0)
		return (error);

	/*
	 * Check whether Trusted Solaris policy allows communication with this
	 * host, and pretend that the destination is unreachable if not.
	 * Compute any needed label and place it in ipp_label_v4/v6.
	 *
	 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
	 * the packet.
	 *
	 * TSOL Note: Any concurrent threads would pick a different ixa
	 * (and ipp if they are to change the ipp)  so we
	 * don't have to worry about concurrent threads.
	 */
	if (is_system_labeled()) {
		if (connp->conn_mlp_type != mlptSingle)
			return (ECONNREFUSED);

		/*
		 * conn_update_label will set ipp_label* which will later
		 * be used by conn_build_hdr_template.
		 */
		error = conn_update_label(connp, ixa,
		    &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
		if (error != 0)
			return (error);
	}

	/*
	 * Ensure that we match on the selected local address.
	 * This overrides conn_laddr in the case we had earlier bound to a
	 * multicast or broadcast address.
	 */
	connp->conn_laddr_v6 = connp->conn_saddr_v6;

	/*
	 * Allow setting new policies.
	 * The addresses/ports are already set, thus the IPsec policy calls
	 * can handle their passed-in conn's.
	 */
	connp->conn_policy_cached = B_FALSE;

	/*
	 * Cache IPsec policy in this conn.  If we have per-socket policy,
	 * we'll cache that.  If we don't, we'll inherit global policy.
	 *
	 * This is done before the caller inserts in the receive-side fanout.
	 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
	 * for connections where we don't have a policy. This is to prevent
	 * global policy lookups in the inbound path.
	 *
	 * If we insert before we set conn_policy_cached,
	 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
	 * because global policy could be non-empty. We normally call
	 * ipsec_check_policy() for conn_policy_cached connections only if
	 * conn_in_enforce_policy is set. But in this case,
	 * conn_policy_cached can get set anytime since we made the
	 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
	 * called, which will make the above assumption false.  Thus, we
	 * need to insert after we set conn_policy_cached.
	 */
	error = ipsec_conn_cache_policy(connp,
	    connp->conn_ipversion == IPV4_VERSION);
	if (error != 0)
		return (error);

	/*
	 * We defer to do LSO check until here since now we have better idea
	 * whether IPsec is present. If the underlying ill is LSO capable,
	 * copy its capability in so the ULP can decide whether to enable LSO
	 * on this connection. So far, only TCP/IPv4 is implemented, so won't
	 * claim LSO for IPv6.
	 *
	 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
	 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
	 */
	ixa->ixa_flags &= ~IXAF_LSO_CAPAB;

	ASSERT(ixa->ixa_ire != NULL);
	if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
	    (ixa->ixa_nce != NULL) &&
	    ((ixa->ixa_flags & IXAF_IS_IPV4) ?
	    ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
	    ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
		ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
		ixa->ixa_flags |= IXAF_LSO_CAPAB;
	}

	/*
	 * Check whether ZEROCOPY capability is usable for this connection.
	 * The same IPsec, local/loopback and MULTIRT exclusions as for LSO
	 * apply.
	 */
	ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;

	if ((flags & IPDF_ZCOPY) &&
	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
	    (ixa->ixa_nce != NULL) &&
	    ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
		ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
	}
	return (0);
}
2719 
2720 /*
2721  * Predicates to check if the addresses match conn_last*
2722  */
2723 
2724 /*
2725  * Compare the conn against an address.
2726  * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2727  */
2728 boolean_t
2729 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2730 {
2731 	ASSERT(connp->conn_family == AF_INET);
2732 	return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2733 	    sin->sin_port == connp->conn_lastdstport);
2734 }
2735 
2736 /*
2737  * Compare, including for mapped addresses
2738  */
2739 boolean_t
2740 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2741 {
2742 	return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2743 	    sin6->sin6_port == connp->conn_lastdstport &&
2744 	    sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2745 	    sin6->sin6_scope_id == connp->conn_lastscopeid);
2746 }
2747 
2748 /*
2749  * Compute a label and place it in the ip_packet_t.
2750  * Handles IPv4 and IPv6.
2751  * The caller should have a correct ixa_tsl and ixa_zoneid and have
2752  * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2753  * has been called.
2754  */
2755 int
2756 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2757     const in6_addr_t *v6dst, ip_pkt_t *ipp)
2758 {
2759 	int		err;
2760 	ipaddr_t	v4dst;
2761 
2762 	if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2763 		uchar_t		opt_storage[IP_MAX_OPT_LENGTH];
2764 
2765 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2766 
2767 		err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2768 		    v4dst, opt_storage, ixa->ixa_ipst);
2769 		if (err == 0) {
2770 			/* Length contained in opt_storage[IPOPT_OLEN] */
2771 			err = optcom_pkt_set(opt_storage,
2772 			    opt_storage[IPOPT_OLEN],
2773 			    (uchar_t **)&ipp->ipp_label_v4,
2774 			    &ipp->ipp_label_len_v4);
2775 		}
2776 		if (err != 0) {
2777 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2778 			    char *, "conn(1) failed to update options(2) "
2779 			    "on ixa(3)",
2780 			    conn_t *, connp, char *, opt_storage,
2781 			    ip_xmit_attr_t *, ixa);
2782 		}
2783 		if (ipp->ipp_label_len_v4 != 0)
2784 			ipp->ipp_fields |= IPPF_LABEL_V4;
2785 		else
2786 			ipp->ipp_fields &= ~IPPF_LABEL_V4;
2787 	} else {
2788 		uchar_t		opt_storage[TSOL_MAX_IPV6_OPTION];
2789 		uint_t		optlen;
2790 
2791 		err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2792 		    v6dst, opt_storage, ixa->ixa_ipst);
2793 		if (err == 0) {
2794 			/*
2795 			 * Note that ipp_label_v6 is just the option - not
2796 			 * the hopopts extension header.
2797 			 *
2798 			 * Length contained in opt_storage[IPOPT_OLEN], but
2799 			 * that doesn't include the two byte options header.
2800 			 */
2801 			optlen = opt_storage[IPOPT_OLEN];
2802 			if (optlen != 0)
2803 				optlen += 2;
2804 
2805 			err = optcom_pkt_set(opt_storage, optlen,
2806 			    (uchar_t **)&ipp->ipp_label_v6,
2807 			    &ipp->ipp_label_len_v6);
2808 		}
2809 		if (err != 0) {
2810 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2811 			    char *, "conn(1) failed to update options(2) "
2812 			    "on ixa(3)",
2813 			    conn_t *, connp, char *, opt_storage,
2814 			    ip_xmit_attr_t *, ixa);
2815 		}
2816 		if (ipp->ipp_label_len_v6 != 0)
2817 			ipp->ipp_fields |= IPPF_LABEL_V6;
2818 		else
2819 			ipp->ipp_fields &= ~IPPF_LABEL_V6;
2820 	}
2821 	return (err);
2822 }
2823 
/*
 * Inherit all options settings from the parent/listener to the eager.
 * Returns zero on success; ENOMEM if memory allocation failed.
 *
 * We assume that the eager has not had any work done i.e., the conn_ixa
 * and conn_xmit_ipp are all zero.
 * Furthermore we assume that no other thread can access the eager (because
 * it isn't inserted in any fanout list).
 *
 * Note that when ip_pkt_copy() fails the function returns early; the
 * fields assigned before that point have already been copied to the eager.
 */
int
conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
{
	cred_t	*credp;
	int	err;
	void	*notify_cookie;
	uint32_t xmit_hint;

	econnp->conn_family = lconnp->conn_family;
	econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
	econnp->conn_wq = lconnp->conn_wq;
	econnp->conn_rq = lconnp->conn_rq;

	/*
	 * Make a safe copy of the transmit attributes.
	 * conn_connect will later be used by the caller to setup the ire etc.
	 */
	ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
	ASSERT(econnp->conn_ixa->ixa_ire == NULL);
	ASSERT(econnp->conn_ixa->ixa_dce == NULL);
	ASSERT(econnp->conn_ixa->ixa_nce == NULL);

	/*
	 * Preserve ixa_notify_cookie and xmit_hint: save the eager's own
	 * values and restore them after copying the listener's ixa.
	 */
	notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
	xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
	ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
	econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
	econnp->conn_ixa->ixa_xmit_hint = xmit_hint;

	econnp->conn_bound_if = lconnp->conn_bound_if;
	econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;

	/* Inherit all RECV options */
	econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;

	/* Copy the listener's transmit ip_pkt_t without sleeping */
	err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
	    KM_NOSLEEP);
	if (err != 0)
		return (err);

	econnp->conn_zoneid = lconnp->conn_zoneid;
	econnp->conn_allzones = lconnp->conn_allzones;

	/* This is odd. Pick a flowlabel for each connection instead? */
	econnp->conn_flowinfo = lconnp->conn_flowinfo;

	econnp->conn_default_ttl = lconnp->conn_default_ttl;

	/*
	 * TSOL: tsol_input_proc() needs the eager's cred before the
	 * eager is accepted
	 */
	ASSERT(lconnp->conn_cred != NULL);
	econnp->conn_cred = credp = lconnp->conn_cred;
	/* The eager takes its own hold on the listener's cred */
	crhold(credp);
	econnp->conn_cpid = lconnp->conn_cpid;
	econnp->conn_open_time = ddi_get_lbolt64();

	/*
	 * Cache things in the ixa without any refhold.
	 * Listener might not have set up ixa_cred
	 */
	econnp->conn_ixa->ixa_cred = econnp->conn_cred;
	econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
	if (is_system_labeled())
		econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);

	/*
	 * If the caller has the process-wide flag set, then default to MAC
	 * exempt mode.  This allows read-down to unlabeled hosts.
	 */
	if (getpflags(NET_MAC_AWARE, credp) != 0)
		econnp->conn_mac_mode = CONN_MAC_AWARE;

	econnp->conn_zone_is_global = lconnp->conn_zone_is_global;

	/*
	 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
	 * via soaccept()->soinheritoptions() which essentially applies
	 * all the listener options to the new connection. The options that we
	 * need to take care of are:
	 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
	 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
	 * SO_SNDBUF, SO_RCVBUF.
	 *
	 * SO_RCVBUF:	conn_rcvbuf is set.
	 * SO_SNDBUF:	conn_sndbuf is set.
	 */

	/* Could we define a struct and use a struct copy for this? */
	econnp->conn_sndbuf = lconnp->conn_sndbuf;
	econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
	econnp->conn_sndlowat = lconnp->conn_sndlowat;
	econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
	econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
	econnp->conn_oobinline = lconnp->conn_oobinline;
	econnp->conn_debug = lconnp->conn_debug;
	econnp->conn_keepalive = lconnp->conn_keepalive;
	econnp->conn_linger = lconnp->conn_linger;
	econnp->conn_lingertime = lconnp->conn_lingertime;

	/* Set the IP options */
	econnp->conn_broadcast = lconnp->conn_broadcast;
	econnp->conn_useloopback = lconnp->conn_useloopback;
	econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
	return (0);
}
2940