xref: /illumos-gate/usr/src/uts/common/inet/ip/conn_opt.c (revision e82490700e19f1b8a2cef6102f4726144d281988)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
25  * Copyright 2024 Oxide Computer Company
26  */
27 /* Copyright (c) 1990 Mentat Inc. */
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/strsun.h>
32 #define	_SUN_TPI_VERSION 2
33 #include <sys/tihdr.h>
34 #include <sys/xti_inet.h>
35 #include <sys/ucred.h>
36 #include <sys/zone.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/cmn_err.h>
40 #include <sys/debug.h>
41 #include <sys/atomic.h>
42 #include <sys/policy.h>
43 
44 #include <sys/systm.h>
45 #include <sys/param.h>
46 #include <sys/kmem.h>
47 #include <sys/sdt.h>
48 #include <sys/socket.h>
49 #include <sys/ethernet.h>
50 #include <sys/mac.h>
51 #include <net/if.h>
52 #include <net/if_types.h>
53 #include <net/if_arp.h>
54 #include <net/route.h>
55 #include <sys/sockio.h>
56 #include <netinet/in.h>
57 #include <net/if_dl.h>
58 
59 #include <inet/common.h>
60 #include <inet/mi.h>
61 #include <inet/mib2.h>
62 #include <inet/nd.h>
63 #include <inet/arp.h>
64 #include <inet/snmpcom.h>
65 #include <inet/kstatcom.h>
66 
67 #include <netinet/igmp_var.h>
68 #include <netinet/ip6.h>
69 #include <netinet/icmp6.h>
70 #include <netinet/sctp.h>
71 
72 #include <inet/ip.h>
73 #include <inet/ip_impl.h>
74 #include <inet/ip6.h>
75 #include <inet/ip6_asp.h>
76 #include <inet/tcp.h>
77 #include <inet/ip_multi.h>
78 #include <inet/ip_if.h>
79 #include <inet/ip_ire.h>
80 #include <inet/ip_ftable.h>
81 #include <inet/ip_rts.h>
82 #include <inet/optcom.h>
83 #include <inet/ip_ndp.h>
84 #include <inet/ip_listutils.h>
85 #include <netinet/igmp.h>
86 #include <netinet/ip_mroute.h>
87 #include <netinet/udp.h>
88 #include <inet/ipp_common.h>
89 
90 #include <net/pfkeyv2.h>
91 #include <inet/sadb.h>
92 #include <inet/ipsec_impl.h>
93 #include <inet/ipdrop.h>
94 #include <inet/ip_netinfo.h>
95 
96 #include <inet/ipclassifier.h>
97 #include <inet/sctp_ip.h>
98 #include <inet/sctp/sctp_impl.h>
99 #include <inet/udp_impl.h>
100 #include <sys/sunddi.h>
101 
102 #include <sys/tsol/label.h>
103 #include <sys/tsol/tnet.h>
104 
105 /*
106  * Return how much size is needed for the different ancillary data items
107  */
108 uint_t
conn_recvancillary_size(conn_t * connp,crb_t recv_ancillary,ip_recv_attr_t * ira,mblk_t * mp,ip_pkt_t * ipp)109 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
110     ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
111 {
112 	uint_t		ancil_size;
113 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
114 
115 	/*
116 	 * If IP_RECVDSTADDR is set we include the destination IP
117 	 * address as an option. With IP_RECVOPTS we include all
118 	 * the IP options.
119 	 */
120 	ancil_size = 0;
121 	if (recv_ancillary.crb_recvdstaddr &&
122 	    (ira->ira_flags & IRAF_IS_IPV4)) {
123 		ancil_size += sizeof (struct T_opthdr) +
124 		    sizeof (struct in_addr);
125 		IP_STAT(ipst, conn_in_recvdstaddr);
126 	}
127 
128 	/*
129 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
130 	 * are different
131 	 */
132 	if (recv_ancillary.crb_ip_recvpktinfo &&
133 	    connp->conn_family == AF_INET) {
134 		ancil_size += sizeof (struct T_opthdr) +
135 		    sizeof (struct in_pktinfo);
136 		IP_STAT(ipst, conn_in_recvpktinfo);
137 	}
138 
139 	if ((recv_ancillary.crb_recvopts) &&
140 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
141 		ancil_size += sizeof (struct T_opthdr) +
142 		    ipp->ipp_ipv4_options_len;
143 		IP_STAT(ipst, conn_in_recvopts);
144 	}
145 
146 	if (recv_ancillary.crb_recvslla) {
147 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
148 		ill_t *ill;
149 
150 		/* Make sure ira_l2src is setup if not already */
151 		if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
152 			ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
153 			    ipst);
154 			if (ill != NULL) {
155 				ip_setl2src(mp, ira, ill);
156 				ill_refrele(ill);
157 			}
158 		}
159 		ancil_size += sizeof (struct T_opthdr) +
160 		    sizeof (struct sockaddr_dl);
161 		IP_STAT(ipst, conn_in_recvslla);
162 	}
163 
164 	if (recv_ancillary.crb_recvif) {
165 		ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
166 		IP_STAT(ipst, conn_in_recvif);
167 	}
168 
169 	/*
170 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
171 	 * are different
172 	 */
173 	if (recv_ancillary.crb_ip_recvpktinfo &&
174 	    connp->conn_family == AF_INET6) {
175 		ancil_size += sizeof (struct T_opthdr) +
176 		    sizeof (struct in6_pktinfo);
177 		IP_STAT(ipst, conn_in_recvpktinfo);
178 	}
179 
180 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
181 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
182 		IP_STAT(ipst, conn_in_recvhoplimit);
183 	}
184 
185 	if (recv_ancillary.crb_ipv6_recvtclass) {
186 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
187 		IP_STAT(ipst, conn_in_recvtclass);
188 	}
189 
190 	if (recv_ancillary.crb_ipv6_recvhopopts &&
191 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
192 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
193 		IP_STAT(ipst, conn_in_recvhopopts);
194 	}
195 	/*
196 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
197 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
198 	 * options that appear before a routing header.
199 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
200 	 */
201 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
202 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
203 		    (recv_ancillary.crb_ipv6_recvdstopts &&
204 		    recv_ancillary.crb_ipv6_recvrthdr)) {
205 			ancil_size += sizeof (struct T_opthdr) +
206 			    ipp->ipp_rthdrdstoptslen;
207 			IP_STAT(ipst, conn_in_recvrthdrdstopts);
208 		}
209 	}
210 	if ((recv_ancillary.crb_ipv6_recvrthdr) &&
211 	    (ipp->ipp_fields & IPPF_RTHDR)) {
212 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
213 		IP_STAT(ipst, conn_in_recvrthdr);
214 	}
215 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
216 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
217 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
218 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
219 		IP_STAT(ipst, conn_in_recvdstopts);
220 	}
221 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
222 		ancil_size += sizeof (struct T_opthdr) +
223 		    ucredminsize(ira->ira_cred);
224 		IP_STAT(ipst, conn_in_recvucred);
225 	}
226 
227 	/*
228 	 * If SO_TIMESTAMP is set allocate the appropriate sized
229 	 * buffer. Since gethrestime() expects a pointer aligned
230 	 * argument, we allocate space necessary for extra
231 	 * alignment (even though it might not be used).
232 	 */
233 	if (recv_ancillary.crb_timestamp) {
234 		ancil_size += sizeof (struct T_opthdr) +
235 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
236 		IP_STAT(ipst, conn_in_timestamp);
237 	}
238 
239 	/*
240 	 * If IP_RECVTOS is set allocate the appropriately sized buffer
241 	 */
242 	if (recv_ancillary.crb_recvtos &&
243 	    (ira->ira_flags & IRAF_IS_IPV4)) {
244 		ancil_size += sizeof (struct T_opthdr) +
245 		    P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
246 		IP_STAT(ipst, conn_in_recvtos);
247 	}
248 
249 	/*
250 	 * If IP_RECVTTL is set allocate the appropriate sized buffer
251 	 */
252 	if (recv_ancillary.crb_recvttl &&
253 	    (ira->ira_flags & IRAF_IS_IPV4)) {
254 		ancil_size += sizeof (struct T_opthdr) +
255 		    P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
256 		IP_STAT(ipst, conn_in_recvttl);
257 	}
258 
259 	return (ancil_size);
260 }
261 
262 /*
263  * Lay down the ancillary data items at "ancil_buf".
264  * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
265  * large buffer - ancil_size.
266  */
267 void
conn_recvancillary_add(conn_t * connp,crb_t recv_ancillary,ip_recv_attr_t * ira,ip_pkt_t * ipp,uchar_t * ancil_buf,uint_t ancil_size)268 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
269     ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
270 {
271 	/*
272 	 * Copy in destination address before options to avoid
273 	 * any padding issues.
274 	 */
275 	if (recv_ancillary.crb_recvdstaddr &&
276 	    (ira->ira_flags & IRAF_IS_IPV4)) {
277 		struct T_opthdr *toh;
278 		ipaddr_t *dstptr;
279 
280 		toh = (struct T_opthdr *)ancil_buf;
281 		toh->level = IPPROTO_IP;
282 		toh->name = IP_RECVDSTADDR;
283 		toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
284 		toh->status = 0;
285 		ancil_buf += sizeof (struct T_opthdr);
286 		dstptr = (ipaddr_t *)ancil_buf;
287 		*dstptr = ipp->ipp_addr_v4;
288 		ancil_buf += sizeof (ipaddr_t);
289 		ancil_size -= toh->len;
290 	}
291 
292 	/*
293 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
294 	 * are different
295 	 */
296 	if (recv_ancillary.crb_ip_recvpktinfo &&
297 	    connp->conn_family == AF_INET) {
298 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
299 		struct T_opthdr *toh;
300 		struct in_pktinfo *pktinfop;
301 		ill_t *ill;
302 		ipif_t *ipif;
303 
304 		toh = (struct T_opthdr *)ancil_buf;
305 		toh->level = IPPROTO_IP;
306 		toh->name = IP_PKTINFO;
307 		toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
308 		toh->status = 0;
309 		ancil_buf += sizeof (struct T_opthdr);
310 		pktinfop = (struct in_pktinfo *)ancil_buf;
311 
312 		pktinfop->ipi_ifindex = ira->ira_ruifindex;
313 		pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
314 
315 		/* Find a good address to report */
316 		ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
317 		if (ill != NULL) {
318 			ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
319 			if (ipif != NULL) {
320 				pktinfop->ipi_spec_dst.s_addr =
321 				    ipif->ipif_lcl_addr;
322 				ipif_refrele(ipif);
323 			}
324 			ill_refrele(ill);
325 		}
326 		pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
327 		ancil_buf += sizeof (struct in_pktinfo);
328 		ancil_size -= toh->len;
329 	}
330 
331 	if ((recv_ancillary.crb_recvopts) &&
332 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
333 		struct T_opthdr *toh;
334 
335 		toh = (struct T_opthdr *)ancil_buf;
336 		toh->level = IPPROTO_IP;
337 		toh->name = IP_RECVOPTS;
338 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
339 		toh->status = 0;
340 		ancil_buf += sizeof (struct T_opthdr);
341 		bcopy(ipp->ipp_ipv4_options, ancil_buf,
342 		    ipp->ipp_ipv4_options_len);
343 		ancil_buf += ipp->ipp_ipv4_options_len;
344 		ancil_size -= toh->len;
345 	}
346 
347 	if (recv_ancillary.crb_recvslla) {
348 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
349 		struct T_opthdr *toh;
350 		struct sockaddr_dl *dstptr;
351 		ill_t *ill;
352 		int alen = 0;
353 
354 		ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
355 		if (ill != NULL)
356 			alen = ill->ill_phys_addr_length;
357 
358 		/*
359 		 * For loopback multicast and broadcast the packet arrives
360 		 * with ira_ruifdex being the physical interface, but
361 		 * ira_l2src is all zero since ip_postfrag_loopback doesn't
362 		 * know our l2src. We don't report the address in that case.
363 		 */
364 		if (ira->ira_flags & IRAF_LOOPBACK)
365 			alen = 0;
366 
367 		toh = (struct T_opthdr *)ancil_buf;
368 		toh->level = IPPROTO_IP;
369 		toh->name = IP_RECVSLLA;
370 		toh->len = sizeof (struct T_opthdr) +
371 		    sizeof (struct sockaddr_dl);
372 		toh->status = 0;
373 		ancil_buf += sizeof (struct T_opthdr);
374 		dstptr = (struct sockaddr_dl *)ancil_buf;
375 		dstptr->sdl_family = AF_LINK;
376 		dstptr->sdl_index = ira->ira_ruifindex;
377 		if (ill != NULL)
378 			dstptr->sdl_type = ill->ill_type;
379 		else
380 			dstptr->sdl_type = 0;
381 		dstptr->sdl_nlen = 0;
382 		dstptr->sdl_alen = alen;
383 		dstptr->sdl_slen = 0;
384 		bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
385 		ancil_buf += sizeof (struct sockaddr_dl);
386 		ancil_size -= toh->len;
387 		if (ill != NULL)
388 			ill_refrele(ill);
389 	}
390 
391 	if (recv_ancillary.crb_recvif) {
392 		struct T_opthdr *toh;
393 		uint_t		*dstptr;
394 
395 		toh = (struct T_opthdr *)ancil_buf;
396 		toh->level = IPPROTO_IP;
397 		toh->name = IP_RECVIF;
398 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
399 		toh->status = 0;
400 		ancil_buf += sizeof (struct T_opthdr);
401 		dstptr = (uint_t *)ancil_buf;
402 		*dstptr = ira->ira_ruifindex;
403 		ancil_buf += sizeof (uint_t);
404 		ancil_size -= toh->len;
405 	}
406 
407 	/*
408 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
409 	 * are different
410 	 */
411 	if (recv_ancillary.crb_ip_recvpktinfo &&
412 	    connp->conn_family == AF_INET6) {
413 		struct T_opthdr *toh;
414 		struct in6_pktinfo *pkti;
415 
416 		toh = (struct T_opthdr *)ancil_buf;
417 		toh->level = IPPROTO_IPV6;
418 		toh->name = IPV6_PKTINFO;
419 		toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
420 		toh->status = 0;
421 		ancil_buf += sizeof (struct T_opthdr);
422 		pkti = (struct in6_pktinfo *)ancil_buf;
423 		if (ira->ira_flags & IRAF_IS_IPV4) {
424 			IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
425 			    &pkti->ipi6_addr);
426 		} else {
427 			pkti->ipi6_addr = ipp->ipp_addr;
428 		}
429 		pkti->ipi6_ifindex = ira->ira_ruifindex;
430 
431 		ancil_buf += sizeof (*pkti);
432 		ancil_size -= toh->len;
433 	}
434 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
435 		struct T_opthdr *toh;
436 
437 		toh = (struct T_opthdr *)ancil_buf;
438 		toh->level = IPPROTO_IPV6;
439 		toh->name = IPV6_HOPLIMIT;
440 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
441 		toh->status = 0;
442 		ancil_buf += sizeof (struct T_opthdr);
443 		*(uint_t *)ancil_buf = ipp->ipp_hoplimit;
444 		ancil_buf += sizeof (uint_t);
445 		ancil_size -= toh->len;
446 	}
447 	if (recv_ancillary.crb_ipv6_recvtclass) {
448 		struct T_opthdr *toh;
449 
450 		toh = (struct T_opthdr *)ancil_buf;
451 		toh->level = IPPROTO_IPV6;
452 		toh->name = IPV6_TCLASS;
453 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
454 		toh->status = 0;
455 		ancil_buf += sizeof (struct T_opthdr);
456 
457 		if (ira->ira_flags & IRAF_IS_IPV4)
458 			*(uint_t *)ancil_buf = ipp->ipp_type_of_service;
459 		else
460 			*(uint_t *)ancil_buf = ipp->ipp_tclass;
461 		ancil_buf += sizeof (uint_t);
462 		ancil_size -= toh->len;
463 	}
464 	if (recv_ancillary.crb_ipv6_recvhopopts &&
465 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
466 		struct T_opthdr *toh;
467 
468 		toh = (struct T_opthdr *)ancil_buf;
469 		toh->level = IPPROTO_IPV6;
470 		toh->name = IPV6_HOPOPTS;
471 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
472 		toh->status = 0;
473 		ancil_buf += sizeof (struct T_opthdr);
474 		bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
475 		ancil_buf += ipp->ipp_hopoptslen;
476 		ancil_size -= toh->len;
477 	}
478 	/*
479 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
480 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
481 	 * options that appear before a routing header.
482 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
483 	 */
484 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
485 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
486 		    (recv_ancillary.crb_ipv6_recvdstopts &&
487 		    recv_ancillary.crb_ipv6_recvrthdr)) {
488 			struct T_opthdr *toh;
489 
490 			toh = (struct T_opthdr *)ancil_buf;
491 			toh->level = IPPROTO_IPV6;
492 			toh->name = IPV6_DSTOPTS;
493 			toh->len = sizeof (struct T_opthdr) +
494 			    ipp->ipp_rthdrdstoptslen;
495 			toh->status = 0;
496 			ancil_buf += sizeof (struct T_opthdr);
497 			bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
498 			    ipp->ipp_rthdrdstoptslen);
499 			ancil_buf += ipp->ipp_rthdrdstoptslen;
500 			ancil_size -= toh->len;
501 		}
502 	}
503 	if (recv_ancillary.crb_ipv6_recvrthdr &&
504 	    (ipp->ipp_fields & IPPF_RTHDR)) {
505 		struct T_opthdr *toh;
506 
507 		toh = (struct T_opthdr *)ancil_buf;
508 		toh->level = IPPROTO_IPV6;
509 		toh->name = IPV6_RTHDR;
510 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
511 		toh->status = 0;
512 		ancil_buf += sizeof (struct T_opthdr);
513 		bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
514 		ancil_buf += ipp->ipp_rthdrlen;
515 		ancil_size -= toh->len;
516 	}
517 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
518 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
519 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
520 		struct T_opthdr *toh;
521 
522 		toh = (struct T_opthdr *)ancil_buf;
523 		toh->level = IPPROTO_IPV6;
524 		toh->name = IPV6_DSTOPTS;
525 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
526 		toh->status = 0;
527 		ancil_buf += sizeof (struct T_opthdr);
528 		bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
529 		ancil_buf += ipp->ipp_dstoptslen;
530 		ancil_size -= toh->len;
531 	}
532 
533 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
534 		struct T_opthdr *toh;
535 		cred_t		*rcr = connp->conn_cred;
536 
537 		toh = (struct T_opthdr *)ancil_buf;
538 		toh->level = SOL_SOCKET;
539 		toh->name = SCM_UCRED;
540 		toh->len = sizeof (struct T_opthdr) +
541 		    ucredminsize(ira->ira_cred);
542 		toh->status = 0;
543 		(void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
544 		ancil_buf += toh->len;
545 		ancil_size -= toh->len;
546 	}
547 	if (recv_ancillary.crb_timestamp) {
548 		struct	T_opthdr *toh;
549 
550 		toh = (struct T_opthdr *)ancil_buf;
551 		toh->level = SOL_SOCKET;
552 		toh->name = SCM_TIMESTAMP;
553 		toh->len = sizeof (struct T_opthdr) +
554 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
555 		toh->status = 0;
556 		ancil_buf += sizeof (struct T_opthdr);
557 		/* Align for gethrestime() */
558 		ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
559 		    sizeof (intptr_t));
560 		gethrestime((timestruc_t *)ancil_buf);
561 		ancil_buf = (uchar_t *)toh + toh->len;
562 		ancil_size -= toh->len;
563 	}
564 
565 	if (recv_ancillary.crb_recvtos &&
566 	    (ira->ira_flags & IRAF_IS_IPV4)) {
567 		struct	T_opthdr *toh;
568 		uint8_t	*dstptr;
569 
570 		toh = (struct T_opthdr *)ancil_buf;
571 		toh->level = IPPROTO_IP;
572 		toh->name = IP_RECVTOS;
573 		toh->len = sizeof (struct T_opthdr) +
574 		    P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
575 		toh->status = 0;
576 		ancil_buf += sizeof (struct T_opthdr);
577 		dstptr = (uint8_t *)ancil_buf;
578 		*dstptr = ipp->ipp_type_of_service;
579 		ancil_buf = (uchar_t *)toh + toh->len;
580 		ancil_size -= toh->len;
581 		ASSERT(__TPI_TOPT_ISALIGNED(toh));
582 	}
583 
584 	if (recv_ancillary.crb_recvttl &&
585 	    (ira->ira_flags & IRAF_IS_IPV4)) {
586 		struct	T_opthdr *toh;
587 		uint8_t	*dstptr;
588 
589 		toh = (struct T_opthdr *)ancil_buf;
590 		toh->level = IPPROTO_IP;
591 		toh->name = IP_RECVTTL;
592 		toh->len = sizeof (struct T_opthdr) +
593 		    P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
594 		toh->status = 0;
595 		ancil_buf += sizeof (struct T_opthdr);
596 		dstptr = (uint8_t *)ancil_buf;
597 		*dstptr = ipp->ipp_hoplimit;
598 		ancil_buf = (uchar_t *)toh + toh->len;
599 		ancil_size -= toh->len;
600 		ASSERT(__TPI_TOPT_ISALIGNED(toh));
601 	}
602 
603 	/* Consumed all of allocated space */
604 	ASSERT(ancil_size == 0);
605 
606 }
607 
608 /*
609  * This routine retrieves the current status of socket options.
610  * It returns the size of the option retrieved, or -1.
611  */
612 int
conn_opt_get(conn_opt_arg_t * coa,t_scalar_t level,t_scalar_t name,uchar_t * ptr)613 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
614     uchar_t *ptr)
615 {
616 	int		*i1 = (int *)ptr;
617 	conn_t		*connp = coa->coa_connp;
618 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
619 	ip_pkt_t	*ipp = coa->coa_ipp;
620 	ip_stack_t	*ipst = ixa->ixa_ipst;
621 	uint_t		len;
622 
623 	ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
624 
625 	switch (level) {
626 	case SOL_SOCKET:
627 		switch (name) {
628 		case SO_DEBUG:
629 			*i1 = connp->conn_debug ? SO_DEBUG : 0;
630 			break;	/* goto sizeof (int) option return */
631 		case SO_KEEPALIVE:
632 			*i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
633 			break;
634 		case SO_LINGER:	{
635 			struct linger *lgr = (struct linger *)ptr;
636 
637 			lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
638 			lgr->l_linger = connp->conn_lingertime;
639 			}
640 			return (sizeof (struct linger));
641 
642 		case SO_OOBINLINE:
643 			*i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
644 			break;
645 		case SO_REUSEADDR:
646 			*i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
647 			break;	/* goto sizeof (int) option return */
648 		case SO_TYPE:
649 			*i1 = connp->conn_so_type;
650 			break;	/* goto sizeof (int) option return */
651 		case SO_DONTROUTE:
652 			*i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
653 			    SO_DONTROUTE : 0;
654 			break;	/* goto sizeof (int) option return */
655 		case SO_USELOOPBACK:
656 			*i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
657 			break;	/* goto sizeof (int) option return */
658 		case SO_BROADCAST:
659 			*i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
660 			break;	/* goto sizeof (int) option return */
661 
662 		case SO_SNDBUF:
663 			*i1 = connp->conn_sndbuf;
664 			break;	/* goto sizeof (int) option return */
665 		case SO_RCVBUF:
666 			*i1 = connp->conn_rcvbuf;
667 			break;	/* goto sizeof (int) option return */
668 		case SO_RCVTIMEO:
669 		case SO_SNDTIMEO:
670 			/*
671 			 * Pass these two options in order for third part
672 			 * protocol usage. Here just return directly.
673 			 */
674 			*i1 = 0;
675 			break;
676 		case SO_DGRAM_ERRIND:
677 			*i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
678 			break;	/* goto sizeof (int) option return */
679 		case SO_RECVUCRED:
680 			*i1 = connp->conn_recv_ancillary.crb_recvucred;
681 			break;	/* goto sizeof (int) option return */
682 		case SO_TIMESTAMP:
683 			*i1 = connp->conn_recv_ancillary.crb_timestamp;
684 			break;	/* goto sizeof (int) option return */
685 		case SO_VRRP:
686 			*i1 = connp->conn_isvrrp;
687 			break;	/* goto sizeof (int) option return */
688 		case SO_ANON_MLP:
689 			*i1 = connp->conn_anon_mlp;
690 			break;	/* goto sizeof (int) option return */
691 		case SO_MAC_EXEMPT:
692 			*i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
693 			break;	/* goto sizeof (int) option return */
694 		case SO_MAC_IMPLICIT:
695 			*i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
696 			break;	/* goto sizeof (int) option return */
697 		case SO_ALLZONES:
698 			*i1 = connp->conn_allzones;
699 			break;	/* goto sizeof (int) option return */
700 		case SO_EXCLBIND:
701 			*i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
702 			break;
703 		case SO_PROTOTYPE:
704 			*i1 = connp->conn_proto;
705 			break;
706 
707 		case SO_DOMAIN:
708 			*i1 = connp->conn_family;
709 			break;
710 		default:
711 			return (-1);
712 		}
713 		break;
714 	case IPPROTO_IP:
715 		if (connp->conn_family != AF_INET)
716 			return (-1);
717 		switch (name) {
718 		case IP_OPTIONS:
719 		case T_IP_OPTIONS:
720 			if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
721 				return (0);
722 
723 			len = ipp->ipp_ipv4_options_len;
724 			if (len > 0) {
725 				bcopy(ipp->ipp_ipv4_options, ptr, len);
726 			}
727 			return (len);
728 
729 		case IP_PKTINFO: {
730 			/*
731 			 * This also handles IP_RECVPKTINFO.
732 			 * IP_PKTINFO and IP_RECVPKTINFO have same value.
733 			 * Differentiation is based on the size of the
734 			 * argument passed in.
735 			 */
736 			struct in_pktinfo *pktinfo;
737 
738 #ifdef notdef
739 			/* optcom doesn't provide a length with "get" */
740 			if (inlen == sizeof (int)) {
741 				/* This is IP_RECVPKTINFO option. */
742 				*i1 = connp->conn_recv_ancillary.
743 				    crb_ip_recvpktinfo;
744 				return (sizeof (int));
745 			}
746 #endif
747 			/* XXX assumes that caller has room for max size! */
748 
749 			pktinfo = (struct in_pktinfo *)ptr;
750 			pktinfo->ipi_ifindex = ixa->ixa_ifindex;
751 			if (ipp->ipp_fields & IPPF_ADDR)
752 				pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
753 			else
754 				pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
755 			return (sizeof (struct in_pktinfo));
756 		}
757 		case IP_DONTFRAG:
758 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
759 			return (sizeof (int));
760 		case IP_TOS:
761 		case T_IP_TOS:
762 			*i1 = (int)ipp->ipp_type_of_service;
763 			break;	/* goto sizeof (int) option return */
764 		case IP_TTL:
765 			*i1 = (int)ipp->ipp_unicast_hops;
766 			break;	/* goto sizeof (int) option return */
767 		case IP_DHCPINIT_IF:
768 			return (-1);
769 		case IP_NEXTHOP:
770 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
771 				*(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
772 				return (sizeof (ipaddr_t));
773 			} else {
774 				return (0);
775 			}
776 
777 		case IP_MULTICAST_IF:
778 			/* 0 address if not set */
779 			*(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
780 			return (sizeof (ipaddr_t));
781 		case IP_MULTICAST_TTL:
782 			*(uchar_t *)ptr = ixa->ixa_multicast_ttl;
783 			return (sizeof (uchar_t));
784 		case IP_MULTICAST_LOOP:
785 			*ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
786 			return (sizeof (uint8_t));
787 		case IP_RECVOPTS:
788 			*i1 = connp->conn_recv_ancillary.crb_recvopts;
789 			break;	/* goto sizeof (int) option return */
790 		case IP_RECVDSTADDR:
791 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
792 			break;	/* goto sizeof (int) option return */
793 		case IP_RECVIF:
794 			*i1 = connp->conn_recv_ancillary.crb_recvif;
795 			break;	/* goto sizeof (int) option return */
796 		case IP_RECVSLLA:
797 			*i1 = connp->conn_recv_ancillary.crb_recvslla;
798 			break;	/* goto sizeof (int) option return */
799 		case IP_RECVTTL:
800 			*i1 = connp->conn_recv_ancillary.crb_recvttl;
801 			break;	/* goto sizeof (int) option return */
802 		case IP_RECVTOS:
803 			*i1 = connp->conn_recv_ancillary.crb_recvtos;
804 			break;	/* goto sizeof (int) option return */
805 		case IP_ADD_MEMBERSHIP:
806 		case IP_DROP_MEMBERSHIP:
807 		case MCAST_JOIN_GROUP:
808 		case MCAST_LEAVE_GROUP:
809 		case IP_BLOCK_SOURCE:
810 		case IP_UNBLOCK_SOURCE:
811 		case IP_ADD_SOURCE_MEMBERSHIP:
812 		case IP_DROP_SOURCE_MEMBERSHIP:
813 		case MCAST_BLOCK_SOURCE:
814 		case MCAST_UNBLOCK_SOURCE:
815 		case MCAST_JOIN_SOURCE_GROUP:
816 		case MCAST_LEAVE_SOURCE_GROUP:
817 		case MRT_INIT:
818 		case MRT_DONE:
819 		case MRT_ADD_VIF:
820 		case MRT_DEL_VIF:
821 		case MRT_ADD_MFC:
822 		case MRT_DEL_MFC:
823 			/* cannot "get" the value for these */
824 			return (-1);
825 		case MRT_VERSION:
826 		case MRT_ASSERT:
827 			(void) ip_mrouter_get(name, connp, ptr);
828 			return (sizeof (int));
829 		case IP_SEC_OPT:
830 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
831 			    IPSEC_AF_V4));
832 		case IP_BOUND_IF:
833 			/* Zero if not set */
834 			*i1 = connp->conn_bound_if;
835 			break;	/* goto sizeof (int) option return */
836 		case IP_UNSPEC_SRC:
837 			*i1 = connp->conn_unspec_src;
838 			break;	/* goto sizeof (int) option return */
839 		case IP_BROADCAST_TTL:
840 			if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
841 				*(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
842 			else
843 				*(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
844 			return (sizeof (uchar_t));
845 		case IP_MINTTL:
846 			*i1 = connp->conn_min_ttl;
847 			return (sizeof (int));
848 		default:
849 			return (-1);
850 		}
851 		break;
852 	case IPPROTO_IPV6:
853 		if (connp->conn_family != AF_INET6)
854 			return (-1);
855 		switch (name) {
856 		case IPV6_UNICAST_HOPS:
857 			*i1 = (int)ipp->ipp_unicast_hops;
858 			break;	/* goto sizeof (int) option return */
859 		case IPV6_MULTICAST_IF:
860 			/* 0 index if not set */
861 			*i1 = ixa->ixa_multicast_ifindex;
862 			break;	/* goto sizeof (int) option return */
863 		case IPV6_MULTICAST_HOPS:
864 			*i1 = ixa->ixa_multicast_ttl;
865 			break;	/* goto sizeof (int) option return */
866 		case IPV6_MULTICAST_LOOP:
867 			*i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
868 			break;	/* goto sizeof (int) option return */
869 		case IPV6_JOIN_GROUP:
870 		case IPV6_LEAVE_GROUP:
871 		case MCAST_JOIN_GROUP:
872 		case MCAST_LEAVE_GROUP:
873 		case MCAST_BLOCK_SOURCE:
874 		case MCAST_UNBLOCK_SOURCE:
875 		case MCAST_JOIN_SOURCE_GROUP:
876 		case MCAST_LEAVE_SOURCE_GROUP:
877 			/* cannot "get" the value for these */
878 			return (-1);
879 		case IPV6_BOUND_IF:
880 			/* Zero if not set */
881 			*i1 = connp->conn_bound_if;
882 			break;	/* goto sizeof (int) option return */
883 		case IPV6_UNSPEC_SRC:
884 			*i1 = connp->conn_unspec_src;
885 			break;	/* goto sizeof (int) option return */
886 		case IPV6_RECVPKTINFO:
887 			*i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
888 			break;	/* goto sizeof (int) option return */
889 		case IPV6_RECVTCLASS:
890 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
891 			break;	/* goto sizeof (int) option return */
892 		case IPV6_RECVPATHMTU:
893 			*i1 = connp->conn_ipv6_recvpathmtu;
894 			break;	/* goto sizeof (int) option return */
895 		case IPV6_RECVHOPLIMIT:
896 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
897 			break;	/* goto sizeof (int) option return */
898 		case IPV6_RECVHOPOPTS:
899 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
900 			break;	/* goto sizeof (int) option return */
901 		case IPV6_RECVDSTOPTS:
902 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
903 			break;	/* goto sizeof (int) option return */
904 		case _OLD_IPV6_RECVDSTOPTS:
905 			*i1 =
906 			    connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
907 			break;	/* goto sizeof (int) option return */
908 		case IPV6_RECVRTHDRDSTOPTS:
909 			*i1 = connp->conn_recv_ancillary.
910 			    crb_ipv6_recvrthdrdstopts;
911 			break;	/* goto sizeof (int) option return */
912 		case IPV6_RECVRTHDR:
913 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
914 			break;	/* goto sizeof (int) option return */
915 		case IPV6_PKTINFO: {
916 			/* XXX assumes that caller has room for max size! */
917 			struct in6_pktinfo *pkti;
918 
919 			pkti = (struct in6_pktinfo *)ptr;
920 			pkti->ipi6_ifindex = ixa->ixa_ifindex;
921 			if (ipp->ipp_fields & IPPF_ADDR)
922 				pkti->ipi6_addr = ipp->ipp_addr;
923 			else
924 				pkti->ipi6_addr = ipv6_all_zeros;
925 			return (sizeof (struct in6_pktinfo));
926 		}
927 		case IPV6_TCLASS:
928 			*i1 = ipp->ipp_tclass;
929 			break;	/* goto sizeof (int) option return */
930 		case IPV6_NEXTHOP: {
931 			sin6_t *sin6 = (sin6_t *)ptr;
932 
933 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
934 				return (0);
935 
936 			*sin6 = sin6_null;
937 			sin6->sin6_family = AF_INET6;
938 			sin6->sin6_addr = ixa->ixa_nexthop_v6;
939 
940 			return (sizeof (sin6_t));
941 		}
942 		case IPV6_HOPOPTS:
943 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
944 				return (0);
945 			bcopy(ipp->ipp_hopopts, ptr,
946 			    ipp->ipp_hopoptslen);
947 			return (ipp->ipp_hopoptslen);
948 		case IPV6_RTHDRDSTOPTS:
949 			if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
950 				return (0);
951 			bcopy(ipp->ipp_rthdrdstopts, ptr,
952 			    ipp->ipp_rthdrdstoptslen);
953 			return (ipp->ipp_rthdrdstoptslen);
954 		case IPV6_RTHDR:
955 			if (!(ipp->ipp_fields & IPPF_RTHDR))
956 				return (0);
957 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
958 			return (ipp->ipp_rthdrlen);
959 		case IPV6_DSTOPTS:
960 			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
961 				return (0);
962 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
963 			return (ipp->ipp_dstoptslen);
964 		case IPV6_PATHMTU:
965 			return (ip_fill_mtuinfo(connp, ixa,
966 			    (struct ip6_mtuinfo *)ptr));
967 		case IPV6_SEC_OPT:
968 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
969 			    IPSEC_AF_V6));
970 		case IPV6_SRC_PREFERENCES:
971 			return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
972 		case IPV6_DONTFRAG:
973 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
974 			return (sizeof (int));
975 		case IPV6_USE_MIN_MTU:
976 			if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
977 				*i1 = ixa->ixa_use_min_mtu;
978 			else
979 				*i1 = IPV6_USE_MIN_MTU_MULTICAST;
980 			break;
981 		case IPV6_V6ONLY:
982 			*i1 = connp->conn_ipv6_v6only;
983 			return (sizeof (int));
984 		case IPV6_MINHOPCOUNT:
985 			*i1 = connp->conn_min_ttl;
986 			return (sizeof (int));
987 		default:
988 			return (-1);
989 		}
990 		break;
991 	case IPPROTO_UDP:
992 		switch (name) {
993 		case UDP_ANONPRIVBIND:
994 			*i1 = connp->conn_anon_priv_bind;
995 			break;
996 		case UDP_EXCLBIND:
997 			*i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
998 			break;
999 		default:
1000 			return (-1);
1001 		}
1002 		break;
1003 	case IPPROTO_TCP:
1004 		switch (name) {
1005 		case TCP_RECVDSTADDR:
1006 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
1007 			break;
1008 		case TCP_ANONPRIVBIND:
1009 			*i1 = connp->conn_anon_priv_bind;
1010 			break;
1011 		case TCP_EXCLBIND:
1012 			*i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
1013 			break;
1014 		default:
1015 			return (-1);
1016 		}
1017 		break;
1018 	default:
1019 		return (-1);
1020 	}
1021 	return (sizeof (int));
1022 }
1023 
1024 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
1025     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1026 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
1027     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1028 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
1029     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1030 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
1031     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1032 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
1033     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1034 
1035 /*
1036  * This routine sets the most common socket options including some
1037  * that are transport/ULP specific.
1038  * It returns errno or zero.
1039  *
1040  * For fixed length options, there is no sanity check
1041  * of passed in length is done. It is assumed *_optcom_req()
1042  * routines do the right thing.
1043  */
1044 int
conn_opt_set(conn_opt_arg_t * coa,t_scalar_t level,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)1045 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1046     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1047 {
1048 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1049 
1050 	/* We have different functions for different levels */
1051 	switch (level) {
1052 	case SOL_SOCKET:
1053 		return (conn_opt_set_socket(coa, name, inlen, invalp,
1054 		    checkonly, cr));
1055 	case IPPROTO_IP:
1056 		return (conn_opt_set_ip(coa, name, inlen, invalp,
1057 		    checkonly, cr));
1058 	case IPPROTO_IPV6:
1059 		return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1060 		    checkonly, cr));
1061 	case IPPROTO_UDP:
1062 		return (conn_opt_set_udp(coa, name, inlen, invalp,
1063 		    checkonly, cr));
1064 	case IPPROTO_TCP:
1065 		return (conn_opt_set_tcp(coa, name, inlen, invalp,
1066 		    checkonly, cr));
1067 	default:
1068 		return (0);
1069 	}
1070 }
1071 
1072 /*
1073  * Handle SOL_SOCKET
1074  * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1075  * it implement their own checks and setting of conn_proto.
1076  */
1077 /* ARGSUSED1 */
1078 static int
conn_opt_set_socket(conn_opt_arg_t * coa,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)1079 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1080     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1081 {
1082 	conn_t		*connp = coa->coa_connp;
1083 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1084 	int		*i1 = (int *)invalp;
1085 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1086 
1087 	switch (name) {
1088 	case SO_ALLZONES:
1089 		if (IPCL_IS_BOUND(connp))
1090 			return (EINVAL);
1091 		break;
1092 	case SO_VRRP:
1093 		if (secpolicy_ip_config(cr, checkonly) != 0)
1094 			return (EACCES);
1095 		break;
1096 	case SO_MAC_EXEMPT:
1097 		if (secpolicy_net_mac_aware(cr) != 0)
1098 			return (EACCES);
1099 		if (IPCL_IS_BOUND(connp))
1100 			return (EINVAL);
1101 		break;
1102 	case SO_MAC_IMPLICIT:
1103 		if (secpolicy_net_mac_implicit(cr) != 0)
1104 			return (EACCES);
1105 		break;
1106 	}
1107 	if (checkonly)
1108 		return (0);
1109 
1110 	mutex_enter(&connp->conn_lock);
1111 	/* Here we set the actual option value */
1112 	switch (name) {
1113 	case SO_DEBUG:
1114 		connp->conn_debug = onoff;
1115 		break;
1116 	case SO_KEEPALIVE:
1117 		connp->conn_keepalive = onoff;
1118 		break;
1119 	case SO_LINGER: {
1120 		struct linger *lgr = (struct linger *)invalp;
1121 
1122 		if (lgr->l_onoff) {
1123 			connp->conn_linger = 1;
1124 			connp->conn_lingertime = lgr->l_linger;
1125 		} else {
1126 			connp->conn_linger = 0;
1127 			connp->conn_lingertime = 0;
1128 		}
1129 		break;
1130 	}
1131 	case SO_OOBINLINE:
1132 		connp->conn_oobinline = onoff;
1133 		coa->coa_changed |= COA_OOBINLINE_CHANGED;
1134 		break;
1135 	case SO_REUSEADDR:
1136 		connp->conn_reuseaddr = onoff;
1137 		break;
1138 	case SO_DONTROUTE:
1139 		if (onoff)
1140 			ixa->ixa_flags |= IXAF_DONTROUTE;
1141 		else
1142 			ixa->ixa_flags &= ~IXAF_DONTROUTE;
1143 		coa->coa_changed |= COA_ROUTE_CHANGED;
1144 		break;
1145 	case SO_USELOOPBACK:
1146 		connp->conn_useloopback = onoff;
1147 		break;
1148 	case SO_BROADCAST:
1149 		connp->conn_broadcast = onoff;
1150 		break;
1151 	case SO_SNDBUF:
1152 		/* ULP has range checked the value */
1153 		connp->conn_sndbuf = *i1;
1154 		coa->coa_changed |= COA_SNDBUF_CHANGED;
1155 		break;
1156 	case SO_RCVBUF:
1157 		/* ULP has range checked the value */
1158 		connp->conn_rcvbuf = *i1;
1159 		coa->coa_changed |= COA_RCVBUF_CHANGED;
1160 		break;
1161 	case SO_RCVTIMEO:
1162 	case SO_SNDTIMEO:
1163 		/*
1164 		 * Pass these two options in order for third part
1165 		 * protocol usage.
1166 		 */
1167 		break;
1168 	case SO_DGRAM_ERRIND:
1169 		connp->conn_dgram_errind = onoff;
1170 		break;
1171 	case SO_RECVUCRED:
1172 		connp->conn_recv_ancillary.crb_recvucred = onoff;
1173 		break;
1174 	case SO_ALLZONES:
1175 		connp->conn_allzones = onoff;
1176 		coa->coa_changed |= COA_ROUTE_CHANGED;
1177 		if (onoff)
1178 			ixa->ixa_zoneid = ALL_ZONES;
1179 		else
1180 			ixa->ixa_zoneid = connp->conn_zoneid;
1181 		break;
1182 	case SO_TIMESTAMP:
1183 		connp->conn_recv_ancillary.crb_timestamp = onoff;
1184 		break;
1185 	case SO_VRRP:
1186 		connp->conn_isvrrp = onoff;
1187 		break;
1188 	case SO_ANON_MLP:
1189 		connp->conn_anon_mlp = onoff;
1190 		break;
1191 	case SO_MAC_EXEMPT:
1192 		connp->conn_mac_mode = onoff ?
1193 		    CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1194 		break;
1195 	case SO_MAC_IMPLICIT:
1196 		connp->conn_mac_mode = onoff ?
1197 		    CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1198 		break;
1199 	case SO_EXCLBIND:
1200 		connp->conn_exclbind = onoff;
1201 		break;
1202 	}
1203 	mutex_exit(&connp->conn_lock);
1204 	return (0);
1205 }
1206 
1207 /* Handle IPPROTO_IP */
1208 static int
conn_opt_set_ip(conn_opt_arg_t * coa,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)1209 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1210     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1211 {
1212 	conn_t		*connp = coa->coa_connp;
1213 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1214 	ip_pkt_t	*ipp = coa->coa_ipp;
1215 	int		*i1 = (int *)invalp;
1216 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1217 	ipaddr_t	addr = (ipaddr_t)*i1;
1218 	uint_t		ifindex;
1219 	zoneid_t	zoneid = IPCL_ZONEID(connp);
1220 	ipif_t		*ipif;
1221 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1222 	int		error;
1223 
1224 	if (connp->conn_family != AF_INET)
1225 		return (EINVAL);
1226 
1227 	ifindex = UINT_MAX;
1228 	switch (name) {
1229 	case IP_TTL:
1230 		/* Don't allow zero */
1231 		if (*i1 < 1 || *i1 > MAXTTL)
1232 			return (EINVAL);
1233 		break;
1234 	case IP_MULTICAST_IF:
1235 		if (addr == INADDR_ANY) {
1236 			/* Clear */
1237 			ifindex = 0;
1238 			break;
1239 		}
1240 		ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1241 		if (ipif == NULL)
1242 			return (EHOSTUNREACH);
1243 		/* not supported by the virtual network iface */
1244 		if (IS_VNI(ipif->ipif_ill)) {
1245 			ipif_refrele(ipif);
1246 			return (EINVAL);
1247 		}
1248 		ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1249 		ipif_refrele(ipif);
1250 		break;
1251 	case IP_NEXTHOP: {
1252 		ire_t	*ire;
1253 
1254 		if (addr == INADDR_ANY) {
1255 			/* Clear */
1256 			break;
1257 		}
1258 		/* Verify that the next-hop is on-link */
1259 		ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1260 		    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1261 		if (ire == NULL)
1262 			return (EHOSTUNREACH);
1263 		ire_refrele(ire);
1264 		break;
1265 	}
1266 	case IP_OPTIONS:
1267 	case T_IP_OPTIONS: {
1268 		uint_t newlen;
1269 
1270 		if (ipp->ipp_fields & IPPF_LABEL_V4)
1271 			newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1272 		else
1273 			newlen = inlen;
1274 		if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1275 			return (EINVAL);
1276 		}
1277 		break;
1278 	}
1279 	case IP_PKTINFO: {
1280 		struct in_pktinfo *pktinfo;
1281 
1282 		/* Two different valid lengths */
1283 		if (inlen != sizeof (int) &&
1284 		    inlen != sizeof (struct in_pktinfo))
1285 			return (EINVAL);
1286 		if (inlen == sizeof (int))
1287 			break;
1288 
1289 		pktinfo = (struct in_pktinfo *)invalp;
1290 		if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1291 			switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1292 			    zoneid, ipst, B_FALSE)) {
1293 			case IPVL_UNICAST_UP:
1294 			case IPVL_UNICAST_DOWN:
1295 				break;
1296 			default:
1297 				return (EADDRNOTAVAIL);
1298 			}
1299 		}
1300 		if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1301 		    B_FALSE, ipst))
1302 			return (ENXIO);
1303 		break;
1304 	}
1305 	case IP_BOUND_IF:
1306 		ifindex = *(uint_t *)i1;
1307 
1308 		/* Just check it is ok. */
1309 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1310 			return (ENXIO);
1311 		break;
1312 	case IP_MINTTL:
1313 		if (*i1 < 0 || *i1 > MAXTTL)
1314 			return (EINVAL);
1315 		break;
1316 	}
1317 	if (checkonly)
1318 		return (0);
1319 
1320 	/* Here we set the actual option value */
1321 	/*
1322 	 * conn_lock protects the bitfields, and is used to
1323 	 * set the fields atomically. Not needed for ixa settings since
1324 	 * the caller has an exclusive copy of the ixa.
1325 	 * We can not hold conn_lock across the multicast options though.
1326 	 */
1327 	switch (name) {
1328 	case IP_OPTIONS:
1329 	case T_IP_OPTIONS:
1330 		/* Save options for use by IP. */
1331 		mutex_enter(&connp->conn_lock);
1332 		error = optcom_pkt_set(invalp, inlen,
1333 		    (uchar_t **)&ipp->ipp_ipv4_options,
1334 		    &ipp->ipp_ipv4_options_len);
1335 		if (error != 0) {
1336 			mutex_exit(&connp->conn_lock);
1337 			return (error);
1338 		}
1339 		if (ipp->ipp_ipv4_options_len == 0) {
1340 			ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1341 		} else {
1342 			ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1343 		}
1344 		mutex_exit(&connp->conn_lock);
1345 		coa->coa_changed |= COA_HEADER_CHANGED;
1346 		coa->coa_changed |= COA_WROFF_CHANGED;
1347 		break;
1348 
1349 	case IP_TTL:
1350 		mutex_enter(&connp->conn_lock);
1351 		ipp->ipp_unicast_hops = *i1;
1352 		mutex_exit(&connp->conn_lock);
1353 		coa->coa_changed |= COA_HEADER_CHANGED;
1354 		break;
1355 	case IP_TOS:
1356 	case T_IP_TOS:
1357 		mutex_enter(&connp->conn_lock);
1358 		if (*i1 == -1) {
1359 			ipp->ipp_type_of_service = 0;
1360 		} else {
1361 			ipp->ipp_type_of_service = *i1;
1362 		}
1363 		mutex_exit(&connp->conn_lock);
1364 		coa->coa_changed |= COA_HEADER_CHANGED;
1365 		break;
1366 	case IP_MULTICAST_IF:
1367 		ixa->ixa_multicast_ifindex = ifindex;
1368 		ixa->ixa_multicast_ifaddr = addr;
1369 		coa->coa_changed |= COA_ROUTE_CHANGED;
1370 		break;
1371 	case IP_MULTICAST_TTL:
1372 		ixa->ixa_multicast_ttl = *invalp;
1373 		/* Handled automatically by ip_output */
1374 		break;
1375 	case IP_MULTICAST_LOOP:
1376 		if (*invalp != 0)
1377 			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1378 		else
1379 			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1380 		/* Handled automatically by ip_output */
1381 		break;
1382 	case IP_RECVOPTS:
1383 		mutex_enter(&connp->conn_lock);
1384 		connp->conn_recv_ancillary.crb_recvopts = onoff;
1385 		mutex_exit(&connp->conn_lock);
1386 		break;
1387 	case IP_RECVDSTADDR:
1388 		mutex_enter(&connp->conn_lock);
1389 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1390 		mutex_exit(&connp->conn_lock);
1391 		break;
1392 	case IP_RECVIF:
1393 		mutex_enter(&connp->conn_lock);
1394 		connp->conn_recv_ancillary.crb_recvif = onoff;
1395 		mutex_exit(&connp->conn_lock);
1396 		break;
1397 	case IP_RECVSLLA:
1398 		mutex_enter(&connp->conn_lock);
1399 		connp->conn_recv_ancillary.crb_recvslla = onoff;
1400 		mutex_exit(&connp->conn_lock);
1401 		break;
1402 	case IP_RECVTTL:
1403 		mutex_enter(&connp->conn_lock);
1404 		connp->conn_recv_ancillary.crb_recvttl = onoff;
1405 		mutex_exit(&connp->conn_lock);
1406 		break;
1407 	case IP_RECVTOS:
1408 		mutex_enter(&connp->conn_lock);
1409 		connp->conn_recv_ancillary.crb_recvtos = onoff;
1410 		mutex_exit(&connp->conn_lock);
1411 		break;
1412 	case IP_PKTINFO: {
1413 		/*
1414 		 * This also handles IP_RECVPKTINFO.
1415 		 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1416 		 * Differentiation is based on the size of the
1417 		 * argument passed in.
1418 		 */
1419 		struct in_pktinfo *pktinfo;
1420 
1421 		if (inlen == sizeof (int)) {
1422 			/* This is IP_RECVPKTINFO option. */
1423 			mutex_enter(&connp->conn_lock);
1424 			connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1425 			    onoff;
1426 			mutex_exit(&connp->conn_lock);
1427 			break;
1428 		}
1429 
1430 		/* This is IP_PKTINFO option. */
1431 		mutex_enter(&connp->conn_lock);
1432 		pktinfo = (struct in_pktinfo *)invalp;
1433 		if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1434 			ipp->ipp_fields |= IPPF_ADDR;
1435 			IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1436 			    &ipp->ipp_addr);
1437 		} else {
1438 			ipp->ipp_fields &= ~IPPF_ADDR;
1439 			ipp->ipp_addr = ipv6_all_zeros;
1440 		}
1441 		mutex_exit(&connp->conn_lock);
1442 		ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1443 		coa->coa_changed |= COA_ROUTE_CHANGED;
1444 		coa->coa_changed |= COA_HEADER_CHANGED;
1445 		break;
1446 	}
1447 	case IP_DONTFRAG:
1448 		if (onoff) {
1449 			ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1450 			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1451 		} else {
1452 			ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1453 			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1454 		}
1455 		/* Need to redo ip_attr_connect */
1456 		coa->coa_changed |= COA_ROUTE_CHANGED;
1457 		break;
1458 	case IP_ADD_MEMBERSHIP:
1459 	case IP_DROP_MEMBERSHIP:
1460 	case MCAST_JOIN_GROUP:
1461 	case MCAST_LEAVE_GROUP:
1462 		return (ip_opt_set_multicast_group(connp, name,
1463 		    invalp, B_FALSE, checkonly));
1464 
1465 	case IP_BLOCK_SOURCE:
1466 	case IP_UNBLOCK_SOURCE:
1467 	case IP_ADD_SOURCE_MEMBERSHIP:
1468 	case IP_DROP_SOURCE_MEMBERSHIP:
1469 	case MCAST_BLOCK_SOURCE:
1470 	case MCAST_UNBLOCK_SOURCE:
1471 	case MCAST_JOIN_SOURCE_GROUP:
1472 	case MCAST_LEAVE_SOURCE_GROUP:
1473 		return (ip_opt_set_multicast_sources(connp, name,
1474 		    invalp, B_FALSE, checkonly));
1475 
1476 	case IP_SEC_OPT:
1477 		mutex_enter(&connp->conn_lock);
1478 		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1479 		mutex_exit(&connp->conn_lock);
1480 		if (error != 0) {
1481 			return (error);
1482 		}
1483 		/* This is an IPsec policy change - redo ip_attr_connect */
1484 		coa->coa_changed |= COA_ROUTE_CHANGED;
1485 		break;
1486 	case IP_NEXTHOP:
1487 		ixa->ixa_nexthop_v4 = addr;
1488 		if (addr != INADDR_ANY)
1489 			ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1490 		else
1491 			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1492 		coa->coa_changed |= COA_ROUTE_CHANGED;
1493 		break;
1494 
1495 	case IP_BOUND_IF:
1496 		ixa->ixa_ifindex = ifindex;		/* Send */
1497 		mutex_enter(&connp->conn_lock);
1498 		connp->conn_incoming_ifindex = ifindex;	/* Receive */
1499 		connp->conn_bound_if = ifindex;		/* getsockopt */
1500 		mutex_exit(&connp->conn_lock);
1501 		coa->coa_changed |= COA_ROUTE_CHANGED;
1502 		break;
1503 	case IP_UNSPEC_SRC:
1504 		mutex_enter(&connp->conn_lock);
1505 		connp->conn_unspec_src = onoff;
1506 		if (onoff)
1507 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1508 		else
1509 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1510 
1511 		mutex_exit(&connp->conn_lock);
1512 		break;
1513 	case IP_BROADCAST_TTL:
1514 		ixa->ixa_broadcast_ttl = *invalp;
1515 		ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1516 		/* Handled automatically by ip_output */
1517 		break;
1518 	case MRT_INIT:
1519 	case MRT_DONE:
1520 	case MRT_ADD_VIF:
1521 	case MRT_DEL_VIF:
1522 	case MRT_ADD_MFC:
1523 	case MRT_DEL_MFC:
1524 	case MRT_ASSERT:
1525 		if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1526 			return (error);
1527 		}
1528 		error = ip_mrouter_set((int)name, connp, checkonly,
1529 		    (uchar_t *)invalp, inlen);
1530 		if (error) {
1531 			return (error);
1532 		}
1533 		return (0);
1534 	case IP_MINTTL:
1535 		mutex_enter(&connp->conn_lock);
1536 		connp->conn_min_ttl = *i1;
1537 		mutex_exit(&connp->conn_lock);
1538 		break;
1539 	}
1540 	return (0);
1541 }
1542 
1543 /* Handle IPPROTO_IPV6 */
1544 static int
conn_opt_set_ipv6(conn_opt_arg_t * coa,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)1545 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1546     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1547 {
1548 	conn_t		*connp = coa->coa_connp;
1549 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1550 	ip_pkt_t	*ipp = coa->coa_ipp;
1551 	int		*i1 = (int *)invalp;
1552 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1553 	uint_t		ifindex;
1554 	zoneid_t	zoneid = IPCL_ZONEID(connp);
1555 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1556 	int		error;
1557 
1558 	if (connp->conn_family != AF_INET6)
1559 		return (EINVAL);
1560 
1561 	ifindex = UINT_MAX;
1562 	switch (name) {
1563 	case IPV6_MULTICAST_IF:
1564 		/*
1565 		 * The only possible error is EINVAL.
1566 		 * We call this option on both V4 and V6
1567 		 * If both fail, then this call returns
1568 		 * EINVAL. If at least one of them succeeds we
1569 		 * return success.
1570 		 */
1571 		ifindex = *(uint_t *)i1;
1572 
1573 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1574 		    !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1575 			return (EINVAL);
1576 		break;
1577 	case IPV6_UNICAST_HOPS:
1578 		/* Don't allow zero. -1 means to use default */
1579 		if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1580 			return (EINVAL);
1581 		break;
1582 	case IPV6_MULTICAST_HOPS:
1583 		/* -1 means use default */
1584 		if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1585 			return (EINVAL);
1586 		break;
1587 	case IPV6_MULTICAST_LOOP:
1588 		if (*i1 != 0 && *i1 != 1)
1589 			return (EINVAL);
1590 		break;
1591 	case IPV6_BOUND_IF:
1592 		ifindex = *(uint_t *)i1;
1593 
1594 		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1595 			return (ENXIO);
1596 		break;
1597 	case IPV6_PKTINFO: {
1598 		struct in6_pktinfo *pkti;
1599 		boolean_t isv6;
1600 
1601 		if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1602 			return (EINVAL);
1603 		if (inlen == 0)
1604 			break;	/* Clear values below */
1605 
1606 		/*
1607 		 * Verify the source address and ifindex. Privileged users
1608 		 * can use any source address.
1609 		 */
1610 		pkti = (struct in6_pktinfo *)invalp;
1611 
1612 		/*
1613 		 * For link-local addresses we use the ipi6_ifindex when
1614 		 * we verify the local address.
1615 		 * If net_rawaccess then any source address can be used.
1616 		 */
1617 		if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1618 		    secpolicy_net_rawaccess(cr) != 0) {
1619 			uint_t scopeid = 0;
1620 			in6_addr_t *v6src = &pkti->ipi6_addr;
1621 			ipaddr_t v4src;
1622 			ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1623 
1624 			if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1625 				IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1626 				if (v4src != INADDR_ANY) {
1627 					laddr_type = ip_laddr_verify_v4(v4src,
1628 					    zoneid, ipst, B_FALSE);
1629 				}
1630 			} else {
1631 				if (IN6_IS_ADDR_LINKSCOPE(v6src))
1632 					scopeid = pkti->ipi6_ifindex;
1633 
1634 				laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1635 				    ipst, B_FALSE, scopeid);
1636 			}
1637 			switch (laddr_type) {
1638 			case IPVL_UNICAST_UP:
1639 			case IPVL_UNICAST_DOWN:
1640 				break;
1641 			default:
1642 				return (EADDRNOTAVAIL);
1643 			}
1644 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1645 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1646 			/* Allow any source */
1647 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1648 		}
1649 		isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1650 		if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1651 		    ipst))
1652 			return (ENXIO);
1653 		break;
1654 	}
1655 	case IPV6_HOPLIMIT:
1656 		/* It is only allowed as ancilary data */
1657 		if (!coa->coa_ancillary)
1658 			return (EINVAL);
1659 
1660 		if (inlen != 0 && inlen != sizeof (int))
1661 			return (EINVAL);
1662 		if (inlen == sizeof (int)) {
1663 			if (*i1 > IPV6_MAX_HOPS || *i1 < -1 || *i1 == 0)
1664 				return (EINVAL);
1665 		}
1666 		break;
1667 	case IPV6_TCLASS:
1668 		if (inlen != 0 && inlen != sizeof (int))
1669 			return (EINVAL);
1670 		if (inlen == sizeof (int)) {
1671 			if (*i1 > 255 || *i1 < -1)
1672 				return (EINVAL);
1673 		}
1674 		break;
1675 	case IPV6_NEXTHOP:
1676 		if (inlen != 0 && inlen != sizeof (sin6_t))
1677 			return (EINVAL);
1678 		if (inlen == sizeof (sin6_t)) {
1679 			sin6_t *sin6 = (sin6_t *)invalp;
1680 			ire_t	*ire;
1681 
1682 			if (sin6->sin6_family != AF_INET6)
1683 				return (EAFNOSUPPORT);
1684 			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1685 				return (EADDRNOTAVAIL);
1686 
1687 			/* Verify that the next-hop is on-link */
1688 			ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1689 			    0, 0, IRE_ONLINK, NULL, zoneid,
1690 			    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1691 			if (ire == NULL)
1692 				return (EHOSTUNREACH);
1693 			ire_refrele(ire);
1694 			break;
1695 		}
1696 		break;
1697 	case IPV6_RTHDR:
1698 	case IPV6_DSTOPTS:
1699 	case IPV6_RTHDRDSTOPTS:
1700 	case IPV6_HOPOPTS: {
1701 		/* All have the length field in the same place */
1702 		ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1703 		/*
1704 		 * Sanity checks - minimum size, size a multiple of
1705 		 * eight bytes, and matching size passed in.
1706 		 */
1707 		if (inlen != 0 &&
1708 		    inlen != (8 * (hopts->ip6h_len + 1)))
1709 			return (EINVAL);
1710 		break;
1711 	}
1712 	case IPV6_PATHMTU:
1713 		/* Can't be set */
1714 		return (EINVAL);
1715 
1716 	case IPV6_USE_MIN_MTU:
1717 		if (inlen != sizeof (int))
1718 			return (EINVAL);
1719 		if (*i1 < -1 || *i1 > 1)
1720 			return (EINVAL);
1721 		break;
1722 	case IPV6_SRC_PREFERENCES:
1723 		if (inlen != sizeof (uint32_t))
1724 			return (EINVAL);
1725 		break;
1726 	case IPV6_V6ONLY:
1727 		if (*i1 < 0 || *i1 > 1) {
1728 			return (EINVAL);
1729 		}
1730 		break;
1731 	case IPV6_MINHOPCOUNT:
1732 		if (*i1 < 0 || *i1 > IPV6_MAX_HOPS)
1733 			return (EINVAL);
1734 		break;
1735 	}
1736 	if (checkonly)
1737 		return (0);
1738 
1739 	/* Here we set the actual option value */
1740 	/*
1741 	 * conn_lock protects the bitfields, and is used to
1742 	 * set the fields atomically. Not needed for ixa settings since
1743 	 * the caller has an exclusive copy of the ixa.
1744 	 * We can not hold conn_lock across the multicast options though.
1745 	 */
1746 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1747 	switch (name) {
1748 	case IPV6_MULTICAST_IF:
1749 		ixa->ixa_multicast_ifindex = ifindex;
1750 		/* Need to redo ip_attr_connect */
1751 		coa->coa_changed |= COA_ROUTE_CHANGED;
1752 		break;
1753 	case IPV6_UNICAST_HOPS:
1754 		/* -1 means use default */
1755 		mutex_enter(&connp->conn_lock);
1756 		if (*i1 == -1) {
1757 			ipp->ipp_unicast_hops = connp->conn_default_ttl;
1758 		} else {
1759 			ipp->ipp_unicast_hops = (uint8_t)*i1;
1760 		}
1761 		mutex_exit(&connp->conn_lock);
1762 		coa->coa_changed |= COA_HEADER_CHANGED;
1763 		break;
1764 	case IPV6_MULTICAST_HOPS:
1765 		/* -1 means use default */
1766 		if (*i1 == -1) {
1767 			ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1768 		} else {
1769 			ixa->ixa_multicast_ttl = (uint8_t)*i1;
1770 		}
1771 		/* Handled automatically by ip_output */
1772 		break;
1773 	case IPV6_MULTICAST_LOOP:
1774 		if (*i1 != 0)
1775 			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1776 		else
1777 			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1778 		/* Handled automatically by ip_output */
1779 		break;
1780 	case IPV6_JOIN_GROUP:
1781 	case IPV6_LEAVE_GROUP:
1782 	case MCAST_JOIN_GROUP:
1783 	case MCAST_LEAVE_GROUP:
1784 		return (ip_opt_set_multicast_group(connp, name,
1785 		    invalp, B_TRUE, checkonly));
1786 
1787 	case MCAST_BLOCK_SOURCE:
1788 	case MCAST_UNBLOCK_SOURCE:
1789 	case MCAST_JOIN_SOURCE_GROUP:
1790 	case MCAST_LEAVE_SOURCE_GROUP:
1791 		return (ip_opt_set_multicast_sources(connp, name,
1792 		    invalp, B_TRUE, checkonly));
1793 
1794 	case IPV6_BOUND_IF:
1795 		ixa->ixa_ifindex = ifindex;		/* Send */
1796 		mutex_enter(&connp->conn_lock);
1797 		connp->conn_incoming_ifindex = ifindex;	/* Receive */
1798 		connp->conn_bound_if = ifindex;		/* getsockopt */
1799 		mutex_exit(&connp->conn_lock);
1800 		coa->coa_changed |= COA_ROUTE_CHANGED;
1801 		break;
1802 	case IPV6_UNSPEC_SRC:
1803 		mutex_enter(&connp->conn_lock);
1804 		connp->conn_unspec_src = onoff;
1805 		if (onoff)
1806 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1807 		else
1808 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1809 		mutex_exit(&connp->conn_lock);
1810 		break;
1811 	case IPV6_RECVPKTINFO:
1812 		mutex_enter(&connp->conn_lock);
1813 		connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1814 		mutex_exit(&connp->conn_lock);
1815 		break;
1816 	case IPV6_RECVTCLASS:
1817 		mutex_enter(&connp->conn_lock);
1818 		connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1819 		mutex_exit(&connp->conn_lock);
1820 		break;
1821 	case IPV6_RECVPATHMTU:
1822 		mutex_enter(&connp->conn_lock);
1823 		connp->conn_ipv6_recvpathmtu = onoff;
1824 		mutex_exit(&connp->conn_lock);
1825 		break;
1826 	case IPV6_RECVHOPLIMIT:
1827 		mutex_enter(&connp->conn_lock);
1828 		connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1829 		    onoff;
1830 		mutex_exit(&connp->conn_lock);
1831 		break;
1832 	case IPV6_RECVHOPOPTS:
1833 		mutex_enter(&connp->conn_lock);
1834 		connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1835 		mutex_exit(&connp->conn_lock);
1836 		break;
1837 	case IPV6_RECVDSTOPTS:
1838 		mutex_enter(&connp->conn_lock);
1839 		connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1840 		mutex_exit(&connp->conn_lock);
1841 		break;
1842 	case _OLD_IPV6_RECVDSTOPTS:
1843 		mutex_enter(&connp->conn_lock);
1844 		connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1845 		    onoff;
1846 		mutex_exit(&connp->conn_lock);
1847 		break;
1848 	case IPV6_RECVRTHDRDSTOPTS:
1849 		mutex_enter(&connp->conn_lock);
1850 		connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1851 		    onoff;
1852 		mutex_exit(&connp->conn_lock);
1853 		break;
1854 	case IPV6_RECVRTHDR:
1855 		mutex_enter(&connp->conn_lock);
1856 		connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1857 		mutex_exit(&connp->conn_lock);
1858 		break;
1859 	case IPV6_PKTINFO:
1860 		mutex_enter(&connp->conn_lock);
1861 		if (inlen == 0) {
1862 			ipp->ipp_fields &= ~IPPF_ADDR;
1863 			ipp->ipp_addr = ipv6_all_zeros;
1864 			ixa->ixa_ifindex = 0;
1865 		} else {
1866 			struct in6_pktinfo *pkti;
1867 
1868 			pkti = (struct in6_pktinfo *)invalp;
1869 			ipp->ipp_addr = pkti->ipi6_addr;
1870 			if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1871 				ipp->ipp_fields |= IPPF_ADDR;
1872 			else
1873 				ipp->ipp_fields &= ~IPPF_ADDR;
1874 			ixa->ixa_ifindex = pkti->ipi6_ifindex;
1875 		}
1876 		mutex_exit(&connp->conn_lock);
1877 		/* Source and ifindex might have changed */
1878 		coa->coa_changed |= COA_HEADER_CHANGED;
1879 		coa->coa_changed |= COA_ROUTE_CHANGED;
1880 		break;
1881 	case IPV6_HOPLIMIT:
1882 		mutex_enter(&connp->conn_lock);
1883 		if (inlen == 0 || *i1 == -1) {
1884 			/* Revert to default */
1885 			ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1886 			ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1887 		} else {
1888 			ipp->ipp_hoplimit = *i1;
1889 			ipp->ipp_fields |= IPPF_HOPLIMIT;
1890 			/* Ensure that it sticks for multicast packets */
1891 			ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1892 		}
1893 		mutex_exit(&connp->conn_lock);
1894 		coa->coa_changed |= COA_HEADER_CHANGED;
1895 		break;
1896 	case IPV6_TCLASS:
1897 		/*
1898 		 * IPV6_TCLASS accepts -1 as use kernel default
1899 		 * and [0, 255] as the actualy traffic class.
1900 		 */
1901 		mutex_enter(&connp->conn_lock);
1902 		if (inlen == 0 || *i1 == -1) {
1903 			ipp->ipp_tclass = 0;
1904 			ipp->ipp_fields &= ~IPPF_TCLASS;
1905 		} else {
1906 			ipp->ipp_tclass = *i1;
1907 			ipp->ipp_fields |= IPPF_TCLASS;
1908 		}
1909 		mutex_exit(&connp->conn_lock);
1910 		coa->coa_changed |= COA_HEADER_CHANGED;
1911 		break;
1912 	case IPV6_NEXTHOP:
1913 		if (inlen == 0) {
1914 			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1915 		} else {
1916 			sin6_t *sin6 = (sin6_t *)invalp;
1917 
1918 			ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1919 			if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1920 				ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1921 			else
1922 				ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1923 		}
1924 		coa->coa_changed |= COA_ROUTE_CHANGED;
1925 		break;
1926 	case IPV6_HOPOPTS:
1927 		mutex_enter(&connp->conn_lock);
1928 		error = optcom_pkt_set(invalp, inlen,
1929 		    (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1930 		if (error != 0) {
1931 			mutex_exit(&connp->conn_lock);
1932 			return (error);
1933 		}
1934 		if (ipp->ipp_hopoptslen == 0) {
1935 			ipp->ipp_fields &= ~IPPF_HOPOPTS;
1936 		} else {
1937 			ipp->ipp_fields |= IPPF_HOPOPTS;
1938 		}
1939 		mutex_exit(&connp->conn_lock);
1940 		coa->coa_changed |= COA_HEADER_CHANGED;
1941 		coa->coa_changed |= COA_WROFF_CHANGED;
1942 		break;
1943 	case IPV6_RTHDRDSTOPTS:
1944 		mutex_enter(&connp->conn_lock);
1945 		error = optcom_pkt_set(invalp, inlen,
1946 		    (uchar_t **)&ipp->ipp_rthdrdstopts,
1947 		    &ipp->ipp_rthdrdstoptslen);
1948 		if (error != 0) {
1949 			mutex_exit(&connp->conn_lock);
1950 			return (error);
1951 		}
1952 		if (ipp->ipp_rthdrdstoptslen == 0) {
1953 			ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1954 		} else {
1955 			ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1956 		}
1957 		mutex_exit(&connp->conn_lock);
1958 		coa->coa_changed |= COA_HEADER_CHANGED;
1959 		coa->coa_changed |= COA_WROFF_CHANGED;
1960 		break;
1961 	case IPV6_DSTOPTS:
1962 		mutex_enter(&connp->conn_lock);
1963 		error = optcom_pkt_set(invalp, inlen,
1964 		    (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1965 		if (error != 0) {
1966 			mutex_exit(&connp->conn_lock);
1967 			return (error);
1968 		}
1969 		if (ipp->ipp_dstoptslen == 0) {
1970 			ipp->ipp_fields &= ~IPPF_DSTOPTS;
1971 		} else {
1972 			ipp->ipp_fields |= IPPF_DSTOPTS;
1973 		}
1974 		mutex_exit(&connp->conn_lock);
1975 		coa->coa_changed |= COA_HEADER_CHANGED;
1976 		coa->coa_changed |= COA_WROFF_CHANGED;
1977 		break;
1978 	case IPV6_RTHDR:
1979 		mutex_enter(&connp->conn_lock);
1980 		error = optcom_pkt_set(invalp, inlen,
1981 		    (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1982 		if (error != 0) {
1983 			mutex_exit(&connp->conn_lock);
1984 			return (error);
1985 		}
1986 		if (ipp->ipp_rthdrlen == 0) {
1987 			ipp->ipp_fields &= ~IPPF_RTHDR;
1988 		} else {
1989 			ipp->ipp_fields |= IPPF_RTHDR;
1990 		}
1991 		mutex_exit(&connp->conn_lock);
1992 		coa->coa_changed |= COA_HEADER_CHANGED;
1993 		coa->coa_changed |= COA_WROFF_CHANGED;
1994 		break;
1995 
1996 	case IPV6_DONTFRAG:
1997 		if (onoff) {
1998 			ixa->ixa_flags |= IXAF_DONTFRAG;
1999 			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
2000 		} else {
2001 			ixa->ixa_flags &= ~IXAF_DONTFRAG;
2002 			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
2003 		}
2004 		/* Need to redo ip_attr_connect */
2005 		coa->coa_changed |= COA_ROUTE_CHANGED;
2006 		break;
2007 
2008 	case IPV6_USE_MIN_MTU:
2009 		ixa->ixa_flags |= IXAF_USE_MIN_MTU;
2010 		ixa->ixa_use_min_mtu = *i1;
2011 		/* Need to redo ip_attr_connect */
2012 		coa->coa_changed |= COA_ROUTE_CHANGED;
2013 		break;
2014 
2015 	case IPV6_SEC_OPT:
2016 		mutex_enter(&connp->conn_lock);
2017 		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
2018 		mutex_exit(&connp->conn_lock);
2019 		if (error != 0) {
2020 			return (error);
2021 		}
2022 		/* This is an IPsec policy change - redo ip_attr_connect */
2023 		coa->coa_changed |= COA_ROUTE_CHANGED;
2024 		break;
2025 	case IPV6_SRC_PREFERENCES:
2026 		/*
2027 		 * This socket option only affects connected
2028 		 * sockets that haven't already bound to a specific
2029 		 * IPv6 address.  In other words, sockets that
2030 		 * don't call bind() with an address other than the
2031 		 * unspecified address and that call connect().
2032 		 * ip_set_destination_v6() passes these preferences
2033 		 * to the ipif_select_source_v6() function.
2034 		 */
2035 		mutex_enter(&connp->conn_lock);
2036 		error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
2037 		mutex_exit(&connp->conn_lock);
2038 		if (error != 0) {
2039 			return (error);
2040 		}
2041 		break;
2042 	case IPV6_V6ONLY:
2043 		mutex_enter(&connp->conn_lock);
2044 		connp->conn_ipv6_v6only = onoff;
2045 		mutex_exit(&connp->conn_lock);
2046 		break;
2047 	case IPV6_MINHOPCOUNT:
2048 		mutex_enter(&connp->conn_lock);
2049 		connp->conn_min_ttl = *i1;
2050 		mutex_exit(&connp->conn_lock);
2051 		break;
2052 	}
2053 	return (0);
2054 }
2055 
2056 /* Handle IPPROTO_UDP */
2057 /* ARGSUSED1 */
2058 static int
conn_opt_set_udp(conn_opt_arg_t * coa,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)2059 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2060     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2061 {
2062 	conn_t		*connp = coa->coa_connp;
2063 	int		*i1 = (int *)invalp;
2064 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2065 	int		error;
2066 
2067 	switch (name) {
2068 	case UDP_ANONPRIVBIND:
2069 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2070 			return (error);
2071 		}
2072 		break;
2073 	}
2074 	if (checkonly)
2075 		return (0);
2076 
2077 	/* Here we set the actual option value */
2078 	mutex_enter(&connp->conn_lock);
2079 	switch (name) {
2080 	case UDP_ANONPRIVBIND:
2081 		connp->conn_anon_priv_bind = onoff;
2082 		break;
2083 	case UDP_EXCLBIND:
2084 		connp->conn_exclbind = onoff;
2085 		break;
2086 	}
2087 	mutex_exit(&connp->conn_lock);
2088 	return (0);
2089 }
2090 
2091 /* Handle IPPROTO_TCP */
2092 /* ARGSUSED1 */
2093 static int
conn_opt_set_tcp(conn_opt_arg_t * coa,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)2094 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2095     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2096 {
2097 	conn_t		*connp = coa->coa_connp;
2098 	int		*i1 = (int *)invalp;
2099 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2100 	int		error;
2101 
2102 	switch (name) {
2103 	case TCP_ANONPRIVBIND:
2104 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2105 			return (error);
2106 		}
2107 		break;
2108 	}
2109 	if (checkonly)
2110 		return (0);
2111 
2112 	/* Here we set the actual option value */
2113 	mutex_enter(&connp->conn_lock);
2114 	switch (name) {
2115 	case TCP_ANONPRIVBIND:
2116 		connp->conn_anon_priv_bind = onoff;
2117 		break;
2118 	case TCP_EXCLBIND:
2119 		connp->conn_exclbind = onoff;
2120 		break;
2121 	case TCP_RECVDSTADDR:
2122 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2123 		break;
2124 	}
2125 	mutex_exit(&connp->conn_lock);
2126 	return (0);
2127 }
2128 
2129 int
conn_getsockname(conn_t * connp,struct sockaddr * sa,uint_t * salenp)2130 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2131 {
2132 	sin_t		*sin;
2133 	sin6_t		*sin6;
2134 
2135 	if (connp->conn_family == AF_INET) {
2136 		if (*salenp < sizeof (sin_t))
2137 			return (EINVAL);
2138 
2139 		*salenp = sizeof (sin_t);
2140 		/* Fill zeroes and then initialize non-zero fields */
2141 		sin = (sin_t *)sa;
2142 		*sin = sin_null;
2143 		sin->sin_family = AF_INET;
2144 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2145 		    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2146 			sin->sin_addr.s_addr = connp->conn_saddr_v4;
2147 		} else {
2148 			/*
2149 			 * INADDR_ANY
2150 			 * conn_saddr is not set, we might be bound to
2151 			 * broadcast/multicast. Use conn_bound_addr as
2152 			 * local address instead (that could
2153 			 * also still be INADDR_ANY)
2154 			 */
2155 			sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2156 		}
2157 		sin->sin_port = connp->conn_lport;
2158 	} else {
2159 		if (*salenp < sizeof (sin6_t))
2160 			return (EINVAL);
2161 
2162 		*salenp = sizeof (sin6_t);
2163 		/* Fill zeroes and then initialize non-zero fields */
2164 		sin6 = (sin6_t *)sa;
2165 		*sin6 = sin6_null;
2166 		sin6->sin6_family = AF_INET6;
2167 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2168 			sin6->sin6_addr = connp->conn_saddr_v6;
2169 		} else {
2170 			/*
2171 			 * conn_saddr is not set, we might be bound to
2172 			 * broadcast/multicast. Use conn_bound_addr as
2173 			 * local address instead (which could
2174 			 * also still be unspecified)
2175 			 */
2176 			sin6->sin6_addr = connp->conn_bound_addr_v6;
2177 		}
2178 		sin6->sin6_port = connp->conn_lport;
2179 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2180 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2181 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2182 	}
2183 	return (0);
2184 }
2185 
2186 int
conn_getpeername(conn_t * connp,struct sockaddr * sa,uint_t * salenp)2187 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2188 {
2189 	struct sockaddr_in	*sin;
2190 	struct sockaddr_in6	*sin6;
2191 
2192 	if (connp->conn_family == AF_INET) {
2193 		if (*salenp < sizeof (sin_t))
2194 			return (EINVAL);
2195 
2196 		*salenp = sizeof (sin_t);
2197 		/* initialize */
2198 		sin = (sin_t *)sa;
2199 		*sin = sin_null;
2200 		sin->sin_family = AF_INET;
2201 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
2202 		sin->sin_port = connp->conn_fport;
2203 	} else {
2204 		if (*salenp < sizeof (sin6_t))
2205 			return (EINVAL);
2206 
2207 		*salenp = sizeof (sin6_t);
2208 		/* initialize */
2209 		sin6 = (sin6_t *)sa;
2210 		*sin6 = sin6_null;
2211 		sin6->sin6_family = AF_INET6;
2212 		sin6->sin6_addr = connp->conn_faddr_v6;
2213 		sin6->sin6_port =  connp->conn_fport;
2214 		sin6->sin6_flowinfo = connp->conn_flowinfo;
2215 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2216 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2217 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2218 	}
2219 	return (0);
2220 }
2221 
2222 static uint32_t	cksum_massage_options_v4(ipha_t *, netstack_t *);
2223 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2224 
2225 /*
2226  * Allocate and fill in conn_ht_iphc based on the current information
2227  * in the conn.
2228  * Normally used when we bind() and connect().
2229  * Returns failure if can't allocate memory, or if there is a problem
2230  * with a routing header/option.
2231  *
2232  * We allocate space for the transport header (ulp_hdr_len + extra) and
2233  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2234  * The extra is there for transports that want some spare room for future
2235  * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2236  * excludes the extra part.
2237  *
2238  * We massage an routing option/header and store the ckecksum difference
2239  * in conn_sum.
2240  *
2241  * Caller needs to update conn_wroff if desired.
2242  */
2243 int
conn_build_hdr_template(conn_t * connp,uint_t ulp_hdr_length,uint_t extra,const in6_addr_t * v6src,const in6_addr_t * v6dst,uint32_t flowinfo)2244 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2245     const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2246 {
2247 	ip_xmit_attr_t	*ixa = connp->conn_ixa;
2248 	ip_pkt_t	*ipp = &connp->conn_xmit_ipp;
2249 	uint_t		ip_hdr_length;
2250 	uchar_t		*hdrs;
2251 	uint_t		hdrs_len;
2252 
2253 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2254 
2255 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2256 		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2257 		/* In case of TX label and IP options it can be too much */
2258 		if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2259 			/* Preserves existing TX errno for this */
2260 			return (EHOSTUNREACH);
2261 		}
2262 	} else {
2263 		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2264 	}
2265 	ixa->ixa_ip_hdr_length = ip_hdr_length;
2266 	hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2267 	ASSERT(hdrs_len != 0);
2268 
2269 	if (hdrs_len != connp->conn_ht_iphc_allocated) {
2270 		/* Allocate new before we free any old */
2271 		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2272 		if (hdrs == NULL)
2273 			return (ENOMEM);
2274 
2275 		if (connp->conn_ht_iphc != NULL) {
2276 			kmem_free(connp->conn_ht_iphc,
2277 			    connp->conn_ht_iphc_allocated);
2278 		}
2279 		connp->conn_ht_iphc = hdrs;
2280 		connp->conn_ht_iphc_allocated = hdrs_len;
2281 	} else {
2282 		hdrs = connp->conn_ht_iphc;
2283 	}
2284 	hdrs_len -= extra;
2285 	connp->conn_ht_iphc_len = hdrs_len;
2286 
2287 	connp->conn_ht_ulp = hdrs + ip_hdr_length;
2288 	connp->conn_ht_ulp_len = ulp_hdr_length;
2289 
2290 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2291 		ipha_t	*ipha = (ipha_t *)hdrs;
2292 
2293 		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2294 		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2295 		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2296 		ipha->ipha_length = htons(hdrs_len);
2297 		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2298 			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2299 		else
2300 			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2301 
2302 		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2303 			connp->conn_sum = cksum_massage_options_v4(ipha,
2304 			    connp->conn_netstack);
2305 		} else {
2306 			connp->conn_sum = 0;
2307 		}
2308 	} else {
2309 		ip6_t	*ip6h = (ip6_t *)hdrs;
2310 
2311 		ip6h->ip6_src = *v6src;
2312 		ip6h->ip6_dst = *v6dst;
2313 		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2314 		    flowinfo);
2315 		ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2316 
2317 		if (ipp->ipp_fields & IPPF_RTHDR) {
2318 			connp->conn_sum = cksum_massage_options_v6(ip6h,
2319 			    ip_hdr_length, connp->conn_netstack);
2320 
2321 			/*
2322 			 * Verify that the first hop isn't a mapped address.
2323 			 * Routers along the path need to do this verification
2324 			 * for subsequent hops.
2325 			 */
2326 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2327 				return (EADDRNOTAVAIL);
2328 
2329 		} else {
2330 			connp->conn_sum = 0;
2331 		}
2332 	}
2333 	return (0);
2334 }
2335 
2336 /*
2337  * Prepend a header template to data_mp based on the ip_pkt_t
2338  * and the passed in source, destination and protocol.
2339  *
2340  * Returns failure if can't allocate memory, in which case data_mp is freed.
2341  * We allocate space for the transport header (ulp_hdr_len) and
2342  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2343  *
2344  * We massage an routing option/header and return the ckecksum difference
2345  * in *sump. This is in host byte order.
2346  *
2347  * Caller needs to update conn_wroff if desired.
2348  */
2349 mblk_t *
conn_prepend_hdr(ip_xmit_attr_t * ixa,const ip_pkt_t * ipp,const in6_addr_t * v6src,const in6_addr_t * v6dst,uint8_t protocol,uint32_t flowinfo,uint_t ulp_hdr_length,mblk_t * data_mp,uint_t data_length,uint_t wroff_extra,uint32_t * sump,int * errorp)2350 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2351     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2352     uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2353     uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2354 {
2355 	uint_t		ip_hdr_length;
2356 	uchar_t		*hdrs;
2357 	uint_t		hdrs_len;
2358 	mblk_t		*mp;
2359 
2360 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2361 		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2362 		ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2363 	} else {
2364 		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2365 	}
2366 	hdrs_len = ip_hdr_length + ulp_hdr_length;
2367 	ASSERT(hdrs_len != 0);
2368 
2369 	ixa->ixa_ip_hdr_length = ip_hdr_length;
2370 
2371 	/* Can we prepend to data_mp? */
2372 	if (data_mp != NULL &&
2373 	    data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2374 	    data_mp->b_datap->db_ref == 1) {
2375 		hdrs = data_mp->b_rptr - hdrs_len;
2376 		data_mp->b_rptr = hdrs;
2377 		mp = data_mp;
2378 	} else {
2379 		mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2380 		if (mp == NULL) {
2381 			freemsg(data_mp);
2382 			*errorp = ENOMEM;
2383 			return (NULL);
2384 		}
2385 		mp->b_wptr = mp->b_datap->db_lim;
2386 		hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2387 		mp->b_cont = data_mp;
2388 	}
2389 
2390 	/*
2391 	 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2392 	 * if PKTINFO (aka IPPF_ADDR) was set.
2393 	 */
2394 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2395 		ipha_t *ipha = (ipha_t *)hdrs;
2396 
2397 		ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2398 		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2399 		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2400 		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2401 		ipha->ipha_length = htons(hdrs_len + data_length);
2402 		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2403 			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2404 		else
2405 			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2406 
2407 		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2408 			*sump = cksum_massage_options_v4(ipha,
2409 			    ixa->ixa_ipst->ips_netstack);
2410 		} else {
2411 			*sump = 0;
2412 		}
2413 	} else {
2414 		ip6_t *ip6h = (ip6_t *)hdrs;
2415 
2416 		ip6h->ip6_src = *v6src;
2417 		ip6h->ip6_dst = *v6dst;
2418 		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2419 		ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2420 
2421 		if (ipp->ipp_fields & IPPF_RTHDR) {
2422 			*sump = cksum_massage_options_v6(ip6h,
2423 			    ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2424 
2425 			/*
2426 			 * Verify that the first hop isn't a mapped address.
2427 			 * Routers along the path need to do this verification
2428 			 * for subsequent hops.
2429 			 */
2430 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2431 				*errorp = EADDRNOTAVAIL;
2432 				freemsg(mp);
2433 				return (NULL);
2434 			}
2435 		} else {
2436 			*sump = 0;
2437 		}
2438 	}
2439 	return (mp);
2440 }
2441 
2442 /*
2443  * Massage a source route if any putting the first hop
2444  * in ipha_dst. Compute a starting value for the checksum which
2445  * takes into account that the original ipha_dst should be
2446  * included in the checksum but that IP will include the
2447  * first hop from the source route in the tcp checksum.
2448  */
2449 static uint32_t
cksum_massage_options_v4(ipha_t * ipha,netstack_t * ns)2450 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2451 {
2452 	in_addr_t	dst;
2453 	uint32_t	cksum;
2454 
2455 	/* Get last hop then diff against first hop */
2456 	cksum = ip_massage_options(ipha, ns);
2457 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2458 	dst = ipha->ipha_dst;
2459 	cksum -= ((dst >> 16) + (dst & 0xffff));
2460 	if ((int)cksum < 0)
2461 		cksum--;
2462 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2463 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2464 	ASSERT(cksum < 0x10000);
2465 	return (ntohs(cksum));
2466 }
2467 
2468 static uint32_t
cksum_massage_options_v6(ip6_t * ip6h,uint_t ip_hdr_len,netstack_t * ns)2469 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2470 {
2471 	uint8_t		*end;
2472 	ip6_rthdr_t	*rth;
2473 	uint32_t	cksum;
2474 
2475 	end = (uint8_t *)ip6h + ip_hdr_len;
2476 	rth = ip_find_rthdr_v6(ip6h, end);
2477 	if (rth == NULL)
2478 		return (0);
2479 
2480 	cksum = ip_massage_options_v6(ip6h, rth, ns);
2481 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2482 	ASSERT(cksum < 0x10000);
2483 	return (ntohs(cksum));
2484 }
2485 
2486 /*
2487  * ULPs that change the destination address need to call this for each
2488  * change to discard any state about a previous destination that might
2489  * have been multicast or multirt.
2490  */
2491 void
ip_attr_newdst(ip_xmit_attr_t * ixa)2492 ip_attr_newdst(ip_xmit_attr_t *ixa)
2493 {
2494 	ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2495 	    IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2496 	    IXAF_NO_LOOP_ZONEID_SET);
2497 }
2498 
2499 /*
2500  * Determine the nexthop which will be used.
2501  * Normally this is just the destination, but if a IPv4 source route, or
2502  * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2503  * there.
2504  */
2505 void
ip_attr_nexthop(const ip_pkt_t * ipp,const ip_xmit_attr_t * ixa,const in6_addr_t * dst,in6_addr_t * nexthop)2506 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2507     const in6_addr_t *dst, in6_addr_t *nexthop)
2508 {
2509 	if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2510 		*nexthop = *dst;
2511 		return;
2512 	}
2513 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2514 		ipaddr_t v4dst;
2515 		ipaddr_t v4nexthop;
2516 
2517 		IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2518 		v4nexthop = ip_pkt_source_route_v4(ipp);
2519 		if (v4nexthop == INADDR_ANY)
2520 			v4nexthop = v4dst;
2521 
2522 		IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2523 	} else {
2524 		const in6_addr_t *v6nexthop;
2525 
2526 		v6nexthop = ip_pkt_source_route_v6(ipp);
2527 		if (v6nexthop == NULL)
2528 			v6nexthop = dst;
2529 
2530 		*nexthop = *v6nexthop;
2531 	}
2532 }
2533 
2534 /*
2535  * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2536  * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2537  * case (connected latching is done in conn_connect).
2538  * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2539  * set, but doesn't otherwise use the conn_t.
2540  *
2541  * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2542  * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2543  *
2544  * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2545  * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2546  *
2547  * Updates laddrp and uinfo if they are non-NULL.
2548  *
2549  * TSOL notes: The callers if ip_attr_connect must check if the destination
2550  * is different than before and in that case redo conn_update_label.
2551  * The callers of conn_connect do not need that since conn_connect
2552  * performs the conn_update_label.
2553  */
2554 int
ip_attr_connect(const conn_t * connp,ip_xmit_attr_t * ixa,const in6_addr_t * v6src,const in6_addr_t * v6dst,const in6_addr_t * v6nexthop,in_port_t dstport,in6_addr_t * laddrp,iulp_t * uinfo,uint32_t flags)2555 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2556     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2557     const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2558     iulp_t *uinfo, uint32_t flags)
2559 {
2560 	in6_addr_t		laddr = *v6src;
2561 	int			error;
2562 
2563 	ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2564 
2565 	if (connp->conn_zone_is_global)
2566 		flags |= IPDF_ZONE_IS_GLOBAL;
2567 	else
2568 		flags &= ~IPDF_ZONE_IS_GLOBAL;
2569 
2570 	/*
2571 	 * Lookup the route to determine a source address and the uinfo.
2572 	 * If the ULP has a source route option then the caller will
2573 	 * have set v6nexthop to be the first hop.
2574 	 */
2575 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2576 		ipaddr_t v4dst;
2577 		ipaddr_t v4src, v4nexthop;
2578 
2579 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2580 		IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2581 		IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2582 
2583 		if (connp->conn_unspec_src || v4src != INADDR_ANY)
2584 			flags &= ~IPDF_SELECT_SRC;
2585 		else
2586 			flags |= IPDF_SELECT_SRC;
2587 
2588 		error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2589 		    uinfo, flags, connp->conn_mac_mode);
2590 		IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2591 	} else {
2592 		if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2593 			flags &= ~IPDF_SELECT_SRC;
2594 		else
2595 			flags |= IPDF_SELECT_SRC;
2596 
2597 		error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2598 		    uinfo, flags, connp->conn_mac_mode);
2599 	}
2600 	/* Pass out some address even if we hit a RTF_REJECT etc */
2601 	if (laddrp != NULL)
2602 		*laddrp = laddr;
2603 
2604 	if (error != 0)
2605 		return (error);
2606 
2607 	if (flags & IPDF_IPSEC) {
2608 		/*
2609 		 * Set any IPsec policy in ixa. Routine also looks at ULP
2610 		 * ports.
2611 		 */
2612 		ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2613 	}
2614 	return (0);
2615 }
2616 
2617 /*
2618  * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2619  * Assumes that conn_faddr and conn_fport are already set. As such it is not
2620  * usable for SCTP, since SCTP has multiple faddrs.
2621  *
2622  * Caller must hold conn_lock to provide atomic constency between the
2623  * conn_t's addresses and the ixa.
2624  * NOTE: this function drops and reaquires conn_lock since it can't be
2625  * held across ip_attr_connect/ip_set_destination.
2626  *
2627  * The caller needs to handle inserting in the receive-side fanout when
2628  * appropriate after conn_connect returns.
2629  */
2630 int
conn_connect(conn_t * connp,iulp_t * uinfo,uint32_t flags)2631 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2632 {
2633 	ip_xmit_attr_t	*ixa = connp->conn_ixa;
2634 	in6_addr_t	nexthop;
2635 	in6_addr_t	saddr, faddr;
2636 	in_port_t	fport;
2637 	int		error;
2638 
2639 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2640 
2641 	if (connp->conn_ipversion == IPV4_VERSION)
2642 		ixa->ixa_flags |= IXAF_IS_IPV4;
2643 	else
2644 		ixa->ixa_flags &= ~IXAF_IS_IPV4;
2645 
2646 	/* We do IPsec latching below - hence no caching in ip_attr_connect */
2647 	flags &= ~IPDF_IPSEC;
2648 
2649 	/* In case we had previously done an ip_attr_connect */
2650 	ip_attr_newdst(ixa);
2651 
2652 	/*
2653 	 * Determine the nexthop and copy the addresses before dropping
2654 	 * conn_lock.
2655 	 */
2656 	ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2657 	    &connp->conn_faddr_v6, &nexthop);
2658 	saddr = connp->conn_saddr_v6;
2659 	faddr = connp->conn_faddr_v6;
2660 	fport = connp->conn_fport;
2661 
2662 	mutex_exit(&connp->conn_lock);
2663 	error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2664 	    &saddr, uinfo, flags | IPDF_VERIFY_DST);
2665 	mutex_enter(&connp->conn_lock);
2666 
2667 	/* Could have changed even if an error */
2668 	connp->conn_saddr_v6 = saddr;
2669 	if (error != 0)
2670 		return (error);
2671 
2672 	/*
2673 	 * Check whether Trusted Solaris policy allows communication with this
2674 	 * host, and pretend that the destination is unreachable if not.
2675 	 * Compute any needed label and place it in ipp_label_v4/v6.
2676 	 *
2677 	 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2678 	 * the packet.
2679 	 *
2680 	 * TSOL Note: Any concurrent threads would pick a different ixa
2681 	 * (and ipp if they are to change the ipp)  so we
2682 	 * don't have to worry about concurrent threads.
2683 	 */
2684 	if (is_system_labeled()) {
2685 		if (connp->conn_mlp_type != mlptSingle)
2686 			return (ECONNREFUSED);
2687 
2688 		/*
2689 		 * conn_update_label will set ipp_label* which will later
2690 		 * be used by conn_build_hdr_template.
2691 		 */
2692 		error = conn_update_label(connp, ixa,
2693 		    &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2694 		if (error != 0)
2695 			return (error);
2696 	}
2697 
2698 	/*
2699 	 * Ensure that we match on the selected local address.
2700 	 * This overrides conn_laddr in the case we had earlier bound to a
2701 	 * multicast or broadcast address.
2702 	 */
2703 	connp->conn_laddr_v6 = connp->conn_saddr_v6;
2704 
2705 	/*
2706 	 * Allow setting new policies.
2707 	 * The addresses/ports are already set, thus the IPsec policy calls
2708 	 * can handle their passed-in conn's.
2709 	 */
2710 	connp->conn_policy_cached = B_FALSE;
2711 
2712 	/*
2713 	 * Cache IPsec policy in this conn.  If we have per-socket policy,
2714 	 * we'll cache that.  If we don't, we'll inherit global policy.
2715 	 *
2716 	 * This is done before the caller inserts in the receive-side fanout.
2717 	 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2718 	 * for connections where we don't have a policy. This is to prevent
2719 	 * global policy lookups in the inbound path.
2720 	 *
2721 	 * If we insert before we set conn_policy_cached,
2722 	 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2723 	 * because global policy cound be non-empty. We normally call
2724 	 * ipsec_check_policy() for conn_policy_cached connections only if
2725 	 * conn_in_enforce_policy is set. But in this case,
2726 	 * conn_policy_cached can get set anytime since we made the
2727 	 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2728 	 * called, which will make the above assumption false.  Thus, we
2729 	 * need to insert after we set conn_policy_cached.
2730 	 */
2731 	error = ipsec_conn_cache_policy(connp,
2732 	    connp->conn_ipversion == IPV4_VERSION);
2733 	if (error != 0)
2734 		return (error);
2735 
2736 	/*
2737 	 * We defer to do LSO check until here since now we have better idea
2738 	 * whether IPsec is present. If the underlying ill is LSO capable,
2739 	 * copy its capability in so the ULP can decide whether to enable LSO
2740 	 * on this connection. So far, only TCP/IPv4 is implemented, so won't
2741 	 * claim LSO for IPv6.
2742 	 *
2743 	 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2744 	 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2745 	 */
2746 	ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2747 
2748 	ASSERT(ixa->ixa_ire != NULL);
2749 	if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2750 	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2751 	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2752 	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2753 	    (ixa->ixa_nce != NULL) &&
2754 	    ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2755 	    ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2756 	    ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2757 		ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2758 		ixa->ixa_flags |= IXAF_LSO_CAPAB;
2759 	}
2760 
2761 	/* Check whether ZEROCOPY capability is usable for this connection. */
2762 	ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2763 
2764 	if ((flags & IPDF_ZCOPY) &&
2765 	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2766 	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2767 	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2768 	    (ixa->ixa_nce != NULL) &&
2769 	    ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2770 		ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2771 	}
2772 	return (0);
2773 }
2774 
2775 /*
2776  * Predicates to check if the addresses match conn_last*
2777  */
2778 
2779 /*
2780  * Compare the conn against an address.
2781  * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2782  */
2783 boolean_t
conn_same_as_last_v4(conn_t * connp,sin_t * sin)2784 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2785 {
2786 	ASSERT(connp->conn_family == AF_INET);
2787 	return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2788 	    sin->sin_port == connp->conn_lastdstport);
2789 }
2790 
2791 /*
2792  * Compare, including for mapped addresses
2793  */
2794 boolean_t
conn_same_as_last_v6(conn_t * connp,sin6_t * sin6)2795 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2796 {
2797 	return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2798 	    sin6->sin6_port == connp->conn_lastdstport &&
2799 	    sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2800 	    sin6->sin6_scope_id == connp->conn_lastscopeid);
2801 }
2802 
2803 /*
2804  * Compute a label and place it in the ip_packet_t.
2805  * Handles IPv4 and IPv6.
2806  * The caller should have a correct ixa_tsl and ixa_zoneid and have
2807  * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2808  * has been called.
2809  */
2810 int
conn_update_label(const conn_t * connp,const ip_xmit_attr_t * ixa,const in6_addr_t * v6dst,ip_pkt_t * ipp)2811 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2812     const in6_addr_t *v6dst, ip_pkt_t *ipp)
2813 {
2814 	int		err;
2815 	ipaddr_t	v4dst;
2816 
2817 	if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2818 		uchar_t		opt_storage[IP_MAX_OPT_LENGTH];
2819 
2820 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2821 
2822 		err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2823 		    v4dst, opt_storage, ixa->ixa_ipst);
2824 		if (err == 0) {
2825 			/* Length contained in opt_storage[IPOPT_OLEN] */
2826 			err = optcom_pkt_set(opt_storage,
2827 			    opt_storage[IPOPT_OLEN],
2828 			    (uchar_t **)&ipp->ipp_label_v4,
2829 			    &ipp->ipp_label_len_v4);
2830 		}
2831 		if (err != 0) {
2832 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2833 			    char *, "conn(1) failed to update options(2) "
2834 			    "on ixa(3)",
2835 			    conn_t *, connp, char *, opt_storage,
2836 			    ip_xmit_attr_t *, ixa);
2837 		}
2838 		if (ipp->ipp_label_len_v4 != 0)
2839 			ipp->ipp_fields |= IPPF_LABEL_V4;
2840 		else
2841 			ipp->ipp_fields &= ~IPPF_LABEL_V4;
2842 	} else {
2843 		uchar_t		opt_storage[TSOL_MAX_IPV6_OPTION];
2844 		uint_t		optlen;
2845 
2846 		err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2847 		    v6dst, opt_storage, ixa->ixa_ipst);
2848 		if (err == 0) {
2849 			/*
2850 			 * Note that ipp_label_v6 is just the option - not
2851 			 * the hopopts extension header.
2852 			 *
2853 			 * Length contained in opt_storage[IPOPT_OLEN], but
2854 			 * that doesn't include the two byte options header.
2855 			 */
2856 			optlen = opt_storage[IPOPT_OLEN];
2857 			if (optlen != 0)
2858 				optlen += 2;
2859 
2860 			err = optcom_pkt_set(opt_storage, optlen,
2861 			    (uchar_t **)&ipp->ipp_label_v6,
2862 			    &ipp->ipp_label_len_v6);
2863 		}
2864 		if (err != 0) {
2865 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2866 			    char *, "conn(1) failed to update options(2) "
2867 			    "on ixa(3)",
2868 			    conn_t *, connp, char *, opt_storage,
2869 			    ip_xmit_attr_t *, ixa);
2870 		}
2871 		if (ipp->ipp_label_len_v6 != 0)
2872 			ipp->ipp_fields |= IPPF_LABEL_V6;
2873 		else
2874 			ipp->ipp_fields &= ~IPPF_LABEL_V6;
2875 	}
2876 	return (err);
2877 }
2878 
2879 /*
2880  * Inherit all options settings from the parent/listener to the eager.
2881  * Returns zero on success; ENOMEM if memory allocation failed.
2882  *
2883  * We assume that the eager has not had any work done i.e., the conn_ixa
2884  * and conn_xmit_ipp are all zero.
2885  * Furthermore we assume that no other thread can access the eager (because
2886  * it isn't inserted in any fanout list).
2887  */
2888 int
conn_inherit_parent(conn_t * lconnp,conn_t * econnp)2889 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2890 {
2891 	cred_t	*credp;
2892 	int	err;
2893 	void	*notify_cookie;
2894 	uint32_t xmit_hint;
2895 
2896 	econnp->conn_family = lconnp->conn_family;
2897 	econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2898 	econnp->conn_wq = lconnp->conn_wq;
2899 	econnp->conn_rq = lconnp->conn_rq;
2900 
2901 	/*
2902 	 * Make a safe copy of the transmit attributes.
2903 	 * conn_connect will later be used by the caller to setup the ire etc.
2904 	 */
2905 	ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2906 	ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2907 	ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2908 	ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2909 
2910 	/* Preserve ixa_notify_cookie and xmit_hint */
2911 	notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2912 	xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2913 	ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2914 	econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2915 	econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2916 
2917 	econnp->conn_bound_if = lconnp->conn_bound_if;
2918 	econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2919 
2920 	/* Inherit all RECV options */
2921 	econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2922 
2923 	err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2924 	    KM_NOSLEEP);
2925 	if (err != 0)
2926 		return (err);
2927 
2928 	econnp->conn_zoneid = lconnp->conn_zoneid;
2929 	econnp->conn_allzones = lconnp->conn_allzones;
2930 
2931 	/* This is odd. Pick a flowlabel for each connection instead? */
2932 	econnp->conn_flowinfo = lconnp->conn_flowinfo;
2933 
2934 	econnp->conn_default_ttl = lconnp->conn_default_ttl;
2935 	econnp->conn_min_ttl = lconnp->conn_min_ttl;
2936 
2937 	/*
2938 	 * TSOL: tsol_input_proc() needs the eager's cred before the
2939 	 * eager is accepted
2940 	 */
2941 	ASSERT(lconnp->conn_cred != NULL);
2942 	econnp->conn_cred = credp = lconnp->conn_cred;
2943 	crhold(credp);
2944 	econnp->conn_cpid = lconnp->conn_cpid;
2945 	econnp->conn_open_time = ddi_get_lbolt64();
2946 
2947 	/*
2948 	 * Cache things in the ixa without any refhold.
2949 	 * Listener might not have set up ixa_cred
2950 	 */
2951 	ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2952 	econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2953 	econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2954 	if (is_system_labeled())
2955 		econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2956 
2957 	/*
2958 	 * If the caller has the process-wide flag set, then default to MAC
2959 	 * exempt mode.  This allows read-down to unlabeled hosts.
2960 	 */
2961 	if (getpflags(NET_MAC_AWARE, credp) != 0)
2962 		econnp->conn_mac_mode = CONN_MAC_AWARE;
2963 
2964 	econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2965 
2966 	/*
2967 	 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2968 	 * via soaccept()->soinheritoptions() which essentially applies
2969 	 * all the listener options to the new connection. The options that we
2970 	 * need to take care of are:
2971 	 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2972 	 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2973 	 * SO_SNDBUF, SO_RCVBUF.
2974 	 *
2975 	 * SO_RCVBUF:	conn_rcvbuf is set.
2976 	 * SO_SNDBUF:	conn_sndbuf is set.
2977 	 */
2978 
2979 	/* Could we define a struct and use a struct copy for this? */
2980 	econnp->conn_sndbuf = lconnp->conn_sndbuf;
2981 	econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2982 	econnp->conn_sndlowat = lconnp->conn_sndlowat;
2983 	econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2984 	econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2985 	econnp->conn_oobinline = lconnp->conn_oobinline;
2986 	econnp->conn_debug = lconnp->conn_debug;
2987 	econnp->conn_keepalive = lconnp->conn_keepalive;
2988 	econnp->conn_linger = lconnp->conn_linger;
2989 	econnp->conn_lingertime = lconnp->conn_lingertime;
2990 
2991 	/* Set the IP options */
2992 	econnp->conn_broadcast = lconnp->conn_broadcast;
2993 	econnp->conn_useloopback = lconnp->conn_useloopback;
2994 	econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2995 	return (0);
2996 }
2997