1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25 /* Copyright (c) 1990 Mentat Inc. */
26
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/strsun.h>
30 #define _SUN_TPI_VERSION 2
31 #include <sys/tihdr.h>
32 #include <sys/xti_inet.h>
33 #include <sys/ucred.h>
34 #include <sys/zone.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/cmn_err.h>
38 #include <sys/debug.h>
39 #include <sys/atomic.h>
40 #include <sys/policy.h>
41
42 #include <sys/systm.h>
43 #include <sys/param.h>
44 #include <sys/kmem.h>
45 #include <sys/sdt.h>
46 #include <sys/socket.h>
47 #include <sys/ethernet.h>
48 #include <sys/mac.h>
49 #include <net/if.h>
50 #include <net/if_types.h>
51 #include <net/if_arp.h>
52 #include <net/route.h>
53 #include <sys/sockio.h>
54 #include <netinet/in.h>
55 #include <net/if_dl.h>
56
57 #include <inet/common.h>
58 #include <inet/mi.h>
59 #include <inet/mib2.h>
60 #include <inet/nd.h>
61 #include <inet/arp.h>
62 #include <inet/snmpcom.h>
63 #include <inet/kstatcom.h>
64
65 #include <netinet/igmp_var.h>
66 #include <netinet/ip6.h>
67 #include <netinet/icmp6.h>
68 #include <netinet/sctp.h>
69
70 #include <inet/ip.h>
71 #include <inet/ip_impl.h>
72 #include <inet/ip6.h>
73 #include <inet/ip6_asp.h>
74 #include <inet/tcp.h>
75 #include <inet/ip_multi.h>
76 #include <inet/ip_if.h>
77 #include <inet/ip_ire.h>
78 #include <inet/ip_ftable.h>
79 #include <inet/ip_rts.h>
80 #include <inet/optcom.h>
81 #include <inet/ip_ndp.h>
82 #include <inet/ip_listutils.h>
83 #include <netinet/igmp.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/udp.h>
86 #include <inet/ipp_common.h>
87
88 #include <net/pfkeyv2.h>
89 #include <inet/sadb.h>
90 #include <inet/ipsec_impl.h>
91 #include <inet/ipdrop.h>
92 #include <inet/ip_netinfo.h>
93
94 #include <inet/ipclassifier.h>
95 #include <inet/sctp_ip.h>
96 #include <inet/sctp/sctp_impl.h>
97 #include <inet/udp_impl.h>
98 #include <sys/sunddi.h>
99
100 #include <sys/tsol/label.h>
101 #include <sys/tsol/tnet.h>
102
103 /*
104 * Return how much size is needed for the different ancillary data items
105 */
106 uint_t
conn_recvancillary_size(conn_t * connp,crb_t recv_ancillary,ip_recv_attr_t * ira,mblk_t * mp,ip_pkt_t * ipp)107 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
108 ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
109 {
110 uint_t ancil_size;
111 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
112
113 /*
114 * If IP_RECVDSTADDR is set we include the destination IP
115 * address as an option. With IP_RECVOPTS we include all
116 * the IP options.
117 */
118 ancil_size = 0;
119 if (recv_ancillary.crb_recvdstaddr &&
120 (ira->ira_flags & IRAF_IS_IPV4)) {
121 ancil_size += sizeof (struct T_opthdr) +
122 sizeof (struct in_addr);
123 IP_STAT(ipst, conn_in_recvdstaddr);
124 }
125
126 /*
127 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
128 * are different
129 */
130 if (recv_ancillary.crb_ip_recvpktinfo &&
131 connp->conn_family == AF_INET) {
132 ancil_size += sizeof (struct T_opthdr) +
133 sizeof (struct in_pktinfo);
134 IP_STAT(ipst, conn_in_recvpktinfo);
135 }
136
137 if ((recv_ancillary.crb_recvopts) &&
138 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
139 ancil_size += sizeof (struct T_opthdr) +
140 ipp->ipp_ipv4_options_len;
141 IP_STAT(ipst, conn_in_recvopts);
142 }
143
144 if (recv_ancillary.crb_recvslla) {
145 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
146 ill_t *ill;
147
148 /* Make sure ira_l2src is setup if not already */
149 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
150 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
151 ipst);
152 if (ill != NULL) {
153 ip_setl2src(mp, ira, ill);
154 ill_refrele(ill);
155 }
156 }
157 ancil_size += sizeof (struct T_opthdr) +
158 sizeof (struct sockaddr_dl);
159 IP_STAT(ipst, conn_in_recvslla);
160 }
161
162 if (recv_ancillary.crb_recvif) {
163 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
164 IP_STAT(ipst, conn_in_recvif);
165 }
166
167 /*
168 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
169 * are different
170 */
171 if (recv_ancillary.crb_ip_recvpktinfo &&
172 connp->conn_family == AF_INET6) {
173 ancil_size += sizeof (struct T_opthdr) +
174 sizeof (struct in6_pktinfo);
175 IP_STAT(ipst, conn_in_recvpktinfo);
176 }
177
178 if (recv_ancillary.crb_ipv6_recvhoplimit) {
179 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
180 IP_STAT(ipst, conn_in_recvhoplimit);
181 }
182
183 if (recv_ancillary.crb_ipv6_recvtclass) {
184 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
185 IP_STAT(ipst, conn_in_recvtclass);
186 }
187
188 if (recv_ancillary.crb_ipv6_recvhopopts &&
189 (ipp->ipp_fields & IPPF_HOPOPTS)) {
190 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
191 IP_STAT(ipst, conn_in_recvhopopts);
192 }
193 /*
194 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
195 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
196 * options that appear before a routing header.
197 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
198 */
199 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
200 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
201 (recv_ancillary.crb_ipv6_recvdstopts &&
202 recv_ancillary.crb_ipv6_recvrthdr)) {
203 ancil_size += sizeof (struct T_opthdr) +
204 ipp->ipp_rthdrdstoptslen;
205 IP_STAT(ipst, conn_in_recvrthdrdstopts);
206 }
207 }
208 if ((recv_ancillary.crb_ipv6_recvrthdr) &&
209 (ipp->ipp_fields & IPPF_RTHDR)) {
210 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
211 IP_STAT(ipst, conn_in_recvrthdr);
212 }
213 if ((recv_ancillary.crb_ipv6_recvdstopts ||
214 recv_ancillary.crb_old_ipv6_recvdstopts) &&
215 (ipp->ipp_fields & IPPF_DSTOPTS)) {
216 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
217 IP_STAT(ipst, conn_in_recvdstopts);
218 }
219 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
220 ancil_size += sizeof (struct T_opthdr) +
221 ucredminsize(ira->ira_cred);
222 IP_STAT(ipst, conn_in_recvucred);
223 }
224
225 /*
226 * If SO_TIMESTAMP is set allocate the appropriate sized
227 * buffer. Since gethrestime() expects a pointer aligned
228 * argument, we allocate space necessary for extra
229 * alignment (even though it might not be used).
230 */
231 if (recv_ancillary.crb_timestamp) {
232 ancil_size += sizeof (struct T_opthdr) +
233 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
234 IP_STAT(ipst, conn_in_timestamp);
235 }
236
237 /*
238 * If IP_RECVTTL is set allocate the appropriate sized buffer
239 */
240 if (recv_ancillary.crb_recvttl &&
241 (ira->ira_flags & IRAF_IS_IPV4)) {
242 ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
243 IP_STAT(ipst, conn_in_recvttl);
244 }
245
246 return (ancil_size);
247 }
248
249 /*
250 * Lay down the ancillary data items at "ancil_buf".
251 * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
252 * large buffer - ancil_size.
253 */
254 void
conn_recvancillary_add(conn_t * connp,crb_t recv_ancillary,ip_recv_attr_t * ira,ip_pkt_t * ipp,uchar_t * ancil_buf,uint_t ancil_size)255 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
256 ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
257 {
258 /*
259 * Copy in destination address before options to avoid
260 * any padding issues.
261 */
262 if (recv_ancillary.crb_recvdstaddr &&
263 (ira->ira_flags & IRAF_IS_IPV4)) {
264 struct T_opthdr *toh;
265 ipaddr_t *dstptr;
266
267 toh = (struct T_opthdr *)ancil_buf;
268 toh->level = IPPROTO_IP;
269 toh->name = IP_RECVDSTADDR;
270 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
271 toh->status = 0;
272 ancil_buf += sizeof (struct T_opthdr);
273 dstptr = (ipaddr_t *)ancil_buf;
274 *dstptr = ipp->ipp_addr_v4;
275 ancil_buf += sizeof (ipaddr_t);
276 ancil_size -= toh->len;
277 }
278
279 /*
280 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
281 * are different
282 */
283 if (recv_ancillary.crb_ip_recvpktinfo &&
284 connp->conn_family == AF_INET) {
285 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
286 struct T_opthdr *toh;
287 struct in_pktinfo *pktinfop;
288 ill_t *ill;
289 ipif_t *ipif;
290
291 toh = (struct T_opthdr *)ancil_buf;
292 toh->level = IPPROTO_IP;
293 toh->name = IP_PKTINFO;
294 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
295 toh->status = 0;
296 ancil_buf += sizeof (struct T_opthdr);
297 pktinfop = (struct in_pktinfo *)ancil_buf;
298
299 pktinfop->ipi_ifindex = ira->ira_ruifindex;
300 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
301
302 /* Find a good address to report */
303 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
304 if (ill != NULL) {
305 ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
306 if (ipif != NULL) {
307 pktinfop->ipi_spec_dst.s_addr =
308 ipif->ipif_lcl_addr;
309 ipif_refrele(ipif);
310 }
311 ill_refrele(ill);
312 }
313 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
314 ancil_buf += sizeof (struct in_pktinfo);
315 ancil_size -= toh->len;
316 }
317
318 if ((recv_ancillary.crb_recvopts) &&
319 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
320 struct T_opthdr *toh;
321
322 toh = (struct T_opthdr *)ancil_buf;
323 toh->level = IPPROTO_IP;
324 toh->name = IP_RECVOPTS;
325 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
326 toh->status = 0;
327 ancil_buf += sizeof (struct T_opthdr);
328 bcopy(ipp->ipp_ipv4_options, ancil_buf,
329 ipp->ipp_ipv4_options_len);
330 ancil_buf += ipp->ipp_ipv4_options_len;
331 ancil_size -= toh->len;
332 }
333
334 if (recv_ancillary.crb_recvslla) {
335 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
336 struct T_opthdr *toh;
337 struct sockaddr_dl *dstptr;
338 ill_t *ill;
339 int alen = 0;
340
341 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
342 if (ill != NULL)
343 alen = ill->ill_phys_addr_length;
344
345 /*
346 * For loopback multicast and broadcast the packet arrives
347 * with ira_ruifdex being the physical interface, but
348 * ira_l2src is all zero since ip_postfrag_loopback doesn't
349 * know our l2src. We don't report the address in that case.
350 */
351 if (ira->ira_flags & IRAF_LOOPBACK)
352 alen = 0;
353
354 toh = (struct T_opthdr *)ancil_buf;
355 toh->level = IPPROTO_IP;
356 toh->name = IP_RECVSLLA;
357 toh->len = sizeof (struct T_opthdr) +
358 sizeof (struct sockaddr_dl);
359 toh->status = 0;
360 ancil_buf += sizeof (struct T_opthdr);
361 dstptr = (struct sockaddr_dl *)ancil_buf;
362 dstptr->sdl_family = AF_LINK;
363 dstptr->sdl_index = ira->ira_ruifindex;
364 if (ill != NULL)
365 dstptr->sdl_type = ill->ill_type;
366 else
367 dstptr->sdl_type = 0;
368 dstptr->sdl_nlen = 0;
369 dstptr->sdl_alen = alen;
370 dstptr->sdl_slen = 0;
371 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
372 ancil_buf += sizeof (struct sockaddr_dl);
373 ancil_size -= toh->len;
374 if (ill != NULL)
375 ill_refrele(ill);
376 }
377
378 if (recv_ancillary.crb_recvif) {
379 struct T_opthdr *toh;
380 uint_t *dstptr;
381
382 toh = (struct T_opthdr *)ancil_buf;
383 toh->level = IPPROTO_IP;
384 toh->name = IP_RECVIF;
385 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
386 toh->status = 0;
387 ancil_buf += sizeof (struct T_opthdr);
388 dstptr = (uint_t *)ancil_buf;
389 *dstptr = ira->ira_ruifindex;
390 ancil_buf += sizeof (uint_t);
391 ancil_size -= toh->len;
392 }
393
394 /*
395 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
396 * are different
397 */
398 if (recv_ancillary.crb_ip_recvpktinfo &&
399 connp->conn_family == AF_INET6) {
400 struct T_opthdr *toh;
401 struct in6_pktinfo *pkti;
402
403 toh = (struct T_opthdr *)ancil_buf;
404 toh->level = IPPROTO_IPV6;
405 toh->name = IPV6_PKTINFO;
406 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
407 toh->status = 0;
408 ancil_buf += sizeof (struct T_opthdr);
409 pkti = (struct in6_pktinfo *)ancil_buf;
410 if (ira->ira_flags & IRAF_IS_IPV4) {
411 IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
412 &pkti->ipi6_addr);
413 } else {
414 pkti->ipi6_addr = ipp->ipp_addr;
415 }
416 pkti->ipi6_ifindex = ira->ira_ruifindex;
417
418 ancil_buf += sizeof (*pkti);
419 ancil_size -= toh->len;
420 }
421 if (recv_ancillary.crb_ipv6_recvhoplimit) {
422 struct T_opthdr *toh;
423
424 toh = (struct T_opthdr *)ancil_buf;
425 toh->level = IPPROTO_IPV6;
426 toh->name = IPV6_HOPLIMIT;
427 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
428 toh->status = 0;
429 ancil_buf += sizeof (struct T_opthdr);
430 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
431 ancil_buf += sizeof (uint_t);
432 ancil_size -= toh->len;
433 }
434 if (recv_ancillary.crb_ipv6_recvtclass) {
435 struct T_opthdr *toh;
436
437 toh = (struct T_opthdr *)ancil_buf;
438 toh->level = IPPROTO_IPV6;
439 toh->name = IPV6_TCLASS;
440 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
441 toh->status = 0;
442 ancil_buf += sizeof (struct T_opthdr);
443
444 if (ira->ira_flags & IRAF_IS_IPV4)
445 *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
446 else
447 *(uint_t *)ancil_buf = ipp->ipp_tclass;
448 ancil_buf += sizeof (uint_t);
449 ancil_size -= toh->len;
450 }
451 if (recv_ancillary.crb_ipv6_recvhopopts &&
452 (ipp->ipp_fields & IPPF_HOPOPTS)) {
453 struct T_opthdr *toh;
454
455 toh = (struct T_opthdr *)ancil_buf;
456 toh->level = IPPROTO_IPV6;
457 toh->name = IPV6_HOPOPTS;
458 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
459 toh->status = 0;
460 ancil_buf += sizeof (struct T_opthdr);
461 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
462 ancil_buf += ipp->ipp_hopoptslen;
463 ancil_size -= toh->len;
464 }
465 /*
466 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
467 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
468 * options that appear before a routing header.
469 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
470 */
471 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
472 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
473 (recv_ancillary.crb_ipv6_recvdstopts &&
474 recv_ancillary.crb_ipv6_recvrthdr)) {
475 struct T_opthdr *toh;
476
477 toh = (struct T_opthdr *)ancil_buf;
478 toh->level = IPPROTO_IPV6;
479 toh->name = IPV6_DSTOPTS;
480 toh->len = sizeof (struct T_opthdr) +
481 ipp->ipp_rthdrdstoptslen;
482 toh->status = 0;
483 ancil_buf += sizeof (struct T_opthdr);
484 bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
485 ipp->ipp_rthdrdstoptslen);
486 ancil_buf += ipp->ipp_rthdrdstoptslen;
487 ancil_size -= toh->len;
488 }
489 }
490 if (recv_ancillary.crb_ipv6_recvrthdr &&
491 (ipp->ipp_fields & IPPF_RTHDR)) {
492 struct T_opthdr *toh;
493
494 toh = (struct T_opthdr *)ancil_buf;
495 toh->level = IPPROTO_IPV6;
496 toh->name = IPV6_RTHDR;
497 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
498 toh->status = 0;
499 ancil_buf += sizeof (struct T_opthdr);
500 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
501 ancil_buf += ipp->ipp_rthdrlen;
502 ancil_size -= toh->len;
503 }
504 if ((recv_ancillary.crb_ipv6_recvdstopts ||
505 recv_ancillary.crb_old_ipv6_recvdstopts) &&
506 (ipp->ipp_fields & IPPF_DSTOPTS)) {
507 struct T_opthdr *toh;
508
509 toh = (struct T_opthdr *)ancil_buf;
510 toh->level = IPPROTO_IPV6;
511 toh->name = IPV6_DSTOPTS;
512 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
513 toh->status = 0;
514 ancil_buf += sizeof (struct T_opthdr);
515 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
516 ancil_buf += ipp->ipp_dstoptslen;
517 ancil_size -= toh->len;
518 }
519
520 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
521 struct T_opthdr *toh;
522 cred_t *rcr = connp->conn_cred;
523
524 toh = (struct T_opthdr *)ancil_buf;
525 toh->level = SOL_SOCKET;
526 toh->name = SCM_UCRED;
527 toh->len = sizeof (struct T_opthdr) +
528 ucredminsize(ira->ira_cred);
529 toh->status = 0;
530 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
531 ancil_buf += toh->len;
532 ancil_size -= toh->len;
533 }
534 if (recv_ancillary.crb_timestamp) {
535 struct T_opthdr *toh;
536
537 toh = (struct T_opthdr *)ancil_buf;
538 toh->level = SOL_SOCKET;
539 toh->name = SCM_TIMESTAMP;
540 toh->len = sizeof (struct T_opthdr) +
541 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
542 toh->status = 0;
543 ancil_buf += sizeof (struct T_opthdr);
544 /* Align for gethrestime() */
545 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
546 sizeof (intptr_t));
547 gethrestime((timestruc_t *)ancil_buf);
548 ancil_buf = (uchar_t *)toh + toh->len;
549 ancil_size -= toh->len;
550 }
551
552 /*
553 * CAUTION:
554 * Due to aligment issues
555 * Processing of IP_RECVTTL option
556 * should always be the last. Adding
557 * any option processing after this will
558 * cause alignment panic.
559 */
560 if (recv_ancillary.crb_recvttl &&
561 (ira->ira_flags & IRAF_IS_IPV4)) {
562 struct T_opthdr *toh;
563 uint8_t *dstptr;
564
565 toh = (struct T_opthdr *)ancil_buf;
566 toh->level = IPPROTO_IP;
567 toh->name = IP_RECVTTL;
568 toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
569 toh->status = 0;
570 ancil_buf += sizeof (struct T_opthdr);
571 dstptr = (uint8_t *)ancil_buf;
572 *dstptr = ipp->ipp_hoplimit;
573 ancil_buf += sizeof (uint8_t);
574 ancil_size -= toh->len;
575 }
576
577 /* Consumed all of allocated space */
578 ASSERT(ancil_size == 0);
579
580 }
581
582 /*
583 * This routine retrieves the current status of socket options.
584 * It returns the size of the option retrieved, or -1.
585 */
586 int
conn_opt_get(conn_opt_arg_t * coa,t_scalar_t level,t_scalar_t name,uchar_t * ptr)587 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
588 uchar_t *ptr)
589 {
590 int *i1 = (int *)ptr;
591 conn_t *connp = coa->coa_connp;
592 ip_xmit_attr_t *ixa = coa->coa_ixa;
593 ip_pkt_t *ipp = coa->coa_ipp;
594 ip_stack_t *ipst = ixa->ixa_ipst;
595 uint_t len;
596
597 ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
598
599 switch (level) {
600 case SOL_SOCKET:
601 switch (name) {
602 case SO_DEBUG:
603 *i1 = connp->conn_debug ? SO_DEBUG : 0;
604 break; /* goto sizeof (int) option return */
605 case SO_KEEPALIVE:
606 *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
607 break;
608 case SO_LINGER: {
609 struct linger *lgr = (struct linger *)ptr;
610
611 lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
612 lgr->l_linger = connp->conn_lingertime;
613 }
614 return (sizeof (struct linger));
615
616 case SO_OOBINLINE:
617 *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
618 break;
619 case SO_REUSEADDR:
620 *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
621 break; /* goto sizeof (int) option return */
622 case SO_REUSEPORT:
623 *i1 = connp->conn_reuseport ? SO_REUSEPORT : 0;
624 break; /* goto sizeof (int) option return */
625 case SO_TYPE:
626 *i1 = connp->conn_so_type;
627 break; /* goto sizeof (int) option return */
628 case SO_DONTROUTE:
629 *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
630 SO_DONTROUTE : 0;
631 break; /* goto sizeof (int) option return */
632 case SO_USELOOPBACK:
633 *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
634 break; /* goto sizeof (int) option return */
635 case SO_BROADCAST:
636 *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
637 break; /* goto sizeof (int) option return */
638
639 case SO_SNDBUF:
640 *i1 = connp->conn_sndbuf;
641 break; /* goto sizeof (int) option return */
642 case SO_RCVBUF:
643 *i1 = connp->conn_rcvbuf;
644 break; /* goto sizeof (int) option return */
645 case SO_RCVTIMEO:
646 case SO_SNDTIMEO:
647 /*
648 * Pass these two options in order for third part
649 * protocol usage. Here just return directly.
650 */
651 *i1 = 0;
652 break;
653 case SO_DGRAM_ERRIND:
654 *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
655 break; /* goto sizeof (int) option return */
656 case SO_RECVUCRED:
657 *i1 = connp->conn_recv_ancillary.crb_recvucred;
658 break; /* goto sizeof (int) option return */
659 case SO_TIMESTAMP:
660 *i1 = connp->conn_recv_ancillary.crb_timestamp;
661 break; /* goto sizeof (int) option return */
662 case SO_VRRP:
663 *i1 = connp->conn_isvrrp;
664 break; /* goto sizeof (int) option return */
665 case SO_ANON_MLP:
666 *i1 = connp->conn_anon_mlp;
667 break; /* goto sizeof (int) option return */
668 case SO_MAC_EXEMPT:
669 *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
670 break; /* goto sizeof (int) option return */
671 case SO_MAC_IMPLICIT:
672 *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
673 break; /* goto sizeof (int) option return */
674 case SO_ALLZONES:
675 *i1 = connp->conn_allzones;
676 break; /* goto sizeof (int) option return */
677 case SO_EXCLBIND:
678 *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
679 break;
680 case SO_PROTOTYPE:
681 *i1 = connp->conn_proto;
682 break;
683
684 case SO_DOMAIN:
685 *i1 = connp->conn_family;
686 break;
687 default:
688 return (-1);
689 }
690 break;
691 case IPPROTO_IP:
692 if (connp->conn_family != AF_INET)
693 return (-1);
694 switch (name) {
695 case IP_OPTIONS:
696 case T_IP_OPTIONS:
697 if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
698 return (0);
699
700 len = ipp->ipp_ipv4_options_len;
701 if (len > 0) {
702 bcopy(ipp->ipp_ipv4_options, ptr, len);
703 }
704 return (len);
705
706 case IP_PKTINFO: {
707 /*
708 * This also handles IP_RECVPKTINFO.
709 * IP_PKTINFO and IP_RECVPKTINFO have same value.
710 * Differentiation is based on the size of the
711 * argument passed in.
712 */
713 struct in_pktinfo *pktinfo;
714
715 #ifdef notdef
716 /* optcom doesn't provide a length with "get" */
717 if (inlen == sizeof (int)) {
718 /* This is IP_RECVPKTINFO option. */
719 *i1 = connp->conn_recv_ancillary.
720 crb_ip_recvpktinfo;
721 return (sizeof (int));
722 }
723 #endif
724 /* XXX assumes that caller has room for max size! */
725
726 pktinfo = (struct in_pktinfo *)ptr;
727 pktinfo->ipi_ifindex = ixa->ixa_ifindex;
728 if (ipp->ipp_fields & IPPF_ADDR)
729 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
730 else
731 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
732 return (sizeof (struct in_pktinfo));
733 }
734 case IP_DONTFRAG:
735 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
736 return (sizeof (int));
737 case IP_TOS:
738 case T_IP_TOS:
739 *i1 = (int)ipp->ipp_type_of_service;
740 break; /* goto sizeof (int) option return */
741 case IP_TTL:
742 *i1 = (int)ipp->ipp_unicast_hops;
743 break; /* goto sizeof (int) option return */
744 case IP_DHCPINIT_IF:
745 return (-1);
746 case IP_NEXTHOP:
747 if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
748 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
749 return (sizeof (ipaddr_t));
750 } else {
751 return (0);
752 }
753
754 case IP_MULTICAST_IF:
755 /* 0 address if not set */
756 *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
757 return (sizeof (ipaddr_t));
758 case IP_MULTICAST_TTL:
759 *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
760 return (sizeof (uchar_t));
761 case IP_MULTICAST_LOOP:
762 *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
763 return (sizeof (uint8_t));
764 case IP_RECVOPTS:
765 *i1 = connp->conn_recv_ancillary.crb_recvopts;
766 break; /* goto sizeof (int) option return */
767 case IP_RECVDSTADDR:
768 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
769 break; /* goto sizeof (int) option return */
770 case IP_RECVIF:
771 *i1 = connp->conn_recv_ancillary.crb_recvif;
772 break; /* goto sizeof (int) option return */
773 case IP_RECVSLLA:
774 *i1 = connp->conn_recv_ancillary.crb_recvslla;
775 break; /* goto sizeof (int) option return */
776 case IP_RECVTTL:
777 *i1 = connp->conn_recv_ancillary.crb_recvttl;
778 break; /* goto sizeof (int) option return */
779 case IP_ADD_MEMBERSHIP:
780 case IP_DROP_MEMBERSHIP:
781 case MCAST_JOIN_GROUP:
782 case MCAST_LEAVE_GROUP:
783 case IP_BLOCK_SOURCE:
784 case IP_UNBLOCK_SOURCE:
785 case IP_ADD_SOURCE_MEMBERSHIP:
786 case IP_DROP_SOURCE_MEMBERSHIP:
787 case MCAST_BLOCK_SOURCE:
788 case MCAST_UNBLOCK_SOURCE:
789 case MCAST_JOIN_SOURCE_GROUP:
790 case MCAST_LEAVE_SOURCE_GROUP:
791 case MRT_INIT:
792 case MRT_DONE:
793 case MRT_ADD_VIF:
794 case MRT_DEL_VIF:
795 case MRT_ADD_MFC:
796 case MRT_DEL_MFC:
797 /* cannot "get" the value for these */
798 return (-1);
799 case MRT_VERSION:
800 case MRT_ASSERT:
801 (void) ip_mrouter_get(name, connp, ptr);
802 return (sizeof (int));
803 case IP_SEC_OPT:
804 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
805 IPSEC_AF_V4));
806 case IP_BOUND_IF:
807 /* Zero if not set */
808 *i1 = connp->conn_bound_if;
809 break; /* goto sizeof (int) option return */
810 case IP_UNSPEC_SRC:
811 *i1 = connp->conn_unspec_src;
812 break; /* goto sizeof (int) option return */
813 case IP_BROADCAST_TTL:
814 if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
815 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
816 else
817 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
818 return (sizeof (uchar_t));
819 default:
820 return (-1);
821 }
822 break;
823 case IPPROTO_IPV6:
824 if (connp->conn_family != AF_INET6)
825 return (-1);
826 switch (name) {
827 case IPV6_UNICAST_HOPS:
828 *i1 = (int)ipp->ipp_unicast_hops;
829 break; /* goto sizeof (int) option return */
830 case IPV6_MULTICAST_IF:
831 /* 0 index if not set */
832 *i1 = ixa->ixa_multicast_ifindex;
833 break; /* goto sizeof (int) option return */
834 case IPV6_MULTICAST_HOPS:
835 *i1 = ixa->ixa_multicast_ttl;
836 break; /* goto sizeof (int) option return */
837 case IPV6_MULTICAST_LOOP:
838 *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
839 break; /* goto sizeof (int) option return */
840 case IPV6_JOIN_GROUP:
841 case IPV6_LEAVE_GROUP:
842 case MCAST_JOIN_GROUP:
843 case MCAST_LEAVE_GROUP:
844 case MCAST_BLOCK_SOURCE:
845 case MCAST_UNBLOCK_SOURCE:
846 case MCAST_JOIN_SOURCE_GROUP:
847 case MCAST_LEAVE_SOURCE_GROUP:
848 /* cannot "get" the value for these */
849 return (-1);
850 case IPV6_BOUND_IF:
851 /* Zero if not set */
852 *i1 = connp->conn_bound_if;
853 break; /* goto sizeof (int) option return */
854 case IPV6_UNSPEC_SRC:
855 *i1 = connp->conn_unspec_src;
856 break; /* goto sizeof (int) option return */
857 case IPV6_RECVPKTINFO:
858 *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
859 break; /* goto sizeof (int) option return */
860 case IPV6_RECVTCLASS:
861 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
862 break; /* goto sizeof (int) option return */
863 case IPV6_RECVPATHMTU:
864 *i1 = connp->conn_ipv6_recvpathmtu;
865 break; /* goto sizeof (int) option return */
866 case IPV6_RECVHOPLIMIT:
867 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
868 break; /* goto sizeof (int) option return */
869 case IPV6_RECVHOPOPTS:
870 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
871 break; /* goto sizeof (int) option return */
872 case IPV6_RECVDSTOPTS:
873 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
874 break; /* goto sizeof (int) option return */
875 case _OLD_IPV6_RECVDSTOPTS:
876 *i1 =
877 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
878 break; /* goto sizeof (int) option return */
879 case IPV6_RECVRTHDRDSTOPTS:
880 *i1 = connp->conn_recv_ancillary.
881 crb_ipv6_recvrthdrdstopts;
882 break; /* goto sizeof (int) option return */
883 case IPV6_RECVRTHDR:
884 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
885 break; /* goto sizeof (int) option return */
886 case IPV6_PKTINFO: {
887 /* XXX assumes that caller has room for max size! */
888 struct in6_pktinfo *pkti;
889
890 pkti = (struct in6_pktinfo *)ptr;
891 pkti->ipi6_ifindex = ixa->ixa_ifindex;
892 if (ipp->ipp_fields & IPPF_ADDR)
893 pkti->ipi6_addr = ipp->ipp_addr;
894 else
895 pkti->ipi6_addr = ipv6_all_zeros;
896 return (sizeof (struct in6_pktinfo));
897 }
898 case IPV6_TCLASS:
899 *i1 = ipp->ipp_tclass;
900 break; /* goto sizeof (int) option return */
901 case IPV6_NEXTHOP: {
902 sin6_t *sin6 = (sin6_t *)ptr;
903
904 if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
905 return (0);
906
907 *sin6 = sin6_null;
908 sin6->sin6_family = AF_INET6;
909 sin6->sin6_addr = ixa->ixa_nexthop_v6;
910
911 return (sizeof (sin6_t));
912 }
913 case IPV6_HOPOPTS:
914 if (!(ipp->ipp_fields & IPPF_HOPOPTS))
915 return (0);
916 bcopy(ipp->ipp_hopopts, ptr,
917 ipp->ipp_hopoptslen);
918 return (ipp->ipp_hopoptslen);
919 case IPV6_RTHDRDSTOPTS:
920 if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
921 return (0);
922 bcopy(ipp->ipp_rthdrdstopts, ptr,
923 ipp->ipp_rthdrdstoptslen);
924 return (ipp->ipp_rthdrdstoptslen);
925 case IPV6_RTHDR:
926 if (!(ipp->ipp_fields & IPPF_RTHDR))
927 return (0);
928 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
929 return (ipp->ipp_rthdrlen);
930 case IPV6_DSTOPTS:
931 if (!(ipp->ipp_fields & IPPF_DSTOPTS))
932 return (0);
933 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
934 return (ipp->ipp_dstoptslen);
935 case IPV6_PATHMTU:
936 return (ip_fill_mtuinfo(connp, ixa,
937 (struct ip6_mtuinfo *)ptr));
938 case IPV6_SEC_OPT:
939 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
940 IPSEC_AF_V6));
941 case IPV6_SRC_PREFERENCES:
942 return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
943 case IPV6_DONTFRAG:
944 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
945 return (sizeof (int));
946 case IPV6_USE_MIN_MTU:
947 if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
948 *i1 = ixa->ixa_use_min_mtu;
949 else
950 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
951 break;
952 case IPV6_V6ONLY:
953 *i1 = connp->conn_ipv6_v6only;
954 return (sizeof (int));
955 default:
956 return (-1);
957 }
958 break;
959 case IPPROTO_UDP:
960 switch (name) {
961 case UDP_ANONPRIVBIND:
962 *i1 = connp->conn_anon_priv_bind;
963 break;
964 case UDP_EXCLBIND:
965 *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
966 break;
967 default:
968 return (-1);
969 }
970 break;
971 case IPPROTO_TCP:
972 switch (name) {
973 case TCP_RECVDSTADDR:
974 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
975 break;
976 case TCP_ANONPRIVBIND:
977 *i1 = connp->conn_anon_priv_bind;
978 break;
979 case TCP_EXCLBIND:
980 *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
981 break;
982 default:
983 return (-1);
984 }
985 break;
986 default:
987 return (-1);
988 }
989 return (sizeof (int));
990 }
991
992 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
993 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
994 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
995 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
996 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
997 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
998 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
999 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1000 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
1001 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1002
1003 /*
1004 * This routine sets the most common socket options including some
1005 * that are transport/ULP specific.
1006 * It returns errno or zero.
1007 *
1008 * For fixed length options, there is no sanity check
1009 * of passed in length is done. It is assumed *_optcom_req()
1010 * routines do the right thing.
1011 */
1012 int
conn_opt_set(conn_opt_arg_t * coa,t_scalar_t level,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)1013 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1014 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1015 {
1016 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1017
1018 /* We have different functions for different levels */
1019 switch (level) {
1020 case SOL_SOCKET:
1021 return (conn_opt_set_socket(coa, name, inlen, invalp,
1022 checkonly, cr));
1023 case IPPROTO_IP:
1024 return (conn_opt_set_ip(coa, name, inlen, invalp,
1025 checkonly, cr));
1026 case IPPROTO_IPV6:
1027 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1028 checkonly, cr));
1029 case IPPROTO_UDP:
1030 return (conn_opt_set_udp(coa, name, inlen, invalp,
1031 checkonly, cr));
1032 case IPPROTO_TCP:
1033 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1034 checkonly, cr));
1035 default:
1036 return (0);
1037 }
1038 }
1039
1040 /*
1041 * Handle SOL_SOCKET
1042 * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1043 * it implement their own checks and setting of conn_proto.
1044 */
1045 /* ARGSUSED1 */
1046 static int
conn_opt_set_socket(conn_opt_arg_t * coa,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)1047 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1048 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1049 {
1050 conn_t *connp = coa->coa_connp;
1051 ip_xmit_attr_t *ixa = coa->coa_ixa;
1052 int *i1 = (int *)invalp;
1053 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1054
1055 switch (name) {
1056 case SO_ALLZONES:
1057 if (IPCL_IS_BOUND(connp))
1058 return (EINVAL);
1059 break;
1060 case SO_VRRP:
1061 if (secpolicy_ip_config(cr, checkonly) != 0)
1062 return (EACCES);
1063 break;
1064 case SO_MAC_EXEMPT:
1065 if (secpolicy_net_mac_aware(cr) != 0)
1066 return (EACCES);
1067 if (IPCL_IS_BOUND(connp))
1068 return (EINVAL);
1069 break;
1070 case SO_MAC_IMPLICIT:
1071 if (secpolicy_net_mac_implicit(cr) != 0)
1072 return (EACCES);
1073 break;
1074 }
1075 if (checkonly)
1076 return (0);
1077
1078 mutex_enter(&connp->conn_lock);
1079 /* Here we set the actual option value */
1080 switch (name) {
1081 case SO_DEBUG:
1082 connp->conn_debug = onoff;
1083 break;
1084 case SO_KEEPALIVE:
1085 connp->conn_keepalive = onoff;
1086 break;
1087 case SO_LINGER: {
1088 struct linger *lgr = (struct linger *)invalp;
1089
1090 if (lgr->l_onoff) {
1091 connp->conn_linger = 1;
1092 connp->conn_lingertime = lgr->l_linger;
1093 } else {
1094 connp->conn_linger = 0;
1095 connp->conn_lingertime = 0;
1096 }
1097 break;
1098 }
1099 case SO_OOBINLINE:
1100 connp->conn_oobinline = onoff;
1101 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1102 break;
1103 case SO_REUSEADDR:
1104 connp->conn_reuseaddr = onoff;
1105 break;
1106 case SO_REUSEPORT:
1107 connp->conn_reuseport = onoff;
1108 break;
1109 case SO_DONTROUTE:
1110 if (onoff)
1111 ixa->ixa_flags |= IXAF_DONTROUTE;
1112 else
1113 ixa->ixa_flags &= ~IXAF_DONTROUTE;
1114 coa->coa_changed |= COA_ROUTE_CHANGED;
1115 break;
1116 case SO_USELOOPBACK:
1117 connp->conn_useloopback = onoff;
1118 break;
1119 case SO_BROADCAST:
1120 connp->conn_broadcast = onoff;
1121 break;
1122 case SO_SNDBUF:
1123 /* ULP has range checked the value */
1124 connp->conn_sndbuf = *i1;
1125 coa->coa_changed |= COA_SNDBUF_CHANGED;
1126 break;
1127 case SO_RCVBUF:
1128 /* ULP has range checked the value */
1129 connp->conn_rcvbuf = *i1;
1130 coa->coa_changed |= COA_RCVBUF_CHANGED;
1131 break;
1132 case SO_RCVTIMEO:
1133 case SO_SNDTIMEO:
1134 /*
1135 * Pass these two options in order for third part
1136 * protocol usage.
1137 */
1138 break;
1139 case SO_DGRAM_ERRIND:
1140 connp->conn_dgram_errind = onoff;
1141 break;
1142 case SO_RECVUCRED:
1143 connp->conn_recv_ancillary.crb_recvucred = onoff;
1144 break;
1145 case SO_ALLZONES:
1146 connp->conn_allzones = onoff;
1147 coa->coa_changed |= COA_ROUTE_CHANGED;
1148 if (onoff)
1149 ixa->ixa_zoneid = ALL_ZONES;
1150 else
1151 ixa->ixa_zoneid = connp->conn_zoneid;
1152 break;
1153 case SO_TIMESTAMP:
1154 connp->conn_recv_ancillary.crb_timestamp = onoff;
1155 break;
1156 case SO_VRRP:
1157 connp->conn_isvrrp = onoff;
1158 break;
1159 case SO_ANON_MLP:
1160 connp->conn_anon_mlp = onoff;
1161 break;
1162 case SO_MAC_EXEMPT:
1163 connp->conn_mac_mode = onoff ?
1164 CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1165 break;
1166 case SO_MAC_IMPLICIT:
1167 connp->conn_mac_mode = onoff ?
1168 CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1169 break;
1170 case SO_EXCLBIND:
1171 connp->conn_exclbind = onoff;
1172 break;
1173 }
1174 mutex_exit(&connp->conn_lock);
1175 return (0);
1176 }
1177
1178 /* Handle IPPROTO_IP */
1179 static int
conn_opt_set_ip(conn_opt_arg_t * coa,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)1180 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1181 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1182 {
1183 conn_t *connp = coa->coa_connp;
1184 ip_xmit_attr_t *ixa = coa->coa_ixa;
1185 ip_pkt_t *ipp = coa->coa_ipp;
1186 int *i1 = (int *)invalp;
1187 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1188 ipaddr_t addr = (ipaddr_t)*i1;
1189 uint_t ifindex;
1190 zoneid_t zoneid = IPCL_ZONEID(connp);
1191 ipif_t *ipif;
1192 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1193 int error;
1194
1195 if (connp->conn_family != AF_INET)
1196 return (EINVAL);
1197
1198 switch (name) {
1199 case IP_TTL:
1200 /* Don't allow zero */
1201 if (*i1 < 1 || *i1 > 255)
1202 return (EINVAL);
1203 break;
1204 case IP_MULTICAST_IF:
1205 if (addr == INADDR_ANY) {
1206 /* Clear */
1207 ifindex = 0;
1208 break;
1209 }
1210 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1211 if (ipif == NULL)
1212 return (EHOSTUNREACH);
1213 /* not supported by the virtual network iface */
1214 if (IS_VNI(ipif->ipif_ill)) {
1215 ipif_refrele(ipif);
1216 return (EINVAL);
1217 }
1218 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1219 ipif_refrele(ipif);
1220 break;
1221 case IP_NEXTHOP: {
1222 ire_t *ire;
1223
1224 if (addr == INADDR_ANY) {
1225 /* Clear */
1226 break;
1227 }
1228 /* Verify that the next-hop is on-link */
1229 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1230 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1231 if (ire == NULL)
1232 return (EHOSTUNREACH);
1233 ire_refrele(ire);
1234 break;
1235 }
1236 case IP_OPTIONS:
1237 case T_IP_OPTIONS: {
1238 uint_t newlen;
1239
1240 if (ipp->ipp_fields & IPPF_LABEL_V4)
1241 newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1242 else
1243 newlen = inlen;
1244 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1245 return (EINVAL);
1246 }
1247 break;
1248 }
1249 case IP_PKTINFO: {
1250 struct in_pktinfo *pktinfo;
1251
1252 /* Two different valid lengths */
1253 if (inlen != sizeof (int) &&
1254 inlen != sizeof (struct in_pktinfo))
1255 return (EINVAL);
1256 if (inlen == sizeof (int))
1257 break;
1258
1259 pktinfo = (struct in_pktinfo *)invalp;
1260 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1261 switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1262 zoneid, ipst, B_FALSE)) {
1263 case IPVL_UNICAST_UP:
1264 case IPVL_UNICAST_DOWN:
1265 break;
1266 default:
1267 return (EADDRNOTAVAIL);
1268 }
1269 }
1270 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1271 B_FALSE, ipst))
1272 return (ENXIO);
1273 break;
1274 }
1275 case IP_BOUND_IF:
1276 ifindex = *(uint_t *)i1;
1277
1278 /* Just check it is ok. */
1279 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1280 return (ENXIO);
1281 break;
1282 }
1283 if (checkonly)
1284 return (0);
1285
1286 /* Here we set the actual option value */
1287 /*
1288 * conn_lock protects the bitfields, and is used to
1289 * set the fields atomically. Not needed for ixa settings since
1290 * the caller has an exclusive copy of the ixa.
1291 * We can not hold conn_lock across the multicast options though.
1292 */
1293 switch (name) {
1294 case IP_OPTIONS:
1295 case T_IP_OPTIONS:
1296 /* Save options for use by IP. */
1297 mutex_enter(&connp->conn_lock);
1298 error = optcom_pkt_set(invalp, inlen,
1299 (uchar_t **)&ipp->ipp_ipv4_options,
1300 &ipp->ipp_ipv4_options_len);
1301 if (error != 0) {
1302 mutex_exit(&connp->conn_lock);
1303 return (error);
1304 }
1305 if (ipp->ipp_ipv4_options_len == 0) {
1306 ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1307 } else {
1308 ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1309 }
1310 mutex_exit(&connp->conn_lock);
1311 coa->coa_changed |= COA_HEADER_CHANGED;
1312 coa->coa_changed |= COA_WROFF_CHANGED;
1313 break;
1314
1315 case IP_TTL:
1316 mutex_enter(&connp->conn_lock);
1317 ipp->ipp_unicast_hops = *i1;
1318 mutex_exit(&connp->conn_lock);
1319 coa->coa_changed |= COA_HEADER_CHANGED;
1320 break;
1321 case IP_TOS:
1322 case T_IP_TOS:
1323 mutex_enter(&connp->conn_lock);
1324 if (*i1 == -1) {
1325 ipp->ipp_type_of_service = 0;
1326 } else {
1327 ipp->ipp_type_of_service = *i1;
1328 }
1329 mutex_exit(&connp->conn_lock);
1330 coa->coa_changed |= COA_HEADER_CHANGED;
1331 break;
1332 case IP_MULTICAST_IF:
1333 ixa->ixa_multicast_ifindex = ifindex;
1334 ixa->ixa_multicast_ifaddr = addr;
1335 coa->coa_changed |= COA_ROUTE_CHANGED;
1336 break;
1337 case IP_MULTICAST_TTL:
1338 ixa->ixa_multicast_ttl = *invalp;
1339 /* Handled automatically by ip_output */
1340 break;
1341 case IP_MULTICAST_LOOP:
1342 if (*invalp != 0)
1343 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1344 else
1345 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1346 /* Handled automatically by ip_output */
1347 break;
1348 case IP_RECVOPTS:
1349 mutex_enter(&connp->conn_lock);
1350 connp->conn_recv_ancillary.crb_recvopts = onoff;
1351 mutex_exit(&connp->conn_lock);
1352 break;
1353 case IP_RECVDSTADDR:
1354 mutex_enter(&connp->conn_lock);
1355 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1356 mutex_exit(&connp->conn_lock);
1357 break;
1358 case IP_RECVIF:
1359 mutex_enter(&connp->conn_lock);
1360 connp->conn_recv_ancillary.crb_recvif = onoff;
1361 mutex_exit(&connp->conn_lock);
1362 break;
1363 case IP_RECVSLLA:
1364 mutex_enter(&connp->conn_lock);
1365 connp->conn_recv_ancillary.crb_recvslla = onoff;
1366 mutex_exit(&connp->conn_lock);
1367 break;
1368 case IP_RECVTTL:
1369 mutex_enter(&connp->conn_lock);
1370 connp->conn_recv_ancillary.crb_recvttl = onoff;
1371 mutex_exit(&connp->conn_lock);
1372 break;
1373 case IP_PKTINFO: {
1374 /*
1375 * This also handles IP_RECVPKTINFO.
1376 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1377 * Differentiation is based on the size of the
1378 * argument passed in.
1379 */
1380 struct in_pktinfo *pktinfo;
1381
1382 if (inlen == sizeof (int)) {
1383 /* This is IP_RECVPKTINFO option. */
1384 mutex_enter(&connp->conn_lock);
1385 connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1386 onoff;
1387 mutex_exit(&connp->conn_lock);
1388 break;
1389 }
1390
1391 /* This is IP_PKTINFO option. */
1392 mutex_enter(&connp->conn_lock);
1393 pktinfo = (struct in_pktinfo *)invalp;
1394 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1395 ipp->ipp_fields |= IPPF_ADDR;
1396 IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1397 &ipp->ipp_addr);
1398 } else {
1399 ipp->ipp_fields &= ~IPPF_ADDR;
1400 ipp->ipp_addr = ipv6_all_zeros;
1401 }
1402 mutex_exit(&connp->conn_lock);
1403 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1404 coa->coa_changed |= COA_ROUTE_CHANGED;
1405 coa->coa_changed |= COA_HEADER_CHANGED;
1406 break;
1407 }
1408 case IP_DONTFRAG:
1409 if (onoff) {
1410 ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1411 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1412 } else {
1413 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1414 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1415 }
1416 /* Need to redo ip_attr_connect */
1417 coa->coa_changed |= COA_ROUTE_CHANGED;
1418 break;
1419 case IP_ADD_MEMBERSHIP:
1420 case IP_DROP_MEMBERSHIP:
1421 case MCAST_JOIN_GROUP:
1422 case MCAST_LEAVE_GROUP:
1423 return (ip_opt_set_multicast_group(connp, name,
1424 invalp, B_FALSE, checkonly));
1425
1426 case IP_BLOCK_SOURCE:
1427 case IP_UNBLOCK_SOURCE:
1428 case IP_ADD_SOURCE_MEMBERSHIP:
1429 case IP_DROP_SOURCE_MEMBERSHIP:
1430 case MCAST_BLOCK_SOURCE:
1431 case MCAST_UNBLOCK_SOURCE:
1432 case MCAST_JOIN_SOURCE_GROUP:
1433 case MCAST_LEAVE_SOURCE_GROUP:
1434 return (ip_opt_set_multicast_sources(connp, name,
1435 invalp, B_FALSE, checkonly));
1436
1437 case IP_SEC_OPT:
1438 mutex_enter(&connp->conn_lock);
1439 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1440 mutex_exit(&connp->conn_lock);
1441 if (error != 0) {
1442 return (error);
1443 }
1444 /* This is an IPsec policy change - redo ip_attr_connect */
1445 coa->coa_changed |= COA_ROUTE_CHANGED;
1446 break;
1447 case IP_NEXTHOP:
1448 ixa->ixa_nexthop_v4 = addr;
1449 if (addr != INADDR_ANY)
1450 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1451 else
1452 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1453 coa->coa_changed |= COA_ROUTE_CHANGED;
1454 break;
1455
1456 case IP_BOUND_IF:
1457 ixa->ixa_ifindex = ifindex; /* Send */
1458 mutex_enter(&connp->conn_lock);
1459 connp->conn_incoming_ifindex = ifindex; /* Receive */
1460 connp->conn_bound_if = ifindex; /* getsockopt */
1461 mutex_exit(&connp->conn_lock);
1462 coa->coa_changed |= COA_ROUTE_CHANGED;
1463 break;
1464 case IP_UNSPEC_SRC:
1465 mutex_enter(&connp->conn_lock);
1466 connp->conn_unspec_src = onoff;
1467 if (onoff)
1468 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1469 else
1470 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1471
1472 mutex_exit(&connp->conn_lock);
1473 break;
1474 case IP_BROADCAST_TTL:
1475 ixa->ixa_broadcast_ttl = *invalp;
1476 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1477 /* Handled automatically by ip_output */
1478 break;
1479 case MRT_INIT:
1480 case MRT_DONE:
1481 case MRT_ADD_VIF:
1482 case MRT_DEL_VIF:
1483 case MRT_ADD_MFC:
1484 case MRT_DEL_MFC:
1485 case MRT_ASSERT:
1486 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1487 return (error);
1488 }
1489 error = ip_mrouter_set((int)name, connp, checkonly,
1490 (uchar_t *)invalp, inlen);
1491 if (error) {
1492 return (error);
1493 }
1494 return (0);
1495
1496 }
1497 return (0);
1498 }
1499
1500 /* Handle IPPROTO_IPV6 */
1501 static int
conn_opt_set_ipv6(conn_opt_arg_t * coa,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)1502 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1503 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1504 {
1505 conn_t *connp = coa->coa_connp;
1506 ip_xmit_attr_t *ixa = coa->coa_ixa;
1507 ip_pkt_t *ipp = coa->coa_ipp;
1508 int *i1 = (int *)invalp;
1509 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1510 uint_t ifindex;
1511 zoneid_t zoneid = IPCL_ZONEID(connp);
1512 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1513 int error;
1514
1515 if (connp->conn_family != AF_INET6)
1516 return (EINVAL);
1517
1518 switch (name) {
1519 case IPV6_MULTICAST_IF:
1520 /*
1521 * The only possible error is EINVAL.
1522 * We call this option on both V4 and V6
1523 * If both fail, then this call returns
1524 * EINVAL. If at least one of them succeeds we
1525 * return success.
1526 */
1527 ifindex = *(uint_t *)i1;
1528
1529 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1530 !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1531 return (EINVAL);
1532 break;
1533 case IPV6_UNICAST_HOPS:
1534 /* Don't allow zero. -1 means to use default */
1535 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1536 return (EINVAL);
1537 break;
1538 case IPV6_MULTICAST_HOPS:
1539 /* -1 means use default */
1540 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1541 return (EINVAL);
1542 break;
1543 case IPV6_MULTICAST_LOOP:
1544 if (*i1 != 0 && *i1 != 1)
1545 return (EINVAL);
1546 break;
1547 case IPV6_BOUND_IF:
1548 ifindex = *(uint_t *)i1;
1549
1550 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1551 return (ENXIO);
1552 break;
1553 case IPV6_PKTINFO: {
1554 struct in6_pktinfo *pkti;
1555 boolean_t isv6;
1556
1557 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1558 return (EINVAL);
1559 if (inlen == 0)
1560 break; /* Clear values below */
1561
1562 /*
1563 * Verify the source address and ifindex. Privileged users
1564 * can use any source address.
1565 */
1566 pkti = (struct in6_pktinfo *)invalp;
1567
1568 /*
1569 * For link-local addresses we use the ipi6_ifindex when
1570 * we verify the local address.
1571 * If net_rawaccess then any source address can be used.
1572 */
1573 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1574 secpolicy_net_rawaccess(cr) != 0) {
1575 uint_t scopeid = 0;
1576 in6_addr_t *v6src = &pkti->ipi6_addr;
1577 ipaddr_t v4src;
1578 ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1579
1580 if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1581 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1582 if (v4src != INADDR_ANY) {
1583 laddr_type = ip_laddr_verify_v4(v4src,
1584 zoneid, ipst, B_FALSE);
1585 }
1586 } else {
1587 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1588 scopeid = pkti->ipi6_ifindex;
1589
1590 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1591 ipst, B_FALSE, scopeid);
1592 }
1593 switch (laddr_type) {
1594 case IPVL_UNICAST_UP:
1595 case IPVL_UNICAST_DOWN:
1596 break;
1597 default:
1598 return (EADDRNOTAVAIL);
1599 }
1600 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1601 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1602 /* Allow any source */
1603 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1604 }
1605 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1606 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1607 ipst))
1608 return (ENXIO);
1609 break;
1610 }
1611 case IPV6_HOPLIMIT:
1612 /* It is only allowed as ancilary data */
1613 if (!coa->coa_ancillary)
1614 return (EINVAL);
1615
1616 if (inlen != 0 && inlen != sizeof (int))
1617 return (EINVAL);
1618 if (inlen == sizeof (int)) {
1619 if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1620 return (EINVAL);
1621 }
1622 break;
1623 case IPV6_TCLASS:
1624 if (inlen != 0 && inlen != sizeof (int))
1625 return (EINVAL);
1626 if (inlen == sizeof (int)) {
1627 if (*i1 > 255 || *i1 < -1)
1628 return (EINVAL);
1629 }
1630 break;
1631 case IPV6_NEXTHOP:
1632 if (inlen != 0 && inlen != sizeof (sin6_t))
1633 return (EINVAL);
1634 if (inlen == sizeof (sin6_t)) {
1635 sin6_t *sin6 = (sin6_t *)invalp;
1636 ire_t *ire;
1637
1638 if (sin6->sin6_family != AF_INET6)
1639 return (EAFNOSUPPORT);
1640 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1641 return (EADDRNOTAVAIL);
1642
1643 /* Verify that the next-hop is on-link */
1644 ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1645 0, 0, IRE_ONLINK, NULL, zoneid,
1646 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1647 if (ire == NULL)
1648 return (EHOSTUNREACH);
1649 ire_refrele(ire);
1650 break;
1651 }
1652 break;
1653 case IPV6_RTHDR:
1654 case IPV6_DSTOPTS:
1655 case IPV6_RTHDRDSTOPTS:
1656 case IPV6_HOPOPTS: {
1657 /* All have the length field in the same place */
1658 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1659 /*
1660 * Sanity checks - minimum size, size a multiple of
1661 * eight bytes, and matching size passed in.
1662 */
1663 if (inlen != 0 &&
1664 inlen != (8 * (hopts->ip6h_len + 1)))
1665 return (EINVAL);
1666 break;
1667 }
1668 case IPV6_PATHMTU:
1669 /* Can't be set */
1670 return (EINVAL);
1671
1672 case IPV6_USE_MIN_MTU:
1673 if (inlen != sizeof (int))
1674 return (EINVAL);
1675 if (*i1 < -1 || *i1 > 1)
1676 return (EINVAL);
1677 break;
1678 case IPV6_SRC_PREFERENCES:
1679 if (inlen != sizeof (uint32_t))
1680 return (EINVAL);
1681 break;
1682 case IPV6_V6ONLY:
1683 if (*i1 < 0 || *i1 > 1) {
1684 return (EINVAL);
1685 }
1686 break;
1687 }
1688 if (checkonly)
1689 return (0);
1690
1691 /* Here we set the actual option value */
1692 /*
1693 * conn_lock protects the bitfields, and is used to
1694 * set the fields atomically. Not needed for ixa settings since
1695 * the caller has an exclusive copy of the ixa.
1696 * We can not hold conn_lock across the multicast options though.
1697 */
1698 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1699 switch (name) {
1700 case IPV6_MULTICAST_IF:
1701 ixa->ixa_multicast_ifindex = ifindex;
1702 /* Need to redo ip_attr_connect */
1703 coa->coa_changed |= COA_ROUTE_CHANGED;
1704 break;
1705 case IPV6_UNICAST_HOPS:
1706 /* -1 means use default */
1707 mutex_enter(&connp->conn_lock);
1708 if (*i1 == -1) {
1709 ipp->ipp_unicast_hops = connp->conn_default_ttl;
1710 } else {
1711 ipp->ipp_unicast_hops = (uint8_t)*i1;
1712 }
1713 mutex_exit(&connp->conn_lock);
1714 coa->coa_changed |= COA_HEADER_CHANGED;
1715 break;
1716 case IPV6_MULTICAST_HOPS:
1717 /* -1 means use default */
1718 if (*i1 == -1) {
1719 ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1720 } else {
1721 ixa->ixa_multicast_ttl = (uint8_t)*i1;
1722 }
1723 /* Handled automatically by ip_output */
1724 break;
1725 case IPV6_MULTICAST_LOOP:
1726 if (*i1 != 0)
1727 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1728 else
1729 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1730 /* Handled automatically by ip_output */
1731 break;
1732 case IPV6_JOIN_GROUP:
1733 case IPV6_LEAVE_GROUP:
1734 case MCAST_JOIN_GROUP:
1735 case MCAST_LEAVE_GROUP:
1736 return (ip_opt_set_multicast_group(connp, name,
1737 invalp, B_TRUE, checkonly));
1738
1739 case MCAST_BLOCK_SOURCE:
1740 case MCAST_UNBLOCK_SOURCE:
1741 case MCAST_JOIN_SOURCE_GROUP:
1742 case MCAST_LEAVE_SOURCE_GROUP:
1743 return (ip_opt_set_multicast_sources(connp, name,
1744 invalp, B_TRUE, checkonly));
1745
1746 case IPV6_BOUND_IF:
1747 ixa->ixa_ifindex = ifindex; /* Send */
1748 mutex_enter(&connp->conn_lock);
1749 connp->conn_incoming_ifindex = ifindex; /* Receive */
1750 connp->conn_bound_if = ifindex; /* getsockopt */
1751 mutex_exit(&connp->conn_lock);
1752 coa->coa_changed |= COA_ROUTE_CHANGED;
1753 break;
1754 case IPV6_UNSPEC_SRC:
1755 mutex_enter(&connp->conn_lock);
1756 connp->conn_unspec_src = onoff;
1757 if (onoff)
1758 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1759 else
1760 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1761 mutex_exit(&connp->conn_lock);
1762 break;
1763 case IPV6_RECVPKTINFO:
1764 mutex_enter(&connp->conn_lock);
1765 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1766 mutex_exit(&connp->conn_lock);
1767 break;
1768 case IPV6_RECVTCLASS:
1769 mutex_enter(&connp->conn_lock);
1770 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1771 mutex_exit(&connp->conn_lock);
1772 break;
1773 case IPV6_RECVPATHMTU:
1774 mutex_enter(&connp->conn_lock);
1775 connp->conn_ipv6_recvpathmtu = onoff;
1776 mutex_exit(&connp->conn_lock);
1777 break;
1778 case IPV6_RECVHOPLIMIT:
1779 mutex_enter(&connp->conn_lock);
1780 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1781 onoff;
1782 mutex_exit(&connp->conn_lock);
1783 break;
1784 case IPV6_RECVHOPOPTS:
1785 mutex_enter(&connp->conn_lock);
1786 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1787 mutex_exit(&connp->conn_lock);
1788 break;
1789 case IPV6_RECVDSTOPTS:
1790 mutex_enter(&connp->conn_lock);
1791 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1792 mutex_exit(&connp->conn_lock);
1793 break;
1794 case _OLD_IPV6_RECVDSTOPTS:
1795 mutex_enter(&connp->conn_lock);
1796 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1797 onoff;
1798 mutex_exit(&connp->conn_lock);
1799 break;
1800 case IPV6_RECVRTHDRDSTOPTS:
1801 mutex_enter(&connp->conn_lock);
1802 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1803 onoff;
1804 mutex_exit(&connp->conn_lock);
1805 break;
1806 case IPV6_RECVRTHDR:
1807 mutex_enter(&connp->conn_lock);
1808 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1809 mutex_exit(&connp->conn_lock);
1810 break;
1811 case IPV6_PKTINFO:
1812 mutex_enter(&connp->conn_lock);
1813 if (inlen == 0) {
1814 ipp->ipp_fields &= ~IPPF_ADDR;
1815 ipp->ipp_addr = ipv6_all_zeros;
1816 ixa->ixa_ifindex = 0;
1817 } else {
1818 struct in6_pktinfo *pkti;
1819
1820 pkti = (struct in6_pktinfo *)invalp;
1821 ipp->ipp_addr = pkti->ipi6_addr;
1822 if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1823 ipp->ipp_fields |= IPPF_ADDR;
1824 else
1825 ipp->ipp_fields &= ~IPPF_ADDR;
1826 ixa->ixa_ifindex = pkti->ipi6_ifindex;
1827 }
1828 mutex_exit(&connp->conn_lock);
1829 /* Source and ifindex might have changed */
1830 coa->coa_changed |= COA_HEADER_CHANGED;
1831 coa->coa_changed |= COA_ROUTE_CHANGED;
1832 break;
1833 case IPV6_HOPLIMIT:
1834 mutex_enter(&connp->conn_lock);
1835 if (inlen == 0 || *i1 == -1) {
1836 /* Revert to default */
1837 ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1838 ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1839 } else {
1840 ipp->ipp_hoplimit = *i1;
1841 ipp->ipp_fields |= IPPF_HOPLIMIT;
1842 /* Ensure that it sticks for multicast packets */
1843 ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1844 }
1845 mutex_exit(&connp->conn_lock);
1846 coa->coa_changed |= COA_HEADER_CHANGED;
1847 break;
1848 case IPV6_TCLASS:
1849 /*
1850 * IPV6_TCLASS accepts -1 as use kernel default
1851 * and [0, 255] as the actualy traffic class.
1852 */
1853 mutex_enter(&connp->conn_lock);
1854 if (inlen == 0 || *i1 == -1) {
1855 ipp->ipp_tclass = 0;
1856 ipp->ipp_fields &= ~IPPF_TCLASS;
1857 } else {
1858 ipp->ipp_tclass = *i1;
1859 ipp->ipp_fields |= IPPF_TCLASS;
1860 }
1861 mutex_exit(&connp->conn_lock);
1862 coa->coa_changed |= COA_HEADER_CHANGED;
1863 break;
1864 case IPV6_NEXTHOP:
1865 if (inlen == 0) {
1866 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1867 } else {
1868 sin6_t *sin6 = (sin6_t *)invalp;
1869
1870 ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1871 if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1872 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1873 else
1874 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1875 }
1876 coa->coa_changed |= COA_ROUTE_CHANGED;
1877 break;
1878 case IPV6_HOPOPTS:
1879 mutex_enter(&connp->conn_lock);
1880 error = optcom_pkt_set(invalp, inlen,
1881 (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1882 if (error != 0) {
1883 mutex_exit(&connp->conn_lock);
1884 return (error);
1885 }
1886 if (ipp->ipp_hopoptslen == 0) {
1887 ipp->ipp_fields &= ~IPPF_HOPOPTS;
1888 } else {
1889 ipp->ipp_fields |= IPPF_HOPOPTS;
1890 }
1891 mutex_exit(&connp->conn_lock);
1892 coa->coa_changed |= COA_HEADER_CHANGED;
1893 coa->coa_changed |= COA_WROFF_CHANGED;
1894 break;
1895 case IPV6_RTHDRDSTOPTS:
1896 mutex_enter(&connp->conn_lock);
1897 error = optcom_pkt_set(invalp, inlen,
1898 (uchar_t **)&ipp->ipp_rthdrdstopts,
1899 &ipp->ipp_rthdrdstoptslen);
1900 if (error != 0) {
1901 mutex_exit(&connp->conn_lock);
1902 return (error);
1903 }
1904 if (ipp->ipp_rthdrdstoptslen == 0) {
1905 ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1906 } else {
1907 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1908 }
1909 mutex_exit(&connp->conn_lock);
1910 coa->coa_changed |= COA_HEADER_CHANGED;
1911 coa->coa_changed |= COA_WROFF_CHANGED;
1912 break;
1913 case IPV6_DSTOPTS:
1914 mutex_enter(&connp->conn_lock);
1915 error = optcom_pkt_set(invalp, inlen,
1916 (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1917 if (error != 0) {
1918 mutex_exit(&connp->conn_lock);
1919 return (error);
1920 }
1921 if (ipp->ipp_dstoptslen == 0) {
1922 ipp->ipp_fields &= ~IPPF_DSTOPTS;
1923 } else {
1924 ipp->ipp_fields |= IPPF_DSTOPTS;
1925 }
1926 mutex_exit(&connp->conn_lock);
1927 coa->coa_changed |= COA_HEADER_CHANGED;
1928 coa->coa_changed |= COA_WROFF_CHANGED;
1929 break;
1930 case IPV6_RTHDR:
1931 mutex_enter(&connp->conn_lock);
1932 error = optcom_pkt_set(invalp, inlen,
1933 (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1934 if (error != 0) {
1935 mutex_exit(&connp->conn_lock);
1936 return (error);
1937 }
1938 if (ipp->ipp_rthdrlen == 0) {
1939 ipp->ipp_fields &= ~IPPF_RTHDR;
1940 } else {
1941 ipp->ipp_fields |= IPPF_RTHDR;
1942 }
1943 mutex_exit(&connp->conn_lock);
1944 coa->coa_changed |= COA_HEADER_CHANGED;
1945 coa->coa_changed |= COA_WROFF_CHANGED;
1946 break;
1947
1948 case IPV6_DONTFRAG:
1949 if (onoff) {
1950 ixa->ixa_flags |= IXAF_DONTFRAG;
1951 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1952 } else {
1953 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1954 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1955 }
1956 /* Need to redo ip_attr_connect */
1957 coa->coa_changed |= COA_ROUTE_CHANGED;
1958 break;
1959
1960 case IPV6_USE_MIN_MTU:
1961 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1962 ixa->ixa_use_min_mtu = *i1;
1963 /* Need to redo ip_attr_connect */
1964 coa->coa_changed |= COA_ROUTE_CHANGED;
1965 break;
1966
1967 case IPV6_SEC_OPT:
1968 mutex_enter(&connp->conn_lock);
1969 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1970 mutex_exit(&connp->conn_lock);
1971 if (error != 0) {
1972 return (error);
1973 }
1974 /* This is an IPsec policy change - redo ip_attr_connect */
1975 coa->coa_changed |= COA_ROUTE_CHANGED;
1976 break;
1977 case IPV6_SRC_PREFERENCES:
1978 /*
1979 * This socket option only affects connected
1980 * sockets that haven't already bound to a specific
1981 * IPv6 address. In other words, sockets that
1982 * don't call bind() with an address other than the
1983 * unspecified address and that call connect().
1984 * ip_set_destination_v6() passes these preferences
1985 * to the ipif_select_source_v6() function.
1986 */
1987 mutex_enter(&connp->conn_lock);
1988 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
1989 mutex_exit(&connp->conn_lock);
1990 if (error != 0) {
1991 return (error);
1992 }
1993 break;
1994 case IPV6_V6ONLY:
1995 mutex_enter(&connp->conn_lock);
1996 connp->conn_ipv6_v6only = onoff;
1997 mutex_exit(&connp->conn_lock);
1998 break;
1999 }
2000 return (0);
2001 }
2002
2003 /* Handle IPPROTO_UDP */
2004 /* ARGSUSED1 */
2005 static int
conn_opt_set_udp(conn_opt_arg_t * coa,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)2006 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2007 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2008 {
2009 conn_t *connp = coa->coa_connp;
2010 int *i1 = (int *)invalp;
2011 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2012 int error;
2013
2014 switch (name) {
2015 case UDP_ANONPRIVBIND:
2016 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2017 return (error);
2018 }
2019 break;
2020 }
2021 if (checkonly)
2022 return (0);
2023
2024 /* Here we set the actual option value */
2025 mutex_enter(&connp->conn_lock);
2026 switch (name) {
2027 case UDP_ANONPRIVBIND:
2028 connp->conn_anon_priv_bind = onoff;
2029 break;
2030 case UDP_EXCLBIND:
2031 connp->conn_exclbind = onoff;
2032 break;
2033 }
2034 mutex_exit(&connp->conn_lock);
2035 return (0);
2036 }
2037
2038 /* Handle IPPROTO_TCP */
2039 /* ARGSUSED1 */
2040 static int
conn_opt_set_tcp(conn_opt_arg_t * coa,t_scalar_t name,uint_t inlen,uchar_t * invalp,boolean_t checkonly,cred_t * cr)2041 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2042 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2043 {
2044 conn_t *connp = coa->coa_connp;
2045 int *i1 = (int *)invalp;
2046 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2047 int error;
2048
2049 switch (name) {
2050 case TCP_ANONPRIVBIND:
2051 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2052 return (error);
2053 }
2054 break;
2055 }
2056 if (checkonly)
2057 return (0);
2058
2059 /* Here we set the actual option value */
2060 mutex_enter(&connp->conn_lock);
2061 switch (name) {
2062 case TCP_ANONPRIVBIND:
2063 connp->conn_anon_priv_bind = onoff;
2064 break;
2065 case TCP_EXCLBIND:
2066 connp->conn_exclbind = onoff;
2067 break;
2068 case TCP_RECVDSTADDR:
2069 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2070 break;
2071 }
2072 mutex_exit(&connp->conn_lock);
2073 return (0);
2074 }
2075
2076 int
conn_getsockname(conn_t * connp,struct sockaddr * sa,uint_t * salenp)2077 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2078 {
2079 sin_t *sin;
2080 sin6_t *sin6;
2081
2082 if (connp->conn_family == AF_INET) {
2083 if (*salenp < sizeof (sin_t))
2084 return (EINVAL);
2085
2086 *salenp = sizeof (sin_t);
2087 /* Fill zeroes and then initialize non-zero fields */
2088 sin = (sin_t *)sa;
2089 *sin = sin_null;
2090 sin->sin_family = AF_INET;
2091 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2092 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2093 sin->sin_addr.s_addr = connp->conn_saddr_v4;
2094 } else {
2095 /*
2096 * INADDR_ANY
2097 * conn_saddr is not set, we might be bound to
2098 * broadcast/multicast. Use conn_bound_addr as
2099 * local address instead (that could
2100 * also still be INADDR_ANY)
2101 */
2102 sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2103 }
2104 sin->sin_port = connp->conn_lport;
2105 } else {
2106 if (*salenp < sizeof (sin6_t))
2107 return (EINVAL);
2108
2109 *salenp = sizeof (sin6_t);
2110 /* Fill zeroes and then initialize non-zero fields */
2111 sin6 = (sin6_t *)sa;
2112 *sin6 = sin6_null;
2113 sin6->sin6_family = AF_INET6;
2114 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2115 sin6->sin6_addr = connp->conn_saddr_v6;
2116 } else {
2117 /*
2118 * conn_saddr is not set, we might be bound to
2119 * broadcast/multicast. Use conn_bound_addr as
2120 * local address instead (which could
2121 * also still be unspecified)
2122 */
2123 sin6->sin6_addr = connp->conn_bound_addr_v6;
2124 }
2125 sin6->sin6_port = connp->conn_lport;
2126 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2127 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2128 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2129 }
2130 return (0);
2131 }
2132
2133 int
conn_getpeername(conn_t * connp,struct sockaddr * sa,uint_t * salenp)2134 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2135 {
2136 struct sockaddr_in *sin;
2137 struct sockaddr_in6 *sin6;
2138
2139 if (connp->conn_family == AF_INET) {
2140 if (*salenp < sizeof (sin_t))
2141 return (EINVAL);
2142
2143 *salenp = sizeof (sin_t);
2144 /* initialize */
2145 sin = (sin_t *)sa;
2146 *sin = sin_null;
2147 sin->sin_family = AF_INET;
2148 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2149 sin->sin_port = connp->conn_fport;
2150 } else {
2151 if (*salenp < sizeof (sin6_t))
2152 return (EINVAL);
2153
2154 *salenp = sizeof (sin6_t);
2155 /* initialize */
2156 sin6 = (sin6_t *)sa;
2157 *sin6 = sin6_null;
2158 sin6->sin6_family = AF_INET6;
2159 sin6->sin6_addr = connp->conn_faddr_v6;
2160 sin6->sin6_port = connp->conn_fport;
2161 sin6->sin6_flowinfo = connp->conn_flowinfo;
2162 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2163 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2164 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2165 }
2166 return (0);
2167 }
2168
2169 static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
2170 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2171
2172 /*
2173 * Allocate and fill in conn_ht_iphc based on the current information
2174 * in the conn.
2175 * Normally used when we bind() and connect().
2176 * Returns failure if can't allocate memory, or if there is a problem
2177 * with a routing header/option.
2178 *
2179 * We allocate space for the transport header (ulp_hdr_len + extra) and
2180 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2181 * The extra is there for transports that want some spare room for future
2182 * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2183 * excludes the extra part.
2184 *
2185 * We massage an routing option/header and store the ckecksum difference
2186 * in conn_sum.
2187 *
2188 * Caller needs to update conn_wroff if desired.
2189 */
2190 int
conn_build_hdr_template(conn_t * connp,uint_t ulp_hdr_length,uint_t extra,const in6_addr_t * v6src,const in6_addr_t * v6dst,uint32_t flowinfo)2191 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2192 const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2193 {
2194 ip_xmit_attr_t *ixa = connp->conn_ixa;
2195 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
2196 uint_t ip_hdr_length;
2197 uchar_t *hdrs;
2198 uint_t hdrs_len;
2199
2200 ASSERT(MUTEX_HELD(&connp->conn_lock));
2201
2202 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2203 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2204 /* In case of TX label and IP options it can be too much */
2205 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2206 /* Preserves existing TX errno for this */
2207 return (EHOSTUNREACH);
2208 }
2209 } else {
2210 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2211 }
2212 ixa->ixa_ip_hdr_length = ip_hdr_length;
2213 hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2214 ASSERT(hdrs_len != 0);
2215
2216 if (hdrs_len != connp->conn_ht_iphc_allocated) {
2217 /* Allocate new before we free any old */
2218 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2219 if (hdrs == NULL)
2220 return (ENOMEM);
2221
2222 if (connp->conn_ht_iphc != NULL) {
2223 kmem_free(connp->conn_ht_iphc,
2224 connp->conn_ht_iphc_allocated);
2225 }
2226 connp->conn_ht_iphc = hdrs;
2227 connp->conn_ht_iphc_allocated = hdrs_len;
2228 } else {
2229 hdrs = connp->conn_ht_iphc;
2230 }
2231 hdrs_len -= extra;
2232 connp->conn_ht_iphc_len = hdrs_len;
2233
2234 connp->conn_ht_ulp = hdrs + ip_hdr_length;
2235 connp->conn_ht_ulp_len = ulp_hdr_length;
2236
2237 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2238 ipha_t *ipha = (ipha_t *)hdrs;
2239
2240 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2241 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2242 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2243 ipha->ipha_length = htons(hdrs_len);
2244 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2245 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2246 else
2247 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2248
2249 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2250 connp->conn_sum = cksum_massage_options_v4(ipha,
2251 connp->conn_netstack);
2252 } else {
2253 connp->conn_sum = 0;
2254 }
2255 } else {
2256 ip6_t *ip6h = (ip6_t *)hdrs;
2257
2258 ip6h->ip6_src = *v6src;
2259 ip6h->ip6_dst = *v6dst;
2260 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2261 flowinfo);
2262 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2263
2264 if (ipp->ipp_fields & IPPF_RTHDR) {
2265 connp->conn_sum = cksum_massage_options_v6(ip6h,
2266 ip_hdr_length, connp->conn_netstack);
2267
2268 /*
2269 * Verify that the first hop isn't a mapped address.
2270 * Routers along the path need to do this verification
2271 * for subsequent hops.
2272 */
2273 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2274 return (EADDRNOTAVAIL);
2275
2276 } else {
2277 connp->conn_sum = 0;
2278 }
2279 }
2280 return (0);
2281 }
2282
2283 /*
2284 * Prepend a header template to data_mp based on the ip_pkt_t
2285 * and the passed in source, destination and protocol.
2286 *
2287 * Returns failure if can't allocate memory, in which case data_mp is freed.
2288 * We allocate space for the transport header (ulp_hdr_len) and
2289 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2290 *
2291 * We massage an routing option/header and return the ckecksum difference
2292 * in *sump. This is in host byte order.
2293 *
2294 * Caller needs to update conn_wroff if desired.
2295 */
2296 mblk_t *
conn_prepend_hdr(ip_xmit_attr_t * ixa,const ip_pkt_t * ipp,const in6_addr_t * v6src,const in6_addr_t * v6dst,uint8_t protocol,uint32_t flowinfo,uint_t ulp_hdr_length,mblk_t * data_mp,uint_t data_length,uint_t wroff_extra,uint32_t * sump,int * errorp)2297 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2298 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2299 uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2300 uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2301 {
2302 uint_t ip_hdr_length;
2303 uchar_t *hdrs;
2304 uint_t hdrs_len;
2305 mblk_t *mp;
2306
2307 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2308 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2309 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2310 } else {
2311 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2312 }
2313 hdrs_len = ip_hdr_length + ulp_hdr_length;
2314 ASSERT(hdrs_len != 0);
2315
2316 ixa->ixa_ip_hdr_length = ip_hdr_length;
2317
2318 /* Can we prepend to data_mp? */
2319 if (data_mp != NULL &&
2320 data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2321 data_mp->b_datap->db_ref == 1) {
2322 hdrs = data_mp->b_rptr - hdrs_len;
2323 data_mp->b_rptr = hdrs;
2324 mp = data_mp;
2325 } else {
2326 mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2327 if (mp == NULL) {
2328 freemsg(data_mp);
2329 *errorp = ENOMEM;
2330 return (NULL);
2331 }
2332 mp->b_wptr = mp->b_datap->db_lim;
2333 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2334 mp->b_cont = data_mp;
2335 }
2336
2337 /*
2338 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2339 * if PKTINFO (aka IPPF_ADDR) was set.
2340 */
2341 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2342 ipha_t *ipha = (ipha_t *)hdrs;
2343
2344 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2345 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2346 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2347 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2348 ipha->ipha_length = htons(hdrs_len + data_length);
2349 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2350 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2351 else
2352 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2353
2354 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2355 *sump = cksum_massage_options_v4(ipha,
2356 ixa->ixa_ipst->ips_netstack);
2357 } else {
2358 *sump = 0;
2359 }
2360 } else {
2361 ip6_t *ip6h = (ip6_t *)hdrs;
2362
2363 ip6h->ip6_src = *v6src;
2364 ip6h->ip6_dst = *v6dst;
2365 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2366 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2367
2368 if (ipp->ipp_fields & IPPF_RTHDR) {
2369 *sump = cksum_massage_options_v6(ip6h,
2370 ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2371
2372 /*
2373 * Verify that the first hop isn't a mapped address.
2374 * Routers along the path need to do this verification
2375 * for subsequent hops.
2376 */
2377 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2378 *errorp = EADDRNOTAVAIL;
2379 freemsg(mp);
2380 return (NULL);
2381 }
2382 } else {
2383 *sump = 0;
2384 }
2385 }
2386 return (mp);
2387 }
2388
2389 /*
2390 * Massage a source route if any putting the first hop
2391 * in ipha_dst. Compute a starting value for the checksum which
2392 * takes into account that the original ipha_dst should be
2393 * included in the checksum but that IP will include the
2394 * first hop from the source route in the tcp checksum.
2395 */
2396 static uint32_t
cksum_massage_options_v4(ipha_t * ipha,netstack_t * ns)2397 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2398 {
2399 in_addr_t dst;
2400 uint32_t cksum;
2401
2402 /* Get last hop then diff against first hop */
2403 cksum = ip_massage_options(ipha, ns);
2404 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2405 dst = ipha->ipha_dst;
2406 cksum -= ((dst >> 16) + (dst & 0xffff));
2407 if ((int)cksum < 0)
2408 cksum--;
2409 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2410 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2411 ASSERT(cksum < 0x10000);
2412 return (ntohs(cksum));
2413 }
2414
2415 static uint32_t
cksum_massage_options_v6(ip6_t * ip6h,uint_t ip_hdr_len,netstack_t * ns)2416 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2417 {
2418 uint8_t *end;
2419 ip6_rthdr_t *rth;
2420 uint32_t cksum;
2421
2422 end = (uint8_t *)ip6h + ip_hdr_len;
2423 rth = ip_find_rthdr_v6(ip6h, end);
2424 if (rth == NULL)
2425 return (0);
2426
2427 cksum = ip_massage_options_v6(ip6h, rth, ns);
2428 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2429 ASSERT(cksum < 0x10000);
2430 return (ntohs(cksum));
2431 }
2432
2433 /*
2434 * ULPs that change the destination address need to call this for each
2435 * change to discard any state about a previous destination that might
2436 * have been multicast or multirt.
2437 */
2438 void
ip_attr_newdst(ip_xmit_attr_t * ixa)2439 ip_attr_newdst(ip_xmit_attr_t *ixa)
2440 {
2441 ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2442 IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2443 IXAF_NO_LOOP_ZONEID_SET);
2444 }
2445
2446 /*
2447 * Determine the nexthop which will be used.
2448 * Normally this is just the destination, but if a IPv4 source route, or
2449 * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2450 * there.
2451 */
2452 void
ip_attr_nexthop(const ip_pkt_t * ipp,const ip_xmit_attr_t * ixa,const in6_addr_t * dst,in6_addr_t * nexthop)2453 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2454 const in6_addr_t *dst, in6_addr_t *nexthop)
2455 {
2456 if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2457 *nexthop = *dst;
2458 return;
2459 }
2460 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2461 ipaddr_t v4dst;
2462 ipaddr_t v4nexthop;
2463
2464 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2465 v4nexthop = ip_pkt_source_route_v4(ipp);
2466 if (v4nexthop == INADDR_ANY)
2467 v4nexthop = v4dst;
2468
2469 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2470 } else {
2471 const in6_addr_t *v6nexthop;
2472
2473 v6nexthop = ip_pkt_source_route_v6(ipp);
2474 if (v6nexthop == NULL)
2475 v6nexthop = dst;
2476
2477 *nexthop = *v6nexthop;
2478 }
2479 }
2480
2481 /*
2482 * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2483 * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2484 * case (connected latching is done in conn_connect).
2485 * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2486 * set, but doesn't otherwise use the conn_t.
2487 *
2488 * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2489 * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2490 *
2491 * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2492 * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2493 *
2494 * Updates laddrp and uinfo if they are non-NULL.
2495 *
2496 * TSOL notes: The callers if ip_attr_connect must check if the destination
2497 * is different than before and in that case redo conn_update_label.
2498 * The callers of conn_connect do not need that since conn_connect
2499 * performs the conn_update_label.
2500 */
2501 int
ip_attr_connect(const conn_t * connp,ip_xmit_attr_t * ixa,const in6_addr_t * v6src,const in6_addr_t * v6dst,const in6_addr_t * v6nexthop,in_port_t dstport,in6_addr_t * laddrp,iulp_t * uinfo,uint32_t flags)2502 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2503 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2504 const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2505 iulp_t *uinfo, uint32_t flags)
2506 {
2507 in6_addr_t laddr = *v6src;
2508 int error;
2509
2510 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2511
2512 if (connp->conn_zone_is_global)
2513 flags |= IPDF_ZONE_IS_GLOBAL;
2514 else
2515 flags &= ~IPDF_ZONE_IS_GLOBAL;
2516
2517 /*
2518 * Lookup the route to determine a source address and the uinfo.
2519 * If the ULP has a source route option then the caller will
2520 * have set v6nexthop to be the first hop.
2521 */
2522 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2523 ipaddr_t v4dst;
2524 ipaddr_t v4src, v4nexthop;
2525
2526 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2527 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2528 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2529
2530 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2531 flags &= ~IPDF_SELECT_SRC;
2532 else
2533 flags |= IPDF_SELECT_SRC;
2534
2535 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2536 uinfo, flags, connp->conn_mac_mode);
2537 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2538 } else {
2539 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2540 flags &= ~IPDF_SELECT_SRC;
2541 else
2542 flags |= IPDF_SELECT_SRC;
2543
2544 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2545 uinfo, flags, connp->conn_mac_mode);
2546 }
2547 /* Pass out some address even if we hit a RTF_REJECT etc */
2548 if (laddrp != NULL)
2549 *laddrp = laddr;
2550
2551 if (error != 0)
2552 return (error);
2553
2554 if (flags & IPDF_IPSEC) {
2555 /*
2556 * Set any IPsec policy in ixa. Routine also looks at ULP
2557 * ports.
2558 */
2559 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2560 }
2561 return (0);
2562 }
2563
2564 /*
2565 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2566 * Assumes that conn_faddr and conn_fport are already set. As such it is not
2567 * usable for SCTP, since SCTP has multiple faddrs.
2568 *
2569 * Caller must hold conn_lock to provide atomic constency between the
2570 * conn_t's addresses and the ixa.
2571 * NOTE: this function drops and reaquires conn_lock since it can't be
2572 * held across ip_attr_connect/ip_set_destination.
2573 *
2574 * The caller needs to handle inserting in the receive-side fanout when
2575 * appropriate after conn_connect returns.
2576 */
2577 int
conn_connect(conn_t * connp,iulp_t * uinfo,uint32_t flags)2578 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2579 {
2580 ip_xmit_attr_t *ixa = connp->conn_ixa;
2581 in6_addr_t nexthop;
2582 in6_addr_t saddr, faddr;
2583 in_port_t fport;
2584 int error;
2585
2586 ASSERT(MUTEX_HELD(&connp->conn_lock));
2587
2588 if (connp->conn_ipversion == IPV4_VERSION)
2589 ixa->ixa_flags |= IXAF_IS_IPV4;
2590 else
2591 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2592
2593 /* We do IPsec latching below - hence no caching in ip_attr_connect */
2594 flags &= ~IPDF_IPSEC;
2595
2596 /* In case we had previously done an ip_attr_connect */
2597 ip_attr_newdst(ixa);
2598
2599 /*
2600 * Determine the nexthop and copy the addresses before dropping
2601 * conn_lock.
2602 */
2603 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2604 &connp->conn_faddr_v6, &nexthop);
2605 saddr = connp->conn_saddr_v6;
2606 faddr = connp->conn_faddr_v6;
2607 fport = connp->conn_fport;
2608
2609 mutex_exit(&connp->conn_lock);
2610 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2611 &saddr, uinfo, flags | IPDF_VERIFY_DST);
2612 mutex_enter(&connp->conn_lock);
2613
2614 /* Could have changed even if an error */
2615 connp->conn_saddr_v6 = saddr;
2616 if (error != 0)
2617 return (error);
2618
2619 /*
2620 * Check whether Trusted Solaris policy allows communication with this
2621 * host, and pretend that the destination is unreachable if not.
2622 * Compute any needed label and place it in ipp_label_v4/v6.
2623 *
2624 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2625 * the packet.
2626 *
2627 * TSOL Note: Any concurrent threads would pick a different ixa
2628 * (and ipp if they are to change the ipp) so we
2629 * don't have to worry about concurrent threads.
2630 */
2631 if (is_system_labeled()) {
2632 if (connp->conn_mlp_type != mlptSingle)
2633 return (ECONNREFUSED);
2634
2635 /*
2636 * conn_update_label will set ipp_label* which will later
2637 * be used by conn_build_hdr_template.
2638 */
2639 error = conn_update_label(connp, ixa,
2640 &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2641 if (error != 0)
2642 return (error);
2643 }
2644
2645 /*
2646 * Ensure that we match on the selected local address.
2647 * This overrides conn_laddr in the case we had earlier bound to a
2648 * multicast or broadcast address.
2649 */
2650 connp->conn_laddr_v6 = connp->conn_saddr_v6;
2651
2652 /*
2653 * Allow setting new policies.
2654 * The addresses/ports are already set, thus the IPsec policy calls
2655 * can handle their passed-in conn's.
2656 */
2657 connp->conn_policy_cached = B_FALSE;
2658
2659 /*
2660 * Cache IPsec policy in this conn. If we have per-socket policy,
2661 * we'll cache that. If we don't, we'll inherit global policy.
2662 *
2663 * This is done before the caller inserts in the receive-side fanout.
2664 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2665 * for connections where we don't have a policy. This is to prevent
2666 * global policy lookups in the inbound path.
2667 *
2668 * If we insert before we set conn_policy_cached,
2669 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2670 * because global policy cound be non-empty. We normally call
2671 * ipsec_check_policy() for conn_policy_cached connections only if
2672 * conn_in_enforce_policy is set. But in this case,
2673 * conn_policy_cached can get set anytime since we made the
2674 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2675 * called, which will make the above assumption false. Thus, we
2676 * need to insert after we set conn_policy_cached.
2677 */
2678 error = ipsec_conn_cache_policy(connp,
2679 connp->conn_ipversion == IPV4_VERSION);
2680 if (error != 0)
2681 return (error);
2682
2683 /*
2684 * We defer to do LSO check until here since now we have better idea
2685 * whether IPsec is present. If the underlying ill is LSO capable,
2686 * copy its capability in so the ULP can decide whether to enable LSO
2687 * on this connection. So far, only TCP/IPv4 is implemented, so won't
2688 * claim LSO for IPv6.
2689 *
2690 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2691 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2692 */
2693 ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2694
2695 ASSERT(ixa->ixa_ire != NULL);
2696 if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2697 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2698 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2699 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2700 (ixa->ixa_nce != NULL) &&
2701 ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2702 ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2703 ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2704 ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2705 ixa->ixa_flags |= IXAF_LSO_CAPAB;
2706 }
2707
2708 /* Check whether ZEROCOPY capability is usable for this connection. */
2709 ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2710
2711 if ((flags & IPDF_ZCOPY) &&
2712 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2713 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2714 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2715 (ixa->ixa_nce != NULL) &&
2716 ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2717 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2718 }
2719 return (0);
2720 }
2721
/*
 * Predicates to check whether an address matches the conn's conn_*last*
 * fields.
 */
2725
2726 /*
2727 * Compare the conn against an address.
2728 * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2729 */
2730 boolean_t
conn_same_as_last_v4(conn_t * connp,sin_t * sin)2731 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2732 {
2733 ASSERT(connp->conn_family == AF_INET);
2734 return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2735 sin->sin_port == connp->conn_lastdstport);
2736 }
2737
2738 /*
2739 * Compare, including for mapped addresses
2740 */
2741 boolean_t
conn_same_as_last_v6(conn_t * connp,sin6_t * sin6)2742 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2743 {
2744 return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2745 sin6->sin6_port == connp->conn_lastdstport &&
2746 sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2747 sin6->sin6_scope_id == connp->conn_lastscopeid);
2748 }
2749
2750 /*
2751 * Compute a label and place it in the ip_packet_t.
2752 * Handles IPv4 and IPv6.
2753 * The caller should have a correct ixa_tsl and ixa_zoneid and have
2754 * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2755 * has been called.
2756 */
2757 int
conn_update_label(const conn_t * connp,const ip_xmit_attr_t * ixa,const in6_addr_t * v6dst,ip_pkt_t * ipp)2758 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2759 const in6_addr_t *v6dst, ip_pkt_t *ipp)
2760 {
2761 int err;
2762 ipaddr_t v4dst;
2763
2764 if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2765 uchar_t opt_storage[IP_MAX_OPT_LENGTH];
2766
2767 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2768
2769 err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2770 v4dst, opt_storage, ixa->ixa_ipst);
2771 if (err == 0) {
2772 /* Length contained in opt_storage[IPOPT_OLEN] */
2773 err = optcom_pkt_set(opt_storage,
2774 opt_storage[IPOPT_OLEN],
2775 (uchar_t **)&ipp->ipp_label_v4,
2776 &ipp->ipp_label_len_v4);
2777 }
2778 if (err != 0) {
2779 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2780 char *, "conn(1) failed to update options(2) "
2781 "on ixa(3)",
2782 conn_t *, connp, char *, opt_storage,
2783 ip_xmit_attr_t *, ixa);
2784 }
2785 if (ipp->ipp_label_len_v4 != 0)
2786 ipp->ipp_fields |= IPPF_LABEL_V4;
2787 else
2788 ipp->ipp_fields &= ~IPPF_LABEL_V4;
2789 } else {
2790 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
2791 uint_t optlen;
2792
2793 err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2794 v6dst, opt_storage, ixa->ixa_ipst);
2795 if (err == 0) {
2796 /*
2797 * Note that ipp_label_v6 is just the option - not
2798 * the hopopts extension header.
2799 *
2800 * Length contained in opt_storage[IPOPT_OLEN], but
2801 * that doesn't include the two byte options header.
2802 */
2803 optlen = opt_storage[IPOPT_OLEN];
2804 if (optlen != 0)
2805 optlen += 2;
2806
2807 err = optcom_pkt_set(opt_storage, optlen,
2808 (uchar_t **)&ipp->ipp_label_v6,
2809 &ipp->ipp_label_len_v6);
2810 }
2811 if (err != 0) {
2812 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2813 char *, "conn(1) failed to update options(2) "
2814 "on ixa(3)",
2815 conn_t *, connp, char *, opt_storage,
2816 ip_xmit_attr_t *, ixa);
2817 }
2818 if (ipp->ipp_label_len_v6 != 0)
2819 ipp->ipp_fields |= IPPF_LABEL_V6;
2820 else
2821 ipp->ipp_fields &= ~IPPF_LABEL_V6;
2822 }
2823 return (err);
2824 }
2825
2826 /*
2827 * Inherit all options settings from the parent/listener to the eager.
2828 * Returns zero on success; ENOMEM if memory allocation failed.
2829 *
2830 * We assume that the eager has not had any work done i.e., the conn_ixa
2831 * and conn_xmit_ipp are all zero.
2832 * Furthermore we assume that no other thread can access the eager (because
2833 * it isn't inserted in any fanout list).
2834 */
2835 int
conn_inherit_parent(conn_t * lconnp,conn_t * econnp)2836 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2837 {
2838 cred_t *credp;
2839 int err;
2840 void *notify_cookie;
2841 uint32_t xmit_hint;
2842
2843 econnp->conn_family = lconnp->conn_family;
2844 econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2845 econnp->conn_wq = lconnp->conn_wq;
2846 econnp->conn_rq = lconnp->conn_rq;
2847
2848 /*
2849 * Make a safe copy of the transmit attributes.
2850 * conn_connect will later be used by the caller to setup the ire etc.
2851 */
2852 ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2853 ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2854 ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2855 ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2856
2857 /* Preserve ixa_notify_cookie and xmit_hint */
2858 notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2859 xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2860 ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2861 econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2862 econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2863
2864 econnp->conn_bound_if = lconnp->conn_bound_if;
2865 econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2866
2867 /* Inherit all RECV options */
2868 econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2869
2870 err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2871 KM_NOSLEEP);
2872 if (err != 0)
2873 return (err);
2874
2875 econnp->conn_zoneid = lconnp->conn_zoneid;
2876 econnp->conn_allzones = lconnp->conn_allzones;
2877
2878 /* This is odd. Pick a flowlabel for each connection instead? */
2879 econnp->conn_flowinfo = lconnp->conn_flowinfo;
2880
2881 econnp->conn_default_ttl = lconnp->conn_default_ttl;
2882
2883 /*
2884 * TSOL: tsol_input_proc() needs the eager's cred before the
2885 * eager is accepted
2886 */
2887 ASSERT(lconnp->conn_cred != NULL);
2888 econnp->conn_cred = credp = lconnp->conn_cred;
2889 crhold(credp);
2890 econnp->conn_cpid = lconnp->conn_cpid;
2891 econnp->conn_open_time = ddi_get_lbolt64();
2892
2893 /*
2894 * Cache things in the ixa without any refhold.
2895 * Listener might not have set up ixa_cred
2896 */
2897 ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2898 econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2899 econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2900 if (is_system_labeled())
2901 econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2902
2903 /*
2904 * If the caller has the process-wide flag set, then default to MAC
2905 * exempt mode. This allows read-down to unlabeled hosts.
2906 */
2907 if (getpflags(NET_MAC_AWARE, credp) != 0)
2908 econnp->conn_mac_mode = CONN_MAC_AWARE;
2909
2910 econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2911
2912 /*
2913 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2914 * via soaccept()->soinheritoptions() which essentially applies
2915 * all the listener options to the new connection. The options that we
2916 * need to take care of are:
2917 * SO_DEBUG, SO_REUSEADDR, SO_REUSEPORT, SO_KEEPALIVE, SO_DONTROUTE,
2918 * SO_BROADCAST, SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND,
2919 * SO_LINGER, SO_SNDBUF, SO_RCVBUF.
2920 *
2921 * SO_RCVBUF: conn_rcvbuf is set.
2922 * SO_SNDBUF: conn_sndbuf is set.
2923 */
2924
2925 /* Could we define a struct and use a struct copy for this? */
2926 econnp->conn_sndbuf = lconnp->conn_sndbuf;
2927 econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2928 econnp->conn_sndlowat = lconnp->conn_sndlowat;
2929 econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2930 econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2931 econnp->conn_oobinline = lconnp->conn_oobinline;
2932 econnp->conn_debug = lconnp->conn_debug;
2933 econnp->conn_keepalive = lconnp->conn_keepalive;
2934 econnp->conn_linger = lconnp->conn_linger;
2935 econnp->conn_lingertime = lconnp->conn_lingertime;
2936
2937 /* Set the IP options */
2938 econnp->conn_broadcast = lconnp->conn_broadcast;
2939 econnp->conn_useloopback = lconnp->conn_useloopback;
2940 econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2941 econnp->conn_reuseport = lconnp->conn_reuseport;
2942 return (0);
2943 }
2944