1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 25 * Copyright 2024 Oxide Computer Company 26 */ 27 /* Copyright (c) 1990 Mentat Inc. 
*/ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/xti_inet.h> 35 #include <sys/ucred.h> 36 #include <sys/zone.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/cmn_err.h> 40 #include <sys/debug.h> 41 #include <sys/atomic.h> 42 #include <sys/policy.h> 43 44 #include <sys/systm.h> 45 #include <sys/param.h> 46 #include <sys/kmem.h> 47 #include <sys/sdt.h> 48 #include <sys/socket.h> 49 #include <sys/ethernet.h> 50 #include <sys/mac.h> 51 #include <net/if.h> 52 #include <net/if_types.h> 53 #include <net/if_arp.h> 54 #include <net/route.h> 55 #include <sys/sockio.h> 56 #include <netinet/in.h> 57 #include <net/if_dl.h> 58 59 #include <inet/common.h> 60 #include <inet/mi.h> 61 #include <inet/mib2.h> 62 #include <inet/nd.h> 63 #include <inet/arp.h> 64 #include <inet/snmpcom.h> 65 #include <inet/kstatcom.h> 66 67 #include <netinet/igmp_var.h> 68 #include <netinet/ip6.h> 69 #include <netinet/icmp6.h> 70 #include <netinet/sctp.h> 71 72 #include <inet/ip.h> 73 #include <inet/ip_impl.h> 74 #include <inet/ip6.h> 75 #include <inet/ip6_asp.h> 76 #include <inet/tcp.h> 77 #include <inet/ip_multi.h> 78 #include <inet/ip_if.h> 79 #include <inet/ip_ire.h> 80 #include <inet/ip_ftable.h> 81 #include <inet/ip_rts.h> 82 #include <inet/optcom.h> 83 #include <inet/ip_ndp.h> 84 #include <inet/ip_listutils.h> 85 #include <netinet/igmp.h> 86 #include <netinet/ip_mroute.h> 87 #include <netinet/udp.h> 88 #include <inet/ipp_common.h> 89 90 #include <net/pfkeyv2.h> 91 #include <inet/sadb.h> 92 #include <inet/ipsec_impl.h> 93 #include <inet/ipdrop.h> 94 #include <inet/ip_netinfo.h> 95 96 #include <inet/ipclassifier.h> 97 #include <inet/sctp_ip.h> 98 #include <inet/sctp/sctp_impl.h> 99 #include <inet/udp_impl.h> 100 #include <sys/sunddi.h> 101 102 #include <sys/tsol/label.h> 103 #include <sys/tsol/tnet.h> 104 105 /* 106 * Return how much size is needed for the different 
ancillary data items 107 */ 108 uint_t 109 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary, 110 ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp) 111 { 112 uint_t ancil_size; 113 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 114 115 /* 116 * If IP_RECVDSTADDR is set we include the destination IP 117 * address as an option. With IP_RECVOPTS we include all 118 * the IP options. 119 */ 120 ancil_size = 0; 121 if (recv_ancillary.crb_recvdstaddr && 122 (ira->ira_flags & IRAF_IS_IPV4)) { 123 ancil_size += sizeof (struct T_opthdr) + 124 sizeof (struct in_addr); 125 IP_STAT(ipst, conn_in_recvdstaddr); 126 } 127 128 /* 129 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but 130 * are different 131 */ 132 if (recv_ancillary.crb_ip_recvpktinfo && 133 connp->conn_family == AF_INET) { 134 ancil_size += sizeof (struct T_opthdr) + 135 sizeof (struct in_pktinfo); 136 IP_STAT(ipst, conn_in_recvpktinfo); 137 } 138 139 if ((recv_ancillary.crb_recvopts) && 140 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) { 141 ancil_size += sizeof (struct T_opthdr) + 142 ipp->ipp_ipv4_options_len; 143 IP_STAT(ipst, conn_in_recvopts); 144 } 145 146 if (recv_ancillary.crb_recvslla) { 147 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 148 ill_t *ill; 149 150 /* Make sure ira_l2src is setup if not already */ 151 if (!(ira->ira_flags & IRAF_L2SRC_SET)) { 152 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, 153 ipst); 154 if (ill != NULL) { 155 ip_setl2src(mp, ira, ill); 156 ill_refrele(ill); 157 } 158 } 159 ancil_size += sizeof (struct T_opthdr) + 160 sizeof (struct sockaddr_dl); 161 IP_STAT(ipst, conn_in_recvslla); 162 } 163 164 if (recv_ancillary.crb_recvif) { 165 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t); 166 IP_STAT(ipst, conn_in_recvif); 167 } 168 169 /* 170 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but 171 * are different 172 */ 173 if (recv_ancillary.crb_ip_recvpktinfo && 174 connp->conn_family == AF_INET6) { 175 ancil_size += sizeof 
(struct T_opthdr) + 176 sizeof (struct in6_pktinfo); 177 IP_STAT(ipst, conn_in_recvpktinfo); 178 } 179 180 if (recv_ancillary.crb_ipv6_recvhoplimit) { 181 ancil_size += sizeof (struct T_opthdr) + sizeof (int); 182 IP_STAT(ipst, conn_in_recvhoplimit); 183 } 184 185 if (recv_ancillary.crb_ipv6_recvtclass) { 186 ancil_size += sizeof (struct T_opthdr) + sizeof (int); 187 IP_STAT(ipst, conn_in_recvtclass); 188 } 189 190 if (recv_ancillary.crb_ipv6_recvhopopts && 191 (ipp->ipp_fields & IPPF_HOPOPTS)) { 192 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen; 193 IP_STAT(ipst, conn_in_recvhopopts); 194 } 195 /* 196 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS 197 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination 198 * options that appear before a routing header. 199 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set. 200 */ 201 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) { 202 if (recv_ancillary.crb_ipv6_recvrthdrdstopts || 203 (recv_ancillary.crb_ipv6_recvdstopts && 204 recv_ancillary.crb_ipv6_recvrthdr)) { 205 ancil_size += sizeof (struct T_opthdr) + 206 ipp->ipp_rthdrdstoptslen; 207 IP_STAT(ipst, conn_in_recvrthdrdstopts); 208 } 209 } 210 if ((recv_ancillary.crb_ipv6_recvrthdr) && 211 (ipp->ipp_fields & IPPF_RTHDR)) { 212 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen; 213 IP_STAT(ipst, conn_in_recvrthdr); 214 } 215 if ((recv_ancillary.crb_ipv6_recvdstopts || 216 recv_ancillary.crb_old_ipv6_recvdstopts) && 217 (ipp->ipp_fields & IPPF_DSTOPTS)) { 218 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen; 219 IP_STAT(ipst, conn_in_recvdstopts); 220 } 221 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) { 222 ancil_size += sizeof (struct T_opthdr) + 223 ucredminsize(ira->ira_cred); 224 IP_STAT(ipst, conn_in_recvucred); 225 } 226 227 /* 228 * If SO_TIMESTAMP is set allocate the appropriate sized 229 * buffer. 
Since gethrestime() expects a pointer aligned 230 * argument, we allocate space necessary for extra 231 * alignment (even though it might not be used). 232 */ 233 if (recv_ancillary.crb_timestamp) { 234 ancil_size += sizeof (struct T_opthdr) + 235 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 236 IP_STAT(ipst, conn_in_timestamp); 237 } 238 239 /* 240 * If IP_RECVTOS is set allocate the appropriately sized buffer 241 */ 242 if (recv_ancillary.crb_recvtos && 243 (ira->ira_flags & IRAF_IS_IPV4)) { 244 ancil_size += sizeof (struct T_opthdr) + 245 P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE); 246 IP_STAT(ipst, conn_in_recvtos); 247 } 248 249 /* 250 * If IP_RECVTTL is set allocate the appropriate sized buffer 251 */ 252 if (recv_ancillary.crb_recvttl && 253 (ira->ira_flags & IRAF_IS_IPV4)) { 254 ancil_size += sizeof (struct T_opthdr) + 255 P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE); 256 IP_STAT(ipst, conn_in_recvttl); 257 } 258 259 return (ancil_size); 260 } 261 262 /* 263 * Lay down the ancillary data items at "ancil_buf". 264 * Assumes caller has used conn_recvancillary_size to allocate a sufficiently 265 * large buffer - ancil_size. 266 */ 267 void 268 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary, 269 ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size) 270 { 271 /* 272 * Copy in destination address before options to avoid 273 * any padding issues. 
274 */ 275 if (recv_ancillary.crb_recvdstaddr && 276 (ira->ira_flags & IRAF_IS_IPV4)) { 277 struct T_opthdr *toh; 278 ipaddr_t *dstptr; 279 280 toh = (struct T_opthdr *)ancil_buf; 281 toh->level = IPPROTO_IP; 282 toh->name = IP_RECVDSTADDR; 283 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 284 toh->status = 0; 285 ancil_buf += sizeof (struct T_opthdr); 286 dstptr = (ipaddr_t *)ancil_buf; 287 *dstptr = ipp->ipp_addr_v4; 288 ancil_buf += sizeof (ipaddr_t); 289 ancil_size -= toh->len; 290 } 291 292 /* 293 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but 294 * are different 295 */ 296 if (recv_ancillary.crb_ip_recvpktinfo && 297 connp->conn_family == AF_INET) { 298 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 299 struct T_opthdr *toh; 300 struct in_pktinfo *pktinfop; 301 ill_t *ill; 302 ipif_t *ipif; 303 304 toh = (struct T_opthdr *)ancil_buf; 305 toh->level = IPPROTO_IP; 306 toh->name = IP_PKTINFO; 307 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop); 308 toh->status = 0; 309 ancil_buf += sizeof (struct T_opthdr); 310 pktinfop = (struct in_pktinfo *)ancil_buf; 311 312 pktinfop->ipi_ifindex = ira->ira_ruifindex; 313 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY; 314 315 /* Find a good address to report */ 316 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst); 317 if (ill != NULL) { 318 ipif = ipif_good_addr(ill, IPCL_ZONEID(connp)); 319 if (ipif != NULL) { 320 pktinfop->ipi_spec_dst.s_addr = 321 ipif->ipif_lcl_addr; 322 ipif_refrele(ipif); 323 } 324 ill_refrele(ill); 325 } 326 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4; 327 ancil_buf += sizeof (struct in_pktinfo); 328 ancil_size -= toh->len; 329 } 330 331 if ((recv_ancillary.crb_recvopts) && 332 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) { 333 struct T_opthdr *toh; 334 335 toh = (struct T_opthdr *)ancil_buf; 336 toh->level = IPPROTO_IP; 337 toh->name = IP_RECVOPTS; 338 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len; 339 toh->status = 0; 340 ancil_buf += 
sizeof (struct T_opthdr); 341 bcopy(ipp->ipp_ipv4_options, ancil_buf, 342 ipp->ipp_ipv4_options_len); 343 ancil_buf += ipp->ipp_ipv4_options_len; 344 ancil_size -= toh->len; 345 } 346 347 if (recv_ancillary.crb_recvslla) { 348 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 349 struct T_opthdr *toh; 350 struct sockaddr_dl *dstptr; 351 ill_t *ill; 352 int alen = 0; 353 354 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst); 355 if (ill != NULL) 356 alen = ill->ill_phys_addr_length; 357 358 /* 359 * For loopback multicast and broadcast the packet arrives 360 * with ira_ruifdex being the physical interface, but 361 * ira_l2src is all zero since ip_postfrag_loopback doesn't 362 * know our l2src. We don't report the address in that case. 363 */ 364 if (ira->ira_flags & IRAF_LOOPBACK) 365 alen = 0; 366 367 toh = (struct T_opthdr *)ancil_buf; 368 toh->level = IPPROTO_IP; 369 toh->name = IP_RECVSLLA; 370 toh->len = sizeof (struct T_opthdr) + 371 sizeof (struct sockaddr_dl); 372 toh->status = 0; 373 ancil_buf += sizeof (struct T_opthdr); 374 dstptr = (struct sockaddr_dl *)ancil_buf; 375 dstptr->sdl_family = AF_LINK; 376 dstptr->sdl_index = ira->ira_ruifindex; 377 if (ill != NULL) 378 dstptr->sdl_type = ill->ill_type; 379 else 380 dstptr->sdl_type = 0; 381 dstptr->sdl_nlen = 0; 382 dstptr->sdl_alen = alen; 383 dstptr->sdl_slen = 0; 384 bcopy(ira->ira_l2src, dstptr->sdl_data, alen); 385 ancil_buf += sizeof (struct sockaddr_dl); 386 ancil_size -= toh->len; 387 if (ill != NULL) 388 ill_refrele(ill); 389 } 390 391 if (recv_ancillary.crb_recvif) { 392 struct T_opthdr *toh; 393 uint_t *dstptr; 394 395 toh = (struct T_opthdr *)ancil_buf; 396 toh->level = IPPROTO_IP; 397 toh->name = IP_RECVIF; 398 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t); 399 toh->status = 0; 400 ancil_buf += sizeof (struct T_opthdr); 401 dstptr = (uint_t *)ancil_buf; 402 *dstptr = ira->ira_ruifindex; 403 ancil_buf += sizeof (uint_t); 404 ancil_size -= toh->len; 405 } 406 407 /* 408 * 
ip_recvpktinfo is used for both AF_INET and AF_INET6 but 409 * are different 410 */ 411 if (recv_ancillary.crb_ip_recvpktinfo && 412 connp->conn_family == AF_INET6) { 413 struct T_opthdr *toh; 414 struct in6_pktinfo *pkti; 415 416 toh = (struct T_opthdr *)ancil_buf; 417 toh->level = IPPROTO_IPV6; 418 toh->name = IPV6_PKTINFO; 419 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti); 420 toh->status = 0; 421 ancil_buf += sizeof (struct T_opthdr); 422 pkti = (struct in6_pktinfo *)ancil_buf; 423 if (ira->ira_flags & IRAF_IS_IPV4) { 424 IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4, 425 &pkti->ipi6_addr); 426 } else { 427 pkti->ipi6_addr = ipp->ipp_addr; 428 } 429 pkti->ipi6_ifindex = ira->ira_ruifindex; 430 431 ancil_buf += sizeof (*pkti); 432 ancil_size -= toh->len; 433 } 434 if (recv_ancillary.crb_ipv6_recvhoplimit) { 435 struct T_opthdr *toh; 436 437 toh = (struct T_opthdr *)ancil_buf; 438 toh->level = IPPROTO_IPV6; 439 toh->name = IPV6_HOPLIMIT; 440 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t); 441 toh->status = 0; 442 ancil_buf += sizeof (struct T_opthdr); 443 *(uint_t *)ancil_buf = ipp->ipp_hoplimit; 444 ancil_buf += sizeof (uint_t); 445 ancil_size -= toh->len; 446 } 447 if (recv_ancillary.crb_ipv6_recvtclass) { 448 struct T_opthdr *toh; 449 450 toh = (struct T_opthdr *)ancil_buf; 451 toh->level = IPPROTO_IPV6; 452 toh->name = IPV6_TCLASS; 453 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t); 454 toh->status = 0; 455 ancil_buf += sizeof (struct T_opthdr); 456 457 if (ira->ira_flags & IRAF_IS_IPV4) 458 *(uint_t *)ancil_buf = ipp->ipp_type_of_service; 459 else 460 *(uint_t *)ancil_buf = ipp->ipp_tclass; 461 ancil_buf += sizeof (uint_t); 462 ancil_size -= toh->len; 463 } 464 if (recv_ancillary.crb_ipv6_recvhopopts && 465 (ipp->ipp_fields & IPPF_HOPOPTS)) { 466 struct T_opthdr *toh; 467 468 toh = (struct T_opthdr *)ancil_buf; 469 toh->level = IPPROTO_IPV6; 470 toh->name = IPV6_HOPOPTS; 471 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen; 472 
toh->status = 0; 473 ancil_buf += sizeof (struct T_opthdr); 474 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen); 475 ancil_buf += ipp->ipp_hopoptslen; 476 ancil_size -= toh->len; 477 } 478 /* 479 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS 480 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination 481 * options that appear before a routing header. 482 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set. 483 */ 484 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) { 485 if (recv_ancillary.crb_ipv6_recvrthdrdstopts || 486 (recv_ancillary.crb_ipv6_recvdstopts && 487 recv_ancillary.crb_ipv6_recvrthdr)) { 488 struct T_opthdr *toh; 489 490 toh = (struct T_opthdr *)ancil_buf; 491 toh->level = IPPROTO_IPV6; 492 toh->name = IPV6_DSTOPTS; 493 toh->len = sizeof (struct T_opthdr) + 494 ipp->ipp_rthdrdstoptslen; 495 toh->status = 0; 496 ancil_buf += sizeof (struct T_opthdr); 497 bcopy(ipp->ipp_rthdrdstopts, ancil_buf, 498 ipp->ipp_rthdrdstoptslen); 499 ancil_buf += ipp->ipp_rthdrdstoptslen; 500 ancil_size -= toh->len; 501 } 502 } 503 if (recv_ancillary.crb_ipv6_recvrthdr && 504 (ipp->ipp_fields & IPPF_RTHDR)) { 505 struct T_opthdr *toh; 506 507 toh = (struct T_opthdr *)ancil_buf; 508 toh->level = IPPROTO_IPV6; 509 toh->name = IPV6_RTHDR; 510 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen; 511 toh->status = 0; 512 ancil_buf += sizeof (struct T_opthdr); 513 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen); 514 ancil_buf += ipp->ipp_rthdrlen; 515 ancil_size -= toh->len; 516 } 517 if ((recv_ancillary.crb_ipv6_recvdstopts || 518 recv_ancillary.crb_old_ipv6_recvdstopts) && 519 (ipp->ipp_fields & IPPF_DSTOPTS)) { 520 struct T_opthdr *toh; 521 522 toh = (struct T_opthdr *)ancil_buf; 523 toh->level = IPPROTO_IPV6; 524 toh->name = IPV6_DSTOPTS; 525 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen; 526 toh->status = 0; 527 ancil_buf += sizeof (struct T_opthdr); 528 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen); 
529 ancil_buf += ipp->ipp_dstoptslen; 530 ancil_size -= toh->len; 531 } 532 533 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) { 534 struct T_opthdr *toh; 535 cred_t *rcr = connp->conn_cred; 536 537 toh = (struct T_opthdr *)ancil_buf; 538 toh->level = SOL_SOCKET; 539 toh->name = SCM_UCRED; 540 toh->len = sizeof (struct T_opthdr) + 541 ucredminsize(ira->ira_cred); 542 toh->status = 0; 543 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr); 544 ancil_buf += toh->len; 545 ancil_size -= toh->len; 546 } 547 if (recv_ancillary.crb_timestamp) { 548 struct T_opthdr *toh; 549 550 toh = (struct T_opthdr *)ancil_buf; 551 toh->level = SOL_SOCKET; 552 toh->name = SCM_TIMESTAMP; 553 toh->len = sizeof (struct T_opthdr) + 554 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 555 toh->status = 0; 556 ancil_buf += sizeof (struct T_opthdr); 557 /* Align for gethrestime() */ 558 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf, 559 sizeof (intptr_t)); 560 gethrestime((timestruc_t *)ancil_buf); 561 ancil_buf = (uchar_t *)toh + toh->len; 562 ancil_size -= toh->len; 563 } 564 565 if (recv_ancillary.crb_recvtos && 566 (ira->ira_flags & IRAF_IS_IPV4)) { 567 struct T_opthdr *toh; 568 uint8_t *dstptr; 569 570 toh = (struct T_opthdr *)ancil_buf; 571 toh->level = IPPROTO_IP; 572 toh->name = IP_RECVTOS; 573 toh->len = sizeof (struct T_opthdr) + 574 P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE); 575 toh->status = 0; 576 ancil_buf += sizeof (struct T_opthdr); 577 dstptr = (uint8_t *)ancil_buf; 578 *dstptr = ipp->ipp_type_of_service; 579 ancil_buf = (uchar_t *)toh + toh->len; 580 ancil_size -= toh->len; 581 ASSERT(__TPI_TOPT_ISALIGNED(toh)); 582 } 583 584 if (recv_ancillary.crb_recvttl && 585 (ira->ira_flags & IRAF_IS_IPV4)) { 586 struct T_opthdr *toh; 587 uint8_t *dstptr; 588 589 toh = (struct T_opthdr *)ancil_buf; 590 toh->level = IPPROTO_IP; 591 toh->name = IP_RECVTTL; 592 toh->len = sizeof (struct T_opthdr) + 593 P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE); 594 
toh->status = 0; 595 ancil_buf += sizeof (struct T_opthdr); 596 dstptr = (uint8_t *)ancil_buf; 597 *dstptr = ipp->ipp_hoplimit; 598 ancil_buf = (uchar_t *)toh + toh->len; 599 ancil_size -= toh->len; 600 ASSERT(__TPI_TOPT_ISALIGNED(toh)); 601 } 602 603 /* Consumed all of allocated space */ 604 ASSERT(ancil_size == 0); 605 606 } 607 608 /* 609 * This routine retrieves the current status of socket options. 610 * It returns the size of the option retrieved, or -1. 611 */ 612 int 613 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name, 614 uchar_t *ptr) 615 { 616 int *i1 = (int *)ptr; 617 conn_t *connp = coa->coa_connp; 618 ip_xmit_attr_t *ixa = coa->coa_ixa; 619 ip_pkt_t *ipp = coa->coa_ipp; 620 ip_stack_t *ipst = ixa->ixa_ipst; 621 uint_t len; 622 623 ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock)); 624 625 switch (level) { 626 case SOL_SOCKET: 627 switch (name) { 628 case SO_DEBUG: 629 *i1 = connp->conn_debug ? SO_DEBUG : 0; 630 break; /* goto sizeof (int) option return */ 631 case SO_KEEPALIVE: 632 *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0; 633 break; 634 case SO_LINGER: { 635 struct linger *lgr = (struct linger *)ptr; 636 637 lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0; 638 lgr->l_linger = connp->conn_lingertime; 639 } 640 return (sizeof (struct linger)); 641 642 case SO_OOBINLINE: 643 *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0; 644 break; 645 case SO_REUSEADDR: 646 *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0; 647 break; /* goto sizeof (int) option return */ 648 case SO_TYPE: 649 *i1 = connp->conn_so_type; 650 break; /* goto sizeof (int) option return */ 651 case SO_DONTROUTE: 652 *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ? 653 SO_DONTROUTE : 0; 654 break; /* goto sizeof (int) option return */ 655 case SO_USELOOPBACK: 656 *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0; 657 break; /* goto sizeof (int) option return */ 658 case SO_BROADCAST: 659 *i1 = connp->conn_broadcast ? 
SO_BROADCAST : 0; 660 break; /* goto sizeof (int) option return */ 661 662 case SO_SNDBUF: 663 *i1 = connp->conn_sndbuf; 664 break; /* goto sizeof (int) option return */ 665 case SO_RCVBUF: 666 *i1 = connp->conn_rcvbuf; 667 break; /* goto sizeof (int) option return */ 668 case SO_RCVTIMEO: 669 case SO_SNDTIMEO: 670 /* 671 * Pass these two options in order for third part 672 * protocol usage. Here just return directly. 673 */ 674 *i1 = 0; 675 break; 676 case SO_DGRAM_ERRIND: 677 *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0; 678 break; /* goto sizeof (int) option return */ 679 case SO_RECVUCRED: 680 *i1 = connp->conn_recv_ancillary.crb_recvucred; 681 break; /* goto sizeof (int) option return */ 682 case SO_TIMESTAMP: 683 *i1 = connp->conn_recv_ancillary.crb_timestamp; 684 break; /* goto sizeof (int) option return */ 685 case SO_VRRP: 686 *i1 = connp->conn_isvrrp; 687 break; /* goto sizeof (int) option return */ 688 case SO_ANON_MLP: 689 *i1 = connp->conn_anon_mlp; 690 break; /* goto sizeof (int) option return */ 691 case SO_MAC_EXEMPT: 692 *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE); 693 break; /* goto sizeof (int) option return */ 694 case SO_MAC_IMPLICIT: 695 *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT); 696 break; /* goto sizeof (int) option return */ 697 case SO_ALLZONES: 698 *i1 = connp->conn_allzones; 699 break; /* goto sizeof (int) option return */ 700 case SO_EXCLBIND: 701 *i1 = connp->conn_exclbind ? 
SO_EXCLBIND : 0; 702 break; 703 case SO_PROTOTYPE: 704 *i1 = connp->conn_proto; 705 break; 706 707 case SO_DOMAIN: 708 *i1 = connp->conn_family; 709 break; 710 default: 711 return (-1); 712 } 713 break; 714 case IPPROTO_IP: 715 if (connp->conn_family != AF_INET) 716 return (-1); 717 switch (name) { 718 case IP_OPTIONS: 719 case T_IP_OPTIONS: 720 if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS)) 721 return (0); 722 723 len = ipp->ipp_ipv4_options_len; 724 if (len > 0) { 725 bcopy(ipp->ipp_ipv4_options, ptr, len); 726 } 727 return (len); 728 729 case IP_PKTINFO: { 730 /* 731 * This also handles IP_RECVPKTINFO. 732 * IP_PKTINFO and IP_RECVPKTINFO have same value. 733 * Differentiation is based on the size of the 734 * argument passed in. 735 */ 736 struct in_pktinfo *pktinfo; 737 738 #ifdef notdef 739 /* optcom doesn't provide a length with "get" */ 740 if (inlen == sizeof (int)) { 741 /* This is IP_RECVPKTINFO option. */ 742 *i1 = connp->conn_recv_ancillary. 743 crb_ip_recvpktinfo; 744 return (sizeof (int)); 745 } 746 #endif 747 /* XXX assumes that caller has room for max size! 
*/ 748 749 pktinfo = (struct in_pktinfo *)ptr; 750 pktinfo->ipi_ifindex = ixa->ixa_ifindex; 751 if (ipp->ipp_fields & IPPF_ADDR) 752 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4; 753 else 754 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY; 755 return (sizeof (struct in_pktinfo)); 756 } 757 case IP_DONTFRAG: 758 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0; 759 return (sizeof (int)); 760 case IP_TOS: 761 case T_IP_TOS: 762 *i1 = (int)ipp->ipp_type_of_service; 763 break; /* goto sizeof (int) option return */ 764 case IP_TTL: 765 *i1 = (int)ipp->ipp_unicast_hops; 766 break; /* goto sizeof (int) option return */ 767 case IP_DHCPINIT_IF: 768 return (-1); 769 case IP_NEXTHOP: 770 if (ixa->ixa_flags & IXAF_NEXTHOP_SET) { 771 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4; 772 return (sizeof (ipaddr_t)); 773 } else { 774 return (0); 775 } 776 777 case IP_MULTICAST_IF: 778 /* 0 address if not set */ 779 *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr; 780 return (sizeof (ipaddr_t)); 781 case IP_MULTICAST_TTL: 782 *(uchar_t *)ptr = ixa->ixa_multicast_ttl; 783 return (sizeof (uchar_t)); 784 case IP_MULTICAST_LOOP: 785 *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 
1 : 0; 786 return (sizeof (uint8_t)); 787 case IP_RECVOPTS: 788 *i1 = connp->conn_recv_ancillary.crb_recvopts; 789 break; /* goto sizeof (int) option return */ 790 case IP_RECVDSTADDR: 791 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr; 792 break; /* goto sizeof (int) option return */ 793 case IP_RECVIF: 794 *i1 = connp->conn_recv_ancillary.crb_recvif; 795 break; /* goto sizeof (int) option return */ 796 case IP_RECVSLLA: 797 *i1 = connp->conn_recv_ancillary.crb_recvslla; 798 break; /* goto sizeof (int) option return */ 799 case IP_RECVTTL: 800 *i1 = connp->conn_recv_ancillary.crb_recvttl; 801 break; /* goto sizeof (int) option return */ 802 case IP_RECVTOS: 803 *i1 = connp->conn_recv_ancillary.crb_recvtos; 804 break; /* goto sizeof (int) option return */ 805 case IP_ADD_MEMBERSHIP: 806 case IP_DROP_MEMBERSHIP: 807 case MCAST_JOIN_GROUP: 808 case MCAST_LEAVE_GROUP: 809 case IP_BLOCK_SOURCE: 810 case IP_UNBLOCK_SOURCE: 811 case IP_ADD_SOURCE_MEMBERSHIP: 812 case IP_DROP_SOURCE_MEMBERSHIP: 813 case MCAST_BLOCK_SOURCE: 814 case MCAST_UNBLOCK_SOURCE: 815 case MCAST_JOIN_SOURCE_GROUP: 816 case MCAST_LEAVE_SOURCE_GROUP: 817 case MRT_INIT: 818 case MRT_DONE: 819 case MRT_ADD_VIF: 820 case MRT_DEL_VIF: 821 case MRT_ADD_MFC: 822 case MRT_DEL_MFC: 823 /* cannot "get" the value for these */ 824 return (-1); 825 case MRT_VERSION: 826 case MRT_ASSERT: 827 (void) ip_mrouter_get(name, connp, ptr); 828 return (sizeof (int)); 829 case IP_SEC_OPT: 830 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr, 831 IPSEC_AF_V4)); 832 case IP_BOUND_IF: 833 /* Zero if not set */ 834 *i1 = connp->conn_bound_if; 835 break; /* goto sizeof (int) option return */ 836 case IP_UNSPEC_SRC: 837 *i1 = connp->conn_unspec_src; 838 break; /* goto sizeof (int) option return */ 839 case IP_BROADCAST_TTL: 840 if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET) 841 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl; 842 else 843 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl; 844 return (sizeof (uchar_t)); 845 case 
IP_MINTTL: 846 *i1 = connp->conn_min_ttl; 847 return (sizeof (int)); 848 default: 849 return (-1); 850 } 851 break; 852 case IPPROTO_IPV6: 853 if (connp->conn_family != AF_INET6) 854 return (-1); 855 switch (name) { 856 case IPV6_UNICAST_HOPS: 857 *i1 = (int)ipp->ipp_unicast_hops; 858 break; /* goto sizeof (int) option return */ 859 case IPV6_MULTICAST_IF: 860 /* 0 index if not set */ 861 *i1 = ixa->ixa_multicast_ifindex; 862 break; /* goto sizeof (int) option return */ 863 case IPV6_MULTICAST_HOPS: 864 *i1 = ixa->ixa_multicast_ttl; 865 break; /* goto sizeof (int) option return */ 866 case IPV6_MULTICAST_LOOP: 867 *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0; 868 break; /* goto sizeof (int) option return */ 869 case IPV6_JOIN_GROUP: 870 case IPV6_LEAVE_GROUP: 871 case MCAST_JOIN_GROUP: 872 case MCAST_LEAVE_GROUP: 873 case MCAST_BLOCK_SOURCE: 874 case MCAST_UNBLOCK_SOURCE: 875 case MCAST_JOIN_SOURCE_GROUP: 876 case MCAST_LEAVE_SOURCE_GROUP: 877 /* cannot "get" the value for these */ 878 return (-1); 879 case IPV6_BOUND_IF: 880 /* Zero if not set */ 881 *i1 = connp->conn_bound_if; 882 break; /* goto sizeof (int) option return */ 883 case IPV6_UNSPEC_SRC: 884 *i1 = connp->conn_unspec_src; 885 break; /* goto sizeof (int) option return */ 886 case IPV6_RECVPKTINFO: 887 *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo; 888 break; /* goto sizeof (int) option return */ 889 case IPV6_RECVTCLASS: 890 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass; 891 break; /* goto sizeof (int) option return */ 892 case IPV6_RECVPATHMTU: 893 *i1 = connp->conn_ipv6_recvpathmtu; 894 break; /* goto sizeof (int) option return */ 895 case IPV6_RECVHOPLIMIT: 896 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit; 897 break; /* goto sizeof (int) option return */ 898 case IPV6_RECVHOPOPTS: 899 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts; 900 break; /* goto sizeof (int) option return */ 901 case IPV6_RECVDSTOPTS: 902 *i1 = 
connp->conn_recv_ancillary.crb_ipv6_recvdstopts; 903 break; /* goto sizeof (int) option return */ 904 case _OLD_IPV6_RECVDSTOPTS: 905 *i1 = 906 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts; 907 break; /* goto sizeof (int) option return */ 908 case IPV6_RECVRTHDRDSTOPTS: 909 *i1 = connp->conn_recv_ancillary. 910 crb_ipv6_recvrthdrdstopts; 911 break; /* goto sizeof (int) option return */ 912 case IPV6_RECVRTHDR: 913 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr; 914 break; /* goto sizeof (int) option return */ 915 case IPV6_PKTINFO: { 916 /* XXX assumes that caller has room for max size! */ 917 struct in6_pktinfo *pkti; 918 919 pkti = (struct in6_pktinfo *)ptr; 920 pkti->ipi6_ifindex = ixa->ixa_ifindex; 921 if (ipp->ipp_fields & IPPF_ADDR) 922 pkti->ipi6_addr = ipp->ipp_addr; 923 else 924 pkti->ipi6_addr = ipv6_all_zeros; 925 return (sizeof (struct in6_pktinfo)); 926 } 927 case IPV6_TCLASS: 928 *i1 = ipp->ipp_tclass; 929 break; /* goto sizeof (int) option return */ 930 case IPV6_NEXTHOP: { 931 sin6_t *sin6 = (sin6_t *)ptr; 932 933 if (ixa->ixa_flags & IXAF_NEXTHOP_SET) 934 return (0); 935 936 *sin6 = sin6_null; 937 sin6->sin6_family = AF_INET6; 938 sin6->sin6_addr = ixa->ixa_nexthop_v6; 939 940 return (sizeof (sin6_t)); 941 } 942 case IPV6_HOPOPTS: 943 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 944 return (0); 945 bcopy(ipp->ipp_hopopts, ptr, 946 ipp->ipp_hopoptslen); 947 return (ipp->ipp_hopoptslen); 948 case IPV6_RTHDRDSTOPTS: 949 if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS)) 950 return (0); 951 bcopy(ipp->ipp_rthdrdstopts, ptr, 952 ipp->ipp_rthdrdstoptslen); 953 return (ipp->ipp_rthdrdstoptslen); 954 case IPV6_RTHDR: 955 if (!(ipp->ipp_fields & IPPF_RTHDR)) 956 return (0); 957 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 958 return (ipp->ipp_rthdrlen); 959 case IPV6_DSTOPTS: 960 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 961 return (0); 962 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 963 return (ipp->ipp_dstoptslen); 964 case IPV6_PATHMTU: 965 return 
(ip_fill_mtuinfo(connp, ixa, 966 (struct ip6_mtuinfo *)ptr)); 967 case IPV6_SEC_OPT: 968 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr, 969 IPSEC_AF_V6)); 970 case IPV6_SRC_PREFERENCES: 971 return (ip6_get_src_preferences(ixa, (uint32_t *)ptr)); 972 case IPV6_DONTFRAG: 973 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0; 974 return (sizeof (int)); 975 case IPV6_USE_MIN_MTU: 976 if (ixa->ixa_flags & IXAF_USE_MIN_MTU) 977 *i1 = ixa->ixa_use_min_mtu; 978 else 979 *i1 = IPV6_USE_MIN_MTU_MULTICAST; 980 break; 981 case IPV6_V6ONLY: 982 *i1 = connp->conn_ipv6_v6only; 983 return (sizeof (int)); 984 case IPV6_MINHOPCOUNT: 985 *i1 = connp->conn_min_ttl; 986 return (sizeof (int)); 987 default: 988 return (-1); 989 } 990 break; 991 case IPPROTO_UDP: 992 switch (name) { 993 case UDP_ANONPRIVBIND: 994 *i1 = connp->conn_anon_priv_bind; 995 break; 996 case UDP_EXCLBIND: 997 *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0; 998 break; 999 default: 1000 return (-1); 1001 } 1002 break; 1003 case IPPROTO_TCP: 1004 switch (name) { 1005 case TCP_RECVDSTADDR: 1006 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr; 1007 break; 1008 case TCP_ANONPRIVBIND: 1009 *i1 = connp->conn_anon_priv_bind; 1010 break; 1011 case TCP_EXCLBIND: 1012 *i1 = connp->conn_exclbind ? 
TCP_EXCLBIND : 0; 1013 break; 1014 default: 1015 return (-1); 1016 } 1017 break; 1018 default: 1019 return (-1); 1020 } 1021 return (sizeof (int)); 1022 } 1023 1024 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, 1025 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr); 1026 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, 1027 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr); 1028 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, 1029 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr); 1030 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, 1031 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr); 1032 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, 1033 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr); 1034 1035 /* 1036 * This routine sets the most common socket options including some 1037 * that are transport/ULP specific. 1038 * It returns errno or zero. 1039 * 1040 * For fixed length options, there is no sanity check 1041 * of passed in length is done. It is assumed *_optcom_req() 1042 * routines do the right thing. 
 */
int
conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
{
	/*
	 * The per-level handlers below acquire conn_lock themselves, so it
	 * must not already be held when we get here.
	 */
	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));

	/* We have different functions for different levels */
	switch (level) {
	case SOL_SOCKET:
		return (conn_opt_set_socket(coa, name, inlen, invalp,
		    checkonly, cr));
	case IPPROTO_IP:
		return (conn_opt_set_ip(coa, name, inlen, invalp,
		    checkonly, cr));
	case IPPROTO_IPV6:
		return (conn_opt_set_ipv6(coa, name, inlen, invalp,
		    checkonly, cr));
	case IPPROTO_UDP:
		return (conn_opt_set_udp(coa, name, inlen, invalp,
		    checkonly, cr));
	case IPPROTO_TCP:
		return (conn_opt_set_tcp(coa, name, inlen, invalp,
		    checkonly, cr));
	default:
		/*
		 * Any other level is not handled here; success is reported
		 * without anything having been changed.
		 */
		return (0);
	}
}

/*
 * Handle SOL_SOCKET
 * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
 * it implement their own checks and setting of conn_proto.
 *
 * The work is done in two passes over `name':
 *  1. A checking pass. SO_ALLZONES and SO_MAC_EXEMPT fail with EINVAL when
 *     IPCL_IS_BOUND(connp) is true. SO_VRRP, SO_MAC_EXEMPT and
 *     SO_MAC_IMPLICIT fail with EACCES when their secpolicy_*() check
 *     returns non-zero.
 *  2. Unless `checkonly' is set, a pass that stores the value while holding
 *     conn_lock and records in coa->coa_changed which aspects changed.
 *
 * Option names that neither pass recognizes are accepted and ignored.
 * Returns zero or an errno value.
 *
 * NOTE(review): invalp is read as an int before `name' is examined, so the
 * caller presumably always supplies at least sizeof (int) bytes - confirm.
 */
/* ARGSUSED1 */
static int
conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
    uchar_t *invalp, boolean_t checkonly, cred_t *cr)
{
	conn_t		*connp = coa->coa_connp;
	ip_xmit_attr_t	*ixa = coa->coa_ixa;
	int		*i1 = (int *)invalp;
	/* Any non-zero int value is treated as "on" */
	boolean_t	onoff = (*i1 == 0) ? 0 : 1;

	/* Pass 1: checks that must succeed before anything is modified */
	switch (name) {
	case SO_ALLZONES:
		if (IPCL_IS_BOUND(connp))
			return (EINVAL);
		break;
	case SO_VRRP:
		if (secpolicy_ip_config(cr, checkonly) != 0)
			return (EACCES);
		break;
	case SO_MAC_EXEMPT:
		if (secpolicy_net_mac_aware(cr) != 0)
			return (EACCES);
		if (IPCL_IS_BOUND(connp))
			return (EINVAL);
		break;
	case SO_MAC_IMPLICIT:
		if (secpolicy_net_mac_implicit(cr) != 0)
			return (EACCES);
		break;
	}
	if (checkonly)
		return (0);

	/* Pass 2: conn_lock is held across the whole switch below */
	mutex_enter(&connp->conn_lock);
	/* Here we set the actual option value */
	switch (name) {
	case SO_DEBUG:
		connp->conn_debug = onoff;
		break;
	case SO_KEEPALIVE:
		connp->conn_keepalive = onoff;
		break;
	case SO_LINGER: {
		struct linger *lgr = (struct linger *)invalp;

		/* Turning linger off also resets the linger time to zero */
		if (lgr->l_onoff) {
			connp->conn_linger = 1;
			connp->conn_lingertime = lgr->l_linger;
		} else {
			connp->conn_linger = 0;
			connp->conn_lingertime = 0;
		}
		break;
	}
	case SO_OOBINLINE:
		connp->conn_oobinline = onoff;
		coa->coa_changed |= COA_OOBINLINE_CHANGED;
		break;
	case SO_REUSEADDR:
		connp->conn_reuseaddr = onoff;
		break;
	case SO_DONTROUTE:
		if (onoff)
			ixa->ixa_flags |= IXAF_DONTROUTE;
		else
			ixa->ixa_flags &= ~IXAF_DONTROUTE;
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case SO_USELOOPBACK:
		connp->conn_useloopback = onoff;
		break;
	case SO_BROADCAST:
		connp->conn_broadcast = onoff;
		break;
	case SO_SNDBUF:
		/* ULP has range checked the value */
		connp->conn_sndbuf = *i1;
		coa->coa_changed |= COA_SNDBUF_CHANGED;
		break;
	case SO_RCVBUF:
		/* ULP has range checked the value */
		connp->conn_rcvbuf = *i1;
		coa->coa_changed |= COA_RCVBUF_CHANGED;
		break;
	case SO_RCVTIMEO:
	case SO_SNDTIMEO:
		/*
		 * Pass these two options in order for third party
		 * protocol usage. Nothing is stored for them here.
		 */
		break;
	case SO_DGRAM_ERRIND:
		connp->conn_dgram_errind = onoff;
		break;
	case SO_RECVUCRED:
		connp->conn_recv_ancillary.crb_recvucred = onoff;
		break;
	case SO_ALLZONES:
		connp->conn_allzones = onoff;
		coa->coa_changed |= COA_ROUTE_CHANGED;
		/* Keep the transmit attributes' zone in step with the flag */
		if (onoff)
			ixa->ixa_zoneid = ALL_ZONES;
		else
			ixa->ixa_zoneid = connp->conn_zoneid;
		break;
	case SO_TIMESTAMP:
		connp->conn_recv_ancillary.crb_timestamp = onoff;
		break;
	case SO_VRRP:
		connp->conn_isvrrp = onoff;
		break;
	case SO_ANON_MLP:
		connp->conn_anon_mlp = onoff;
		break;
	case SO_MAC_EXEMPT:
		/*
		 * SO_MAC_EXEMPT and SO_MAC_IMPLICIT share conn_mac_mode;
		 * turning either one off stores CONN_MAC_DEFAULT.
		 */
		connp->conn_mac_mode = onoff ?
		    CONN_MAC_AWARE : CONN_MAC_DEFAULT;
		break;
	case SO_MAC_IMPLICIT:
		connp->conn_mac_mode = onoff ?
		    CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
		break;
	case SO_EXCLBIND:
		connp->conn_exclbind = onoff;
		break;
	}
	mutex_exit(&connp->conn_lock);
	return (0);
}

/*
 * Handle IPPROTO_IP
 *
 * Fails with EINVAL unless conn_family is AF_INET. Otherwise the option is
 * processed in two passes over `name':
 *  1. A checking pass that validates the value (ranges, lengths, local
 *     addresses, interface indices, on-link next hop) and, for
 *     IP_MULTICAST_IF and IP_BOUND_IF, computes `ifindex' for use by the
 *     second pass.
 *  2. Unless `checkonly' is set, a pass that stores the value. conn_lock
 *     is taken per option around updates to the conn and to the ip_pkt_t;
 *     coa->coa_changed records which aspects changed.
 *
 * Option names that neither pass recognizes are accepted and ignored.
 * Returns zero or an errno value.
 *
 * NOTE(review): invalp is read as an int before `name' is examined, so the
 * caller presumably always supplies at least sizeof (int) bytes - confirm.
 */
static int
conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
    uchar_t *invalp, boolean_t checkonly, cred_t *cr)
{
	conn_t		*connp = coa->coa_connp;
	ip_xmit_attr_t	*ixa = coa->coa_ixa;
	ip_pkt_t	*ipp = coa->coa_ipp;
	int		*i1 = (int *)invalp;
	/* Any non-zero int value is treated as "on" */
	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
	/* The same leading four bytes reinterpreted as an IPv4 address */
	ipaddr_t	addr = (ipaddr_t)*i1;
	uint_t		ifindex;
	zoneid_t	zoneid = IPCL_ZONEID(connp);
	ipif_t		*ipif;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	int		error;

	if (connp->conn_family != AF_INET)
		return (EINVAL);

	/*
	 * Placeholder; only the IP_MULTICAST_IF and IP_BOUND_IF cases read
	 * ifindex in the second pass, and both assign it below first.
	 */
	ifindex = UINT_MAX;
	switch (name) {
	case IP_TTL:
		/* Don't allow zero */
		if (*i1 < 1 || *i1 > MAXTTL)
			return (EINVAL);
		break;
	case IP_MULTICAST_IF:
		if (addr == INADDR_ANY) {
			/* Clear */
			ifindex = 0;
			break;
		}
		ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
		if (ipif == NULL)
			return (EHOSTUNREACH);
		/* not supported by the virtual network iface */
		if (IS_VNI(ipif->ipif_ill)) {
			ipif_refrele(ipif);
			return (EINVAL);
		}
		/* Only the index is kept; the ipif reference is dropped */
		ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
		ipif_refrele(ipif);
		break;
	case IP_NEXTHOP: {
		ire_t	*ire;

		if (addr == INADDR_ANY) {
			/* Clear */
			break;
		}
		/* Verify that the next-hop is on-link */
		ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
		    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
		if (ire == NULL)
			return (EHOSTUNREACH);
		/* The lookup was only an existence test */
		ire_refrele(ire);
		break;
	}
	case IP_OPTIONS:
	case T_IP_OPTIONS: {
		uint_t newlen;

		/*
		 * Note that `+' binds tighter than `&', so the expression
		 * below is ((inlen + ipp_label_len_v4 + 3) & ~3). A value of
		 * inlen that is not a multiple of four is rejected by the
		 * test that follows.
		 */
		if (ipp->ipp_fields & IPPF_LABEL_V4)
			newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
		else
			newlen = inlen;
		if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
			return (EINVAL);
		}
		break;
	}
	case IP_PKTINFO: {
		struct in_pktinfo *pktinfo;

		/* Two different valid lengths */
		if (inlen != sizeof (int) &&
		    inlen != sizeof (struct in_pktinfo))
			return (EINVAL);
		/* An int-sized value needs no further checking */
		if (inlen == sizeof (int))
			break;

		pktinfo = (struct in_pktinfo *)invalp;
		if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
			switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
			    zoneid, ipst, B_FALSE)) {
			case IPVL_UNICAST_UP:
			case IPVL_UNICAST_DOWN:
				break;
			default:
				return (EADDRNOTAVAIL);
			}
		}
		if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
		    B_FALSE, ipst))
			return (ENXIO);
		break;
	}
	case IP_BOUND_IF:
		ifindex = *(uint_t *)i1;

		/* Just check it is ok. */
		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
			return (ENXIO);
		break;
	case IP_MINTTL:
		/* Unlike IP_TTL, zero is accepted here */
		if (*i1 < 0 || *i1 > MAXTTL)
			return (EINVAL);
		break;
	}
	if (checkonly)
		return (0);

	/* Here we set the actual option value */
	/*
	 * conn_lock protects the bitfields, and is used to
	 * set the fields atomically. Not needed for ixa settings since
	 * the caller has an exclusive copy of the ixa.
	 * We can not hold conn_lock across the multicast options though.
	 */
	switch (name) {
	case IP_OPTIONS:
	case T_IP_OPTIONS:
		/* Save options for use by IP. */
		mutex_enter(&connp->conn_lock);
		error = optcom_pkt_set(invalp, inlen,
		    (uchar_t **)&ipp->ipp_ipv4_options,
		    &ipp->ipp_ipv4_options_len);
		if (error != 0) {
			mutex_exit(&connp->conn_lock);
			return (error);
		}
		/* A zero-length value clears the options */
		if (ipp->ipp_ipv4_options_len == 0) {
			ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
		} else {
			ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		coa->coa_changed |= COA_WROFF_CHANGED;
		break;

	case IP_TTL:
		mutex_enter(&connp->conn_lock);
		ipp->ipp_unicast_hops = *i1;
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		break;
	case IP_TOS:
	case T_IP_TOS:
		mutex_enter(&connp->conn_lock);
		/* -1 is stored as a type of service of zero */
		if (*i1 == -1) {
			ipp->ipp_type_of_service = 0;
		} else {
			ipp->ipp_type_of_service = *i1;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		break;
	case IP_MULTICAST_IF:
		/* ifindex was computed from addr in the checking pass */
		ixa->ixa_multicast_ifindex = ifindex;
		ixa->ixa_multicast_ifaddr = addr;
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IP_MULTICAST_TTL:
		/* Only the first byte of the option value is used */
		ixa->ixa_multicast_ttl = *invalp;
		/* Handled automatically by ip_output */
		break;
	case IP_MULTICAST_LOOP:
		/* Only the first byte of the option value is examined */
		if (*invalp != 0)
			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
		else
			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
		/* Handled automatically by ip_output */
		break;
	case IP_RECVOPTS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_recvopts = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IP_RECVDSTADDR:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IP_RECVIF:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_recvif = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IP_RECVSLLA:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_recvslla = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IP_RECVTTL:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_recvttl = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IP_RECVTOS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_recvtos = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IP_PKTINFO: {
		/*
		 * This also handles IP_RECVPKTINFO.
		 * IP_PKTINFO and IP_RECVPKTINFO have same value.
		 * Differentiation is based on the size of the
		 * argument passed in.
		 */
		struct in_pktinfo *pktinfo;

		if (inlen == sizeof (int)) {
			/* This is IP_RECVPKTINFO option. */
			mutex_enter(&connp->conn_lock);
			connp->conn_recv_ancillary.crb_ip_recvpktinfo =
			    onoff;
			mutex_exit(&connp->conn_lock);
			break;
		}

		/* This is IP_PKTINFO option. */
		mutex_enter(&connp->conn_lock);
		pktinfo = (struct in_pktinfo *)invalp;
		/* The source is kept in ipp_addr as a v4-mapped address */
		if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
			ipp->ipp_fields |= IPPF_ADDR;
			IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
			    &ipp->ipp_addr);
		} else {
			ipp->ipp_fields &= ~IPPF_ADDR;
			ipp->ipp_addr = ipv6_all_zeros;
		}
		mutex_exit(&connp->conn_lock);
		ixa->ixa_ifindex = pktinfo->ipi_ifindex;
		coa->coa_changed |= COA_ROUTE_CHANGED;
		coa->coa_changed |= COA_HEADER_CHANGED;
		break;
	}
	case IP_DONTFRAG:
		/* IXAF_PMTU_DISCOVERY is set to the opposite of the option */
		if (onoff) {
			ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
		} else {
			ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
		}
		/* Need to redo ip_attr_connect */
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IP_ADD_MEMBERSHIP:
	case IP_DROP_MEMBERSHIP:
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
		/*
		 * checkonly is always false here because the checkonly case
		 * returned above. B_FALSE is passed for the IPv4 path; the
		 * IPv6 handler passes B_TRUE in the same position.
		 */
		return (ip_opt_set_multicast_group(connp, name,
		    invalp, B_FALSE, checkonly));

	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
		/* As above: checkonly is always false at this point */
		return (ip_opt_set_multicast_sources(connp, name,
		    invalp, B_FALSE, checkonly));

	case IP_SEC_OPT:
		mutex_enter(&connp->conn_lock);
		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
		mutex_exit(&connp->conn_lock);
		if (error != 0) {
			return (error);
		}
		/* This is an IPsec policy change - redo ip_attr_connect */
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IP_NEXTHOP:
		/* INADDR_ANY clears a previously set next hop */
		ixa->ixa_nexthop_v4 = addr;
		if (addr != INADDR_ANY)
			ixa->ixa_flags |= IXAF_NEXTHOP_SET;
		else
			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;

	case IP_BOUND_IF:
		ixa->ixa_ifindex = ifindex;		/* Send */
		mutex_enter(&connp->conn_lock);
		connp->conn_incoming_ifindex = ifindex;	/* Receive */
		connp->conn_bound_if = ifindex;		/* getsockopt */
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IP_UNSPEC_SRC:
		mutex_enter(&connp->conn_lock);
		connp->conn_unspec_src = onoff;
		/* Source verification is the inverse of the option */
		if (onoff)
			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
		else
			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;

		mutex_exit(&connp->conn_lock);
		break;
	case IP_BROADCAST_TTL:
		/* Only the first byte of the option value is used */
		ixa->ixa_broadcast_ttl = *invalp;
		ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
		/* Handled automatically by ip_output */
		break;
	case MRT_INIT:
	case MRT_DONE:
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
	case MRT_ASSERT:
		/* The policy error, if any, is returned unchanged */
		if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
			return (error);
		}
		error = ip_mrouter_set((int)name, connp, checkonly,
		    (uchar_t *)invalp, inlen);
		if (error) {
			return (error);
		}
		return (0);
	case IP_MINTTL:
		mutex_enter(&connp->conn_lock);
		connp->conn_min_ttl = *i1;
		mutex_exit(&connp->conn_lock);
		break;
	}
	return (0);
}

/*
 * Handle IPPROTO_IPV6
 *
 * Fails with EINVAL unless conn_family is AF_INET6. Otherwise the option is
 * processed in two passes over `name':
 *  1. A checking pass that validates the value (ranges, lengths, local
 *     addresses, interface indices, on-link next hop) and, for
 *     IPV6_MULTICAST_IF and IPV6_BOUND_IF, computes `ifindex' for use by
 *     the second pass. Note that the IPV6_PKTINFO check also updates
 *     IXAF_VERIFY_SOURCE in ixa_flags.
 *  2. Unless `checkonly' is set, a pass that stores the value. conn_lock
 *     is taken per option; coa->coa_changed records which aspects changed.
 *
 * Several options accept a zero `inlen', which means "clear/revert to
 * default" in the second pass.
 *
 * Option names that neither pass recognizes are accepted and ignored.
 * Returns zero or an errno value.
 *
 * NOTE(review): invalp is read as an int before `name' and `inlen' are
 * examined, including when inlen is zero - confirm that callers always
 * pass a readable buffer.
 */
static int
conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
    uchar_t *invalp, boolean_t checkonly, cred_t *cr)
{
	conn_t		*connp = coa->coa_connp;
	ip_xmit_attr_t	*ixa = coa->coa_ixa;
	ip_pkt_t	*ipp = coa->coa_ipp;
	int		*i1 = (int *)invalp;
	/* Any non-zero int value is treated as "on" */
	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
	uint_t		ifindex;
	zoneid_t	zoneid = IPCL_ZONEID(connp);
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	int		error;

	if (connp->conn_family != AF_INET6)
		return (EINVAL);

	/*
	 * Placeholder; only the IPV6_MULTICAST_IF and IPV6_BOUND_IF cases
	 * read ifindex in the second pass, and both assign it below first.
	 */
	ifindex = UINT_MAX;
	switch (name) {
	case IPV6_MULTICAST_IF:
		/*
		 * The only possible error is EINVAL.
		 * We call this option on both V4 and V6
		 * If both fail, then this call returns
		 * EINVAL. If at least one of them succeeds we
		 * return success.
		 */
		ifindex = *(uint_t *)i1;

		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
		    !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
			return (EINVAL);
		break;
	case IPV6_UNICAST_HOPS:
		/* Don't allow zero. -1 means to use default */
		if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
			return (EINVAL);
		break;
	case IPV6_MULTICAST_HOPS:
		/* -1 means use default */
		if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
			return (EINVAL);
		break;
	case IPV6_MULTICAST_LOOP:
		if (*i1 != 0 && *i1 != 1)
			return (EINVAL);
		break;
	case IPV6_BOUND_IF:
		ifindex = *(uint_t *)i1;

		if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
			return (ENXIO);
		break;
	case IPV6_PKTINFO: {
		struct in6_pktinfo *pkti;
		boolean_t isv6;

		if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
			return (EINVAL);
		if (inlen == 0)
			break;	/* Clear values below */

		/*
		 * Verify the source address and ifindex. Privileged users
		 * can use any source address.
		 */
		pkti = (struct in6_pktinfo *)invalp;

		/*
		 * For link-local addresses we use the ipi6_ifindex when
		 * we verify the local address.
		 * If net_rawaccess then any source address can be used.
		 */
		if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
		    secpolicy_net_rawaccess(cr) != 0) {
			uint_t scopeid = 0;
			in6_addr_t *v6src = &pkti->ipi6_addr;
			ipaddr_t v4src;
			/* Stays "up" for a v4-mapped INADDR_ANY source */
			ip_laddr_t laddr_type = IPVL_UNICAST_UP;

			if (IN6_IS_ADDR_V4MAPPED(v6src)) {
				IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
				if (v4src != INADDR_ANY) {
					laddr_type = ip_laddr_verify_v4(v4src,
					    zoneid, ipst, B_FALSE);
				}
			} else {
				if (IN6_IS_ADDR_LINKSCOPE(v6src))
					scopeid = pkti->ipi6_ifindex;

				laddr_type = ip_laddr_verify_v6(v6src, zoneid,
				    ipst, B_FALSE, scopeid);
			}
			switch (laddr_type) {
			case IPVL_UNICAST_UP:
			case IPVL_UNICAST_DOWN:
				break;
			default:
				return (EADDRNOTAVAIL);
			}
			/*
			 * ixa_flags is modified here, in the checking pass,
			 * so this also happens when checkonly is set.
			 */
			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
			/* Allow any source */
			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
		}
		/* Validate the ifindex against the matching IP version */
		isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
		if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
		    ipst))
			return (ENXIO);
		break;
	}
	case IPV6_HOPLIMIT:
		/* It is only allowed as ancillary data */
		if (!coa->coa_ancillary)
			return (EINVAL);

		if (inlen != 0 && inlen != sizeof (int))
			return (EINVAL);
		if (inlen == sizeof (int)) {
			if (*i1 > IPV6_MAX_HOPS || *i1 < -1 || *i1 == 0)
				return (EINVAL);
		}
		break;
	case IPV6_TCLASS:
		if (inlen != 0 && inlen != sizeof (int))
			return (EINVAL);
		if (inlen == sizeof (int)) {
			if (*i1 > 255 || *i1 < -1)
				return (EINVAL);
		}
		break;
	case IPV6_NEXTHOP:
		if (inlen != 0 && inlen != sizeof (sin6_t))
			return (EINVAL);
		if (inlen == sizeof (sin6_t)) {
			sin6_t *sin6 = (sin6_t *)invalp;
			ire_t	*ire;

			if (sin6->sin6_family != AF_INET6)
				return (EAFNOSUPPORT);
			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
				return (EADDRNOTAVAIL);

			/* Verify that the next-hop is on-link */
			ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
			    0, 0, IRE_ONLINK, NULL, zoneid,
			    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
			if (ire == NULL)
				return (EHOSTUNREACH);
			/* The lookup was only an existence test */
			ire_refrele(ire);
			break;
		}
		break;
	case IPV6_RTHDR:
	case IPV6_DSTOPTS:
	case IPV6_RTHDRDSTOPTS:
	case IPV6_HOPOPTS: {
		/* All have the length field in the same place */
		ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
		/*
		 * Sanity checks - minimum size, size a multiple of
		 * eight bytes, and matching size passed in.
		 * hopts is only dereferenced when inlen is non-zero.
		 */
		if (inlen != 0 &&
		    inlen != (8 * (hopts->ip6h_len + 1)))
			return (EINVAL);
		break;
	}
	case IPV6_PATHMTU:
		/* Can't be set */
		return (EINVAL);

	case IPV6_USE_MIN_MTU:
		if (inlen != sizeof (int))
			return (EINVAL);
		if (*i1 < -1 || *i1 > 1)
			return (EINVAL);
		break;
	case IPV6_SRC_PREFERENCES:
		if (inlen != sizeof (uint32_t))
			return (EINVAL);
		break;
	case IPV6_V6ONLY:
		if (*i1 < 0 || *i1 > 1) {
			return (EINVAL);
		}
		break;
	case IPV6_MINHOPCOUNT:
		if (*i1 < 0 || *i1 > IPV6_MAX_HOPS)
			return (EINVAL);
		break;
	}
	if (checkonly)
		return (0);

	/* Here we set the actual option value */
	/*
	 * conn_lock protects the bitfields, and is used to
	 * set the fields atomically. Not needed for ixa settings since
	 * the caller has an exclusive copy of the ixa.
	 * We can not hold conn_lock across the multicast options though.
	 */
	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
	switch (name) {
	case IPV6_MULTICAST_IF:
		ixa->ixa_multicast_ifindex = ifindex;
		/* Need to redo ip_attr_connect */
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IPV6_UNICAST_HOPS:
		/* -1 means use default */
		mutex_enter(&connp->conn_lock);
		if (*i1 == -1) {
			ipp->ipp_unicast_hops = connp->conn_default_ttl;
		} else {
			ipp->ipp_unicast_hops = (uint8_t)*i1;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		break;
	case IPV6_MULTICAST_HOPS:
		/* -1 means use default */
		if (*i1 == -1) {
			ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
		} else {
			ixa->ixa_multicast_ttl = (uint8_t)*i1;
		}
		/* Handled automatically by ip_output */
		break;
	case IPV6_MULTICAST_LOOP:
		if (*i1 != 0)
			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
		else
			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
		/* Handled automatically by ip_output */
		break;
	case IPV6_JOIN_GROUP:
	case IPV6_LEAVE_GROUP:
	case MCAST_JOIN_GROUP:
	case MCAST_LEAVE_GROUP:
		/*
		 * checkonly is always false here because the checkonly case
		 * returned above. B_TRUE is passed for the IPv6 path; the
		 * IPv4 handler passes B_FALSE in the same position.
		 */
		return (ip_opt_set_multicast_group(connp, name,
		    invalp, B_TRUE, checkonly));

	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
	case MCAST_JOIN_SOURCE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
		/* As above: checkonly is always false at this point */
		return (ip_opt_set_multicast_sources(connp, name,
		    invalp, B_TRUE, checkonly));

	case IPV6_BOUND_IF:
		ixa->ixa_ifindex = ifindex;		/* Send */
		mutex_enter(&connp->conn_lock);
		connp->conn_incoming_ifindex = ifindex;	/* Receive */
		connp->conn_bound_if = ifindex;		/* getsockopt */
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IPV6_UNSPEC_SRC:
		mutex_enter(&connp->conn_lock);
		connp->conn_unspec_src = onoff;
		/* Source verification is the inverse of the option */
		if (onoff)
			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
		else
			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVPKTINFO:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVTCLASS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVPATHMTU:
		mutex_enter(&connp->conn_lock);
		connp->conn_ipv6_recvpathmtu = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVHOPLIMIT:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
		    onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVHOPOPTS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVDSTOPTS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case _OLD_IPV6_RECVDSTOPTS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
		    onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVRTHDRDSTOPTS:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
		    onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_RECVRTHDR:
		mutex_enter(&connp->conn_lock);
		connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_PKTINFO:
		mutex_enter(&connp->conn_lock);
		if (inlen == 0) {
			/* Zero length: forget any source and ifindex */
			ipp->ipp_fields &= ~IPPF_ADDR;
			ipp->ipp_addr = ipv6_all_zeros;
			ixa->ixa_ifindex = 0;
		} else {
			struct in6_pktinfo *pkti;

			pkti = (struct in6_pktinfo *)invalp;
			ipp->ipp_addr = pkti->ipi6_addr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
				ipp->ipp_fields |= IPPF_ADDR;
			else
				ipp->ipp_fields &= ~IPPF_ADDR;
			ixa->ixa_ifindex = pkti->ipi6_ifindex;
		}
		mutex_exit(&connp->conn_lock);
		/* Source and ifindex might have changed */
		coa->coa_changed |= COA_HEADER_CHANGED;
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IPV6_HOPLIMIT:
		mutex_enter(&connp->conn_lock);
		if (inlen == 0 || *i1 == -1) {
			/* Revert to default */
			ipp->ipp_fields &= ~IPPF_HOPLIMIT;
			ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
		} else {
			ipp->ipp_hoplimit = *i1;
			ipp->ipp_fields |= IPPF_HOPLIMIT;
			/* Ensure that it sticks for multicast packets */
			ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		break;
	case IPV6_TCLASS:
		/*
		 * IPV6_TCLASS accepts -1 as use kernel default
		 * and [0, 255] as the actual traffic class.
		 */
		mutex_enter(&connp->conn_lock);
		if (inlen == 0 || *i1 == -1) {
			ipp->ipp_tclass = 0;
			ipp->ipp_fields &= ~IPPF_TCLASS;
		} else {
			ipp->ipp_tclass = *i1;
			ipp->ipp_fields |= IPPF_TCLASS;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		break;
	case IPV6_NEXTHOP:
		/* Zero length or an unspecified address clears the next hop */
		if (inlen == 0) {
			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
		} else {
			sin6_t *sin6 = (sin6_t *)invalp;

			ixa->ixa_nexthop_v6 = sin6->sin6_addr;
			if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
				ixa->ixa_flags |= IXAF_NEXTHOP_SET;
			else
				ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
		}
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	/*
	 * The four extension header options below follow one pattern:
	 * optcom_pkt_set() stores the value in the ip_pkt_t under conn_lock,
	 * the matching IPPF_* bit tracks whether the stored length is
	 * non-zero, and both the header and write offset are flagged as
	 * changed.
	 */
	case IPV6_HOPOPTS:
		mutex_enter(&connp->conn_lock);
		error = optcom_pkt_set(invalp, inlen,
		    (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
		if (error != 0) {
			mutex_exit(&connp->conn_lock);
			return (error);
		}
		if (ipp->ipp_hopoptslen == 0) {
			ipp->ipp_fields &= ~IPPF_HOPOPTS;
		} else {
			ipp->ipp_fields |= IPPF_HOPOPTS;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		coa->coa_changed |= COA_WROFF_CHANGED;
		break;
	case IPV6_RTHDRDSTOPTS:
		mutex_enter(&connp->conn_lock);
		error = optcom_pkt_set(invalp, inlen,
		    (uchar_t **)&ipp->ipp_rthdrdstopts,
		    &ipp->ipp_rthdrdstoptslen);
		if (error != 0) {
			mutex_exit(&connp->conn_lock);
			return (error);
		}
		if (ipp->ipp_rthdrdstoptslen == 0) {
			ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
		} else {
			ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		coa->coa_changed |= COA_WROFF_CHANGED;
		break;
	case IPV6_DSTOPTS:
		mutex_enter(&connp->conn_lock);
		error = optcom_pkt_set(invalp, inlen,
		    (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
		if (error != 0) {
			mutex_exit(&connp->conn_lock);
			return (error);
		}
		if (ipp->ipp_dstoptslen == 0) {
			ipp->ipp_fields &= ~IPPF_DSTOPTS;
		} else {
			ipp->ipp_fields |= IPPF_DSTOPTS;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		coa->coa_changed |= COA_WROFF_CHANGED;
		break;
	case IPV6_RTHDR:
		mutex_enter(&connp->conn_lock);
		error = optcom_pkt_set(invalp, inlen,
		    (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
		if (error != 0) {
			mutex_exit(&connp->conn_lock);
			return (error);
		}
		if (ipp->ipp_rthdrlen == 0) {
			ipp->ipp_fields &= ~IPPF_RTHDR;
		} else {
			ipp->ipp_fields |= IPPF_RTHDR;
		}
		mutex_exit(&connp->conn_lock);
		coa->coa_changed |= COA_HEADER_CHANGED;
		coa->coa_changed |= COA_WROFF_CHANGED;
		break;

	case IPV6_DONTFRAG:
		/* IXAF_PMTU_DISCOVERY is set to the opposite of the option */
		if (onoff) {
			ixa->ixa_flags |= IXAF_DONTFRAG;
			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
		} else {
			ixa->ixa_flags &= ~IXAF_DONTFRAG;
			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
		}
		/* Need to redo ip_attr_connect */
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;

	case IPV6_USE_MIN_MTU:
		/* The flag records that a value was explicitly set */
		ixa->ixa_flags |= IXAF_USE_MIN_MTU;
		ixa->ixa_use_min_mtu = *i1;
		/* Need to redo ip_attr_connect */
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;

	case IPV6_SEC_OPT:
		mutex_enter(&connp->conn_lock);
		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
		mutex_exit(&connp->conn_lock);
		if (error != 0) {
			return (error);
		}
		/* This is an IPsec policy change - redo ip_attr_connect */
		coa->coa_changed |= COA_ROUTE_CHANGED;
		break;
	case IPV6_SRC_PREFERENCES:
		/*
		 * This socket option only affects connected
		 * sockets that haven't already bound to a specific
		 * IPv6 address.  In other words, sockets that
		 * don't call bind() with an address other than the
		 * unspecified address and that call connect().
		 * ip_set_destination_v6() passes these preferences
		 * to the ipif_select_source_v6() function.
		 */
		mutex_enter(&connp->conn_lock);
		error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
		mutex_exit(&connp->conn_lock);
		if (error != 0) {
			return (error);
		}
		break;
	case IPV6_V6ONLY:
		mutex_enter(&connp->conn_lock);
		connp->conn_ipv6_v6only = onoff;
		mutex_exit(&connp->conn_lock);
		break;
	case IPV6_MINHOPCOUNT:
		mutex_enter(&connp->conn_lock);
		connp->conn_min_ttl = *i1;
		mutex_exit(&connp->conn_lock);
		break;
	}
	return (0);
}

/*
 * Handle IPPROTO_UDP
 *
 * UDP_ANONPRIVBIND is only allowed when secpolicy_net_privaddr() returns
 * zero; otherwise its error is returned unchanged. Unless `checkonly' is
 * set, UDP_ANONPRIVBIND and UDP_EXCLBIND are then stored as on/off values
 * under conn_lock. Other option names are accepted and ignored.
 * Returns zero or an errno value.
 */
/* ARGSUSED1 */
static int
conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
    uchar_t *invalp, boolean_t checkonly, cred_t *cr)
{
	conn_t		*connp = coa->coa_connp;
	int		*i1 = (int *)invalp;
	/* Any non-zero int value is treated as "on" */
	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
	int		error;

	switch (name) {
	case UDP_ANONPRIVBIND:
		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
			return (error);
		}
		break;
	}
	if (checkonly)
		return (0);

	/* Here we set the actual option value */
	mutex_enter(&connp->conn_lock);
	switch (name) {
	case UDP_ANONPRIVBIND:
		connp->conn_anon_priv_bind = onoff;
		break;
	case UDP_EXCLBIND:
		connp->conn_exclbind = onoff;
		break;
	}
	mutex_exit(&connp->conn_lock);
	return (0);
}

/*
 * Handle IPPROTO_TCP
 *
 * TCP_ANONPRIVBIND is only allowed when secpolicy_net_privaddr() returns
 * zero; otherwise its error is returned unchanged. Unless `checkonly' is
 * set, TCP_ANONPRIVBIND, TCP_EXCLBIND and TCP_RECVDSTADDR are then stored
 * as on/off values under conn_lock. Other option names are accepted and
 * ignored. Returns zero or an errno value.
 */
/* ARGSUSED1 */
static int
conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
    uchar_t *invalp, boolean_t checkonly, cred_t *cr)
{
	conn_t		*connp = coa->coa_connp;
	int		*i1 = (int *)invalp;
	/* Any non-zero int value is treated as "on" */
	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
	int		error;

	switch (name) {
	case TCP_ANONPRIVBIND:
		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
			return (error);
		}
		break;
	}
	if (checkonly)
		return (0);

	/* Here we set the actual option value */
	mutex_enter(&connp->conn_lock);
	switch (name) {
	case TCP_ANONPRIVBIND:
		connp->conn_anon_priv_bind = onoff;
		break;
	case TCP_EXCLBIND:
		connp->conn_exclbind = onoff;
		break;
	case TCP_RECVDSTADDR:
		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
		break;
	}
	mutex_exit(&connp->conn_lock);
	return (0);
}

/*
 * Fill in `sa' with the local address and port of the conn.
 *
 * On entry *salenp is the size of the buffer at `sa'; EINVAL is returned
 * if that is smaller than a sin_t (when conn_family is AF_INET) or a sin6_t
 * (any other conn_family). On success *salenp is set to the size of the
 * structure that was written and zero is returned.
 */
int
conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
{
	sin_t		*sin;
	sin6_t		*sin6;

	if (connp->conn_family == AF_INET) {
		if (*salenp < sizeof (sin_t))
			return (EINVAL);

		*salenp = sizeof (sin_t);
		/* Fill zeroes and then initialize non-zero fields */
		sin = (sin_t *)sa;
		*sin = sin_null;
		sin->sin_family = AF_INET;
		if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
		    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
			sin->sin_addr.s_addr = connp->conn_saddr_v4;
		} else {
			/*
			 * INADDR_ANY
			 * conn_saddr is not set, we might be bound to
			 * broadcast/multicast. Use conn_bound_addr as
			 * local address instead (that could
			 * also still be INADDR_ANY)
			 */
			sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
		}
		sin->sin_port = connp->conn_lport;
	} else {
		if (*salenp < sizeof (sin6_t))
			return (EINVAL);

		*salenp = sizeof (sin6_t);
		/* Fill zeroes and then initialize non-zero fields */
		sin6 = (sin6_t *)sa;
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
			sin6->sin6_addr = connp->conn_saddr_v6;
		} else {
			/*
			 * conn_saddr is not set, we might be bound to
			 * broadcast/multicast. Use conn_bound_addr as
			 * local address instead (which could
			 * also still be unspecified)
			 */
			sin6->sin6_addr = connp->conn_bound_addr_v6;
		}
		sin6->sin6_port = connp->conn_lport;
		/* A scope id is only reported for link-scope addresses */
		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
	}
	return (0);
}

/*
 * Fill in `sa' with the remote (foreign) address and port of the conn.
 *
 * On entry *salenp is the size of the buffer at `sa'; EINVAL is returned
 * if that is smaller than a sin_t (when conn_family is AF_INET) or a sin6_t
 * (any other conn_family). On success *salenp is set to the size of the
 * structure that was written and zero is returned.
 */
int
conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
{
	struct sockaddr_in	*sin;
	struct sockaddr_in6	*sin6;

	if (connp->conn_family == AF_INET) {
		if (*salenp < sizeof (sin_t))
			return (EINVAL);

		*salenp = sizeof (sin_t);
		/* initialize */
		sin = (sin_t *)sa;
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = connp->conn_faddr_v4;
		sin->sin_port = connp->conn_fport;
	} else {
		if (*salenp < sizeof (sin6_t))
			return (EINVAL);

		*salenp = sizeof (sin6_t);
		/* initialize */
		sin6 = (sin6_t *)sa;
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = connp->conn_faddr_v6;
		sin6->sin6_port = connp->conn_fport;
		sin6->sin6_flowinfo = connp->conn_flowinfo;
		/* A scope id is only reported for link-scope addresses */
		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
	}
	return (0);
}

static uint32_t	cksum_massage_options_v4(ipha_t *, netstack_t *);
static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);

/*
 * Allocate and fill in conn_ht_iphc based on the current information
 * in the conn.
 * Normally used when we bind() and connect().
 * Returns failure if can't allocate memory, or if there is a problem
 * with a routing header/option.
 *
 * We allocate space for the transport header (ulp_hdr_len + extra) and
 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
 * The extra is there for transports that want some spare room for future
 * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
 * excludes the extra part.
 *
 * We massage a routing option/header and store the checksum difference
 * in conn_sum.
 *
 * Caller needs to update conn_wroff if desired.
2242 */ 2243 int 2244 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra, 2245 const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo) 2246 { 2247 ip_xmit_attr_t *ixa = connp->conn_ixa; 2248 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 2249 uint_t ip_hdr_length; 2250 uchar_t *hdrs; 2251 uint_t hdrs_len; 2252 2253 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2254 2255 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2256 ip_hdr_length = ip_total_hdrs_len_v4(ipp); 2257 /* In case of TX label and IP options it can be too much */ 2258 if (ip_hdr_length > IP_MAX_HDR_LENGTH) { 2259 /* Preserves existing TX errno for this */ 2260 return (EHOSTUNREACH); 2261 } 2262 } else { 2263 ip_hdr_length = ip_total_hdrs_len_v6(ipp); 2264 } 2265 ixa->ixa_ip_hdr_length = ip_hdr_length; 2266 hdrs_len = ip_hdr_length + ulp_hdr_length + extra; 2267 ASSERT(hdrs_len != 0); 2268 2269 if (hdrs_len != connp->conn_ht_iphc_allocated) { 2270 /* Allocate new before we free any old */ 2271 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 2272 if (hdrs == NULL) 2273 return (ENOMEM); 2274 2275 if (connp->conn_ht_iphc != NULL) { 2276 kmem_free(connp->conn_ht_iphc, 2277 connp->conn_ht_iphc_allocated); 2278 } 2279 connp->conn_ht_iphc = hdrs; 2280 connp->conn_ht_iphc_allocated = hdrs_len; 2281 } else { 2282 hdrs = connp->conn_ht_iphc; 2283 } 2284 hdrs_len -= extra; 2285 connp->conn_ht_iphc_len = hdrs_len; 2286 2287 connp->conn_ht_ulp = hdrs + ip_hdr_length; 2288 connp->conn_ht_ulp_len = ulp_hdr_length; 2289 2290 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2291 ipha_t *ipha = (ipha_t *)hdrs; 2292 2293 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 2294 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst); 2295 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto); 2296 ipha->ipha_length = htons(hdrs_len); 2297 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) 2298 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS; 2299 else 2300 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS; 2301 2302 if (ipp->ipp_fields & 
IPPF_IPV4_OPTIONS) { 2303 connp->conn_sum = cksum_massage_options_v4(ipha, 2304 connp->conn_netstack); 2305 } else { 2306 connp->conn_sum = 0; 2307 } 2308 } else { 2309 ip6_t *ip6h = (ip6_t *)hdrs; 2310 2311 ip6h->ip6_src = *v6src; 2312 ip6h->ip6_dst = *v6dst; 2313 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto, 2314 flowinfo); 2315 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN); 2316 2317 if (ipp->ipp_fields & IPPF_RTHDR) { 2318 connp->conn_sum = cksum_massage_options_v6(ip6h, 2319 ip_hdr_length, connp->conn_netstack); 2320 2321 /* 2322 * Verify that the first hop isn't a mapped address. 2323 * Routers along the path need to do this verification 2324 * for subsequent hops. 2325 */ 2326 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) 2327 return (EADDRNOTAVAIL); 2328 2329 } else { 2330 connp->conn_sum = 0; 2331 } 2332 } 2333 return (0); 2334 } 2335 2336 /* 2337 * Prepend a header template to data_mp based on the ip_pkt_t 2338 * and the passed in source, destination and protocol. 2339 * 2340 * Returns failure if can't allocate memory, in which case data_mp is freed. 2341 * We allocate space for the transport header (ulp_hdr_len) and 2342 * indicate the offset of the ulp header by setting ixa_ip_hdr_length. 2343 * 2344 * We massage an routing option/header and return the ckecksum difference 2345 * in *sump. This is in host byte order. 2346 * 2347 * Caller needs to update conn_wroff if desired. 
2348 */ 2349 mblk_t * 2350 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, 2351 const in6_addr_t *v6src, const in6_addr_t *v6dst, 2352 uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp, 2353 uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp) 2354 { 2355 uint_t ip_hdr_length; 2356 uchar_t *hdrs; 2357 uint_t hdrs_len; 2358 mblk_t *mp; 2359 2360 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2361 ip_hdr_length = ip_total_hdrs_len_v4(ipp); 2362 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH); 2363 } else { 2364 ip_hdr_length = ip_total_hdrs_len_v6(ipp); 2365 } 2366 hdrs_len = ip_hdr_length + ulp_hdr_length; 2367 ASSERT(hdrs_len != 0); 2368 2369 ixa->ixa_ip_hdr_length = ip_hdr_length; 2370 2371 /* Can we prepend to data_mp? */ 2372 if (data_mp != NULL && 2373 data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len && 2374 data_mp->b_datap->db_ref == 1) { 2375 hdrs = data_mp->b_rptr - hdrs_len; 2376 data_mp->b_rptr = hdrs; 2377 mp = data_mp; 2378 } else { 2379 mp = allocb(hdrs_len + wroff_extra, BPRI_MED); 2380 if (mp == NULL) { 2381 freemsg(data_mp); 2382 *errorp = ENOMEM; 2383 return (NULL); 2384 } 2385 mp->b_wptr = mp->b_datap->db_lim; 2386 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len; 2387 mp->b_cont = data_mp; 2388 } 2389 2390 /* 2391 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it 2392 * if PKTINFO (aka IPPF_ADDR) was set. 
2393 */ 2394 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2395 ipha_t *ipha = (ipha_t *)hdrs; 2396 2397 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst)); 2398 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 2399 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst); 2400 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol); 2401 ipha->ipha_length = htons(hdrs_len + data_length); 2402 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) 2403 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS; 2404 else 2405 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS; 2406 2407 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) { 2408 *sump = cksum_massage_options_v4(ipha, 2409 ixa->ixa_ipst->ips_netstack); 2410 } else { 2411 *sump = 0; 2412 } 2413 } else { 2414 ip6_t *ip6h = (ip6_t *)hdrs; 2415 2416 ip6h->ip6_src = *v6src; 2417 ip6h->ip6_dst = *v6dst; 2418 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo); 2419 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN); 2420 2421 if (ipp->ipp_fields & IPPF_RTHDR) { 2422 *sump = cksum_massage_options_v6(ip6h, 2423 ip_hdr_length, ixa->ixa_ipst->ips_netstack); 2424 2425 /* 2426 * Verify that the first hop isn't a mapped address. 2427 * Routers along the path need to do this verification 2428 * for subsequent hops. 2429 */ 2430 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 2431 *errorp = EADDRNOTAVAIL; 2432 freemsg(mp); 2433 return (NULL); 2434 } 2435 } else { 2436 *sump = 0; 2437 } 2438 } 2439 return (mp); 2440 } 2441 2442 /* 2443 * Massage a source route if any putting the first hop 2444 * in ipha_dst. Compute a starting value for the checksum which 2445 * takes into account that the original ipha_dst should be 2446 * included in the checksum but that IP will include the 2447 * first hop from the source route in the tcp checksum. 
2448 */ 2449 static uint32_t 2450 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns) 2451 { 2452 in_addr_t dst; 2453 uint32_t cksum; 2454 2455 /* Get last hop then diff against first hop */ 2456 cksum = ip_massage_options(ipha, ns); 2457 cksum = (cksum & 0xFFFF) + (cksum >> 16); 2458 dst = ipha->ipha_dst; 2459 cksum -= ((dst >> 16) + (dst & 0xffff)); 2460 if ((int)cksum < 0) 2461 cksum--; 2462 cksum = (cksum & 0xFFFF) + (cksum >> 16); 2463 cksum = (cksum & 0xFFFF) + (cksum >> 16); 2464 ASSERT(cksum < 0x10000); 2465 return (ntohs(cksum)); 2466 } 2467 2468 static uint32_t 2469 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns) 2470 { 2471 uint8_t *end; 2472 ip6_rthdr_t *rth; 2473 uint32_t cksum; 2474 2475 end = (uint8_t *)ip6h + ip_hdr_len; 2476 rth = ip_find_rthdr_v6(ip6h, end); 2477 if (rth == NULL) 2478 return (0); 2479 2480 cksum = ip_massage_options_v6(ip6h, rth, ns); 2481 cksum = (cksum & 0xFFFF) + (cksum >> 16); 2482 ASSERT(cksum < 0x10000); 2483 return (ntohs(cksum)); 2484 } 2485 2486 /* 2487 * ULPs that change the destination address need to call this for each 2488 * change to discard any state about a previous destination that might 2489 * have been multicast or multirt. 2490 */ 2491 void 2492 ip_attr_newdst(ip_xmit_attr_t *ixa) 2493 { 2494 ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM | 2495 IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR | 2496 IXAF_NO_LOOP_ZONEID_SET); 2497 } 2498 2499 /* 2500 * Determine the nexthop which will be used. 2501 * Normally this is just the destination, but if a IPv4 source route, or 2502 * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from 2503 * there. 
2504 */ 2505 void 2506 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa, 2507 const in6_addr_t *dst, in6_addr_t *nexthop) 2508 { 2509 if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) { 2510 *nexthop = *dst; 2511 return; 2512 } 2513 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2514 ipaddr_t v4dst; 2515 ipaddr_t v4nexthop; 2516 2517 IN6_V4MAPPED_TO_IPADDR(dst, v4dst); 2518 v4nexthop = ip_pkt_source_route_v4(ipp); 2519 if (v4nexthop == INADDR_ANY) 2520 v4nexthop = v4dst; 2521 2522 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop); 2523 } else { 2524 const in6_addr_t *v6nexthop; 2525 2526 v6nexthop = ip_pkt_source_route_v6(ipp); 2527 if (v6nexthop == NULL) 2528 v6nexthop = dst; 2529 2530 *nexthop = *v6nexthop; 2531 } 2532 } 2533 2534 /* 2535 * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa. 2536 * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected 2537 * case (connected latching is done in conn_connect). 2538 * Note that IPsec policy lookup requires conn_proto and conn_laddr to be 2539 * set, but doesn't otherwise use the conn_t. 2540 * 2541 * Caller must set/clear IXAF_IS_IPV4 as appropriately. 2542 * Caller must use ip_attr_nexthop() to determine the nexthop argument. 2543 * 2544 * The caller must NOT hold conn_lock (to avoid problems with ill_refrele 2545 * causing the squeue to run doing ipcl_walk grabbing conn_lock.) 2546 * 2547 * Updates laddrp and uinfo if they are non-NULL. 2548 * 2549 * TSOL notes: The callers if ip_attr_connect must check if the destination 2550 * is different than before and in that case redo conn_update_label. 2551 * The callers of conn_connect do not need that since conn_connect 2552 * performs the conn_update_label. 
2553 */ 2554 int 2555 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa, 2556 const in6_addr_t *v6src, const in6_addr_t *v6dst, 2557 const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp, 2558 iulp_t *uinfo, uint32_t flags) 2559 { 2560 in6_addr_t laddr = *v6src; 2561 int error; 2562 2563 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 2564 2565 if (connp->conn_zone_is_global) 2566 flags |= IPDF_ZONE_IS_GLOBAL; 2567 else 2568 flags &= ~IPDF_ZONE_IS_GLOBAL; 2569 2570 /* 2571 * Lookup the route to determine a source address and the uinfo. 2572 * If the ULP has a source route option then the caller will 2573 * have set v6nexthop to be the first hop. 2574 */ 2575 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2576 ipaddr_t v4dst; 2577 ipaddr_t v4src, v4nexthop; 2578 2579 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst); 2580 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop); 2581 IN6_V4MAPPED_TO_IPADDR(v6src, v4src); 2582 2583 if (connp->conn_unspec_src || v4src != INADDR_ANY) 2584 flags &= ~IPDF_SELECT_SRC; 2585 else 2586 flags |= IPDF_SELECT_SRC; 2587 2588 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa, 2589 uinfo, flags, connp->conn_mac_mode); 2590 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr); 2591 } else { 2592 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src)) 2593 flags &= ~IPDF_SELECT_SRC; 2594 else 2595 flags |= IPDF_SELECT_SRC; 2596 2597 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa, 2598 uinfo, flags, connp->conn_mac_mode); 2599 } 2600 /* Pass out some address even if we hit a RTF_REJECT etc */ 2601 if (laddrp != NULL) 2602 *laddrp = laddr; 2603 2604 if (error != 0) 2605 return (error); 2606 2607 if (flags & IPDF_IPSEC) { 2608 /* 2609 * Set any IPsec policy in ixa. Routine also looks at ULP 2610 * ports. 2611 */ 2612 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa); 2613 } 2614 return (0); 2615 } 2616 2617 /* 2618 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa. 
2619 * Assumes that conn_faddr and conn_fport are already set. As such it is not 2620 * usable for SCTP, since SCTP has multiple faddrs. 2621 * 2622 * Caller must hold conn_lock to provide atomic constency between the 2623 * conn_t's addresses and the ixa. 2624 * NOTE: this function drops and reaquires conn_lock since it can't be 2625 * held across ip_attr_connect/ip_set_destination. 2626 * 2627 * The caller needs to handle inserting in the receive-side fanout when 2628 * appropriate after conn_connect returns. 2629 */ 2630 int 2631 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags) 2632 { 2633 ip_xmit_attr_t *ixa = connp->conn_ixa; 2634 in6_addr_t nexthop; 2635 in6_addr_t saddr, faddr; 2636 in_port_t fport; 2637 int error; 2638 2639 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2640 2641 if (connp->conn_ipversion == IPV4_VERSION) 2642 ixa->ixa_flags |= IXAF_IS_IPV4; 2643 else 2644 ixa->ixa_flags &= ~IXAF_IS_IPV4; 2645 2646 /* We do IPsec latching below - hence no caching in ip_attr_connect */ 2647 flags &= ~IPDF_IPSEC; 2648 2649 /* In case we had previously done an ip_attr_connect */ 2650 ip_attr_newdst(ixa); 2651 2652 /* 2653 * Determine the nexthop and copy the addresses before dropping 2654 * conn_lock. 2655 */ 2656 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa, 2657 &connp->conn_faddr_v6, &nexthop); 2658 saddr = connp->conn_saddr_v6; 2659 faddr = connp->conn_faddr_v6; 2660 fport = connp->conn_fport; 2661 2662 mutex_exit(&connp->conn_lock); 2663 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport, 2664 &saddr, uinfo, flags | IPDF_VERIFY_DST); 2665 mutex_enter(&connp->conn_lock); 2666 2667 /* Could have changed even if an error */ 2668 connp->conn_saddr_v6 = saddr; 2669 if (error != 0) 2670 return (error); 2671 2672 /* 2673 * Check whether Trusted Solaris policy allows communication with this 2674 * host, and pretend that the destination is unreachable if not. 2675 * Compute any needed label and place it in ipp_label_v4/v6. 
2676 * 2677 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form 2678 * the packet. 2679 * 2680 * TSOL Note: Any concurrent threads would pick a different ixa 2681 * (and ipp if they are to change the ipp) so we 2682 * don't have to worry about concurrent threads. 2683 */ 2684 if (is_system_labeled()) { 2685 if (connp->conn_mlp_type != mlptSingle) 2686 return (ECONNREFUSED); 2687 2688 /* 2689 * conn_update_label will set ipp_label* which will later 2690 * be used by conn_build_hdr_template. 2691 */ 2692 error = conn_update_label(connp, ixa, 2693 &connp->conn_faddr_v6, &connp->conn_xmit_ipp); 2694 if (error != 0) 2695 return (error); 2696 } 2697 2698 /* 2699 * Ensure that we match on the selected local address. 2700 * This overrides conn_laddr in the case we had earlier bound to a 2701 * multicast or broadcast address. 2702 */ 2703 connp->conn_laddr_v6 = connp->conn_saddr_v6; 2704 2705 /* 2706 * Allow setting new policies. 2707 * The addresses/ports are already set, thus the IPsec policy calls 2708 * can handle their passed-in conn's. 2709 */ 2710 connp->conn_policy_cached = B_FALSE; 2711 2712 /* 2713 * Cache IPsec policy in this conn. If we have per-socket policy, 2714 * we'll cache that. If we don't, we'll inherit global policy. 2715 * 2716 * This is done before the caller inserts in the receive-side fanout. 2717 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even 2718 * for connections where we don't have a policy. This is to prevent 2719 * global policy lookups in the inbound path. 2720 * 2721 * If we insert before we set conn_policy_cached, 2722 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true 2723 * because global policy cound be non-empty. We normally call 2724 * ipsec_check_policy() for conn_policy_cached connections only if 2725 * conn_in_enforce_policy is set. 
But in this case, 2726 * conn_policy_cached can get set anytime since we made the 2727 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is 2728 * called, which will make the above assumption false. Thus, we 2729 * need to insert after we set conn_policy_cached. 2730 */ 2731 error = ipsec_conn_cache_policy(connp, 2732 connp->conn_ipversion == IPV4_VERSION); 2733 if (error != 0) 2734 return (error); 2735 2736 /* 2737 * We defer to do LSO check until here since now we have better idea 2738 * whether IPsec is present. If the underlying ill is LSO capable, 2739 * copy its capability in so the ULP can decide whether to enable LSO 2740 * on this connection. So far, only TCP/IPv4 is implemented, so won't 2741 * claim LSO for IPv6. 2742 * 2743 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because 2744 * the receiver can not handle it. Also not to enable LSO for MULTIRT. 2745 */ 2746 ixa->ixa_flags &= ~IXAF_LSO_CAPAB; 2747 2748 ASSERT(ixa->ixa_ire != NULL); 2749 if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) && 2750 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) && 2751 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) && 2752 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) && 2753 (ixa->ixa_nce != NULL) && 2754 ((ixa->ixa_flags & IXAF_IS_IPV4) ? 2755 ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) : 2756 ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) { 2757 ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab; 2758 ixa->ixa_flags |= IXAF_LSO_CAPAB; 2759 } 2760 2761 /* Check whether ZEROCOPY capability is usable for this connection. 
*/ 2762 ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB; 2763 2764 if ((flags & IPDF_ZCOPY) && 2765 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) && 2766 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) && 2767 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) && 2768 (ixa->ixa_nce != NULL) && 2769 ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) { 2770 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB; 2771 } 2772 return (0); 2773 } 2774 2775 /* 2776 * Predicates to check if the addresses match conn_last* 2777 */ 2778 2779 /* 2780 * Compare the conn against an address. 2781 * If using mapped addresses on AF_INET6 sockets, use the _v6 function 2782 */ 2783 boolean_t 2784 conn_same_as_last_v4(conn_t *connp, sin_t *sin) 2785 { 2786 ASSERT(connp->conn_family == AF_INET); 2787 return (sin->sin_addr.s_addr == connp->conn_v4lastdst && 2788 sin->sin_port == connp->conn_lastdstport); 2789 } 2790 2791 /* 2792 * Compare, including for mapped addresses 2793 */ 2794 boolean_t 2795 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6) 2796 { 2797 return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) && 2798 sin6->sin6_port == connp->conn_lastdstport && 2799 sin6->sin6_flowinfo == connp->conn_lastflowinfo && 2800 sin6->sin6_scope_id == connp->conn_lastscopeid); 2801 } 2802 2803 /* 2804 * Compute a label and place it in the ip_packet_t. 2805 * Handles IPv4 and IPv6. 2806 * The caller should have a correct ixa_tsl and ixa_zoneid and have 2807 * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest 2808 * has been called. 
2809 */ 2810 int 2811 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa, 2812 const in6_addr_t *v6dst, ip_pkt_t *ipp) 2813 { 2814 int err; 2815 ipaddr_t v4dst; 2816 2817 if (IN6_IS_ADDR_V4MAPPED(v6dst)) { 2818 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 2819 2820 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst); 2821 2822 err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid, 2823 v4dst, opt_storage, ixa->ixa_ipst); 2824 if (err == 0) { 2825 /* Length contained in opt_storage[IPOPT_OLEN] */ 2826 err = optcom_pkt_set(opt_storage, 2827 opt_storage[IPOPT_OLEN], 2828 (uchar_t **)&ipp->ipp_label_v4, 2829 &ipp->ipp_label_len_v4); 2830 } 2831 if (err != 0) { 2832 DTRACE_PROBE4(tx__ip__log__info__updatelabel, 2833 char *, "conn(1) failed to update options(2) " 2834 "on ixa(3)", 2835 conn_t *, connp, char *, opt_storage, 2836 ip_xmit_attr_t *, ixa); 2837 } 2838 if (ipp->ipp_label_len_v4 != 0) 2839 ipp->ipp_fields |= IPPF_LABEL_V4; 2840 else 2841 ipp->ipp_fields &= ~IPPF_LABEL_V4; 2842 } else { 2843 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 2844 uint_t optlen; 2845 2846 err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid, 2847 v6dst, opt_storage, ixa->ixa_ipst); 2848 if (err == 0) { 2849 /* 2850 * Note that ipp_label_v6 is just the option - not 2851 * the hopopts extension header. 2852 * 2853 * Length contained in opt_storage[IPOPT_OLEN], but 2854 * that doesn't include the two byte options header. 
2855 */ 2856 optlen = opt_storage[IPOPT_OLEN]; 2857 if (optlen != 0) 2858 optlen += 2; 2859 2860 err = optcom_pkt_set(opt_storage, optlen, 2861 (uchar_t **)&ipp->ipp_label_v6, 2862 &ipp->ipp_label_len_v6); 2863 } 2864 if (err != 0) { 2865 DTRACE_PROBE4(tx__ip__log__info__updatelabel, 2866 char *, "conn(1) failed to update options(2) " 2867 "on ixa(3)", 2868 conn_t *, connp, char *, opt_storage, 2869 ip_xmit_attr_t *, ixa); 2870 } 2871 if (ipp->ipp_label_len_v6 != 0) 2872 ipp->ipp_fields |= IPPF_LABEL_V6; 2873 else 2874 ipp->ipp_fields &= ~IPPF_LABEL_V6; 2875 } 2876 return (err); 2877 } 2878 2879 /* 2880 * Inherit all options settings from the parent/listener to the eager. 2881 * Returns zero on success; ENOMEM if memory allocation failed. 2882 * 2883 * We assume that the eager has not had any work done i.e., the conn_ixa 2884 * and conn_xmit_ipp are all zero. 2885 * Furthermore we assume that no other thread can access the eager (because 2886 * it isn't inserted in any fanout list). 2887 */ 2888 int 2889 conn_inherit_parent(conn_t *lconnp, conn_t *econnp) 2890 { 2891 cred_t *credp; 2892 int err; 2893 void *notify_cookie; 2894 uint32_t xmit_hint; 2895 2896 econnp->conn_family = lconnp->conn_family; 2897 econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only; 2898 econnp->conn_wq = lconnp->conn_wq; 2899 econnp->conn_rq = lconnp->conn_rq; 2900 2901 /* 2902 * Make a safe copy of the transmit attributes. 2903 * conn_connect will later be used by the caller to setup the ire etc. 
2904 */ 2905 ASSERT(econnp->conn_ixa->ixa_refcnt == 1); 2906 ASSERT(econnp->conn_ixa->ixa_ire == NULL); 2907 ASSERT(econnp->conn_ixa->ixa_dce == NULL); 2908 ASSERT(econnp->conn_ixa->ixa_nce == NULL); 2909 2910 /* Preserve ixa_notify_cookie and xmit_hint */ 2911 notify_cookie = econnp->conn_ixa->ixa_notify_cookie; 2912 xmit_hint = econnp->conn_ixa->ixa_xmit_hint; 2913 ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa); 2914 econnp->conn_ixa->ixa_notify_cookie = notify_cookie; 2915 econnp->conn_ixa->ixa_xmit_hint = xmit_hint; 2916 2917 econnp->conn_bound_if = lconnp->conn_bound_if; 2918 econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex; 2919 2920 /* Inherit all RECV options */ 2921 econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary; 2922 2923 err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp, 2924 KM_NOSLEEP); 2925 if (err != 0) 2926 return (err); 2927 2928 econnp->conn_zoneid = lconnp->conn_zoneid; 2929 econnp->conn_allzones = lconnp->conn_allzones; 2930 2931 /* This is odd. Pick a flowlabel for each connection instead? */ 2932 econnp->conn_flowinfo = lconnp->conn_flowinfo; 2933 2934 econnp->conn_default_ttl = lconnp->conn_default_ttl; 2935 econnp->conn_min_ttl = lconnp->conn_min_ttl; 2936 2937 /* 2938 * TSOL: tsol_input_proc() needs the eager's cred before the 2939 * eager is accepted 2940 */ 2941 ASSERT(lconnp->conn_cred != NULL); 2942 econnp->conn_cred = credp = lconnp->conn_cred; 2943 crhold(credp); 2944 econnp->conn_cpid = lconnp->conn_cpid; 2945 econnp->conn_open_time = ddi_get_lbolt64(); 2946 2947 /* 2948 * Cache things in the ixa without any refhold. 
2949 * Listener might not have set up ixa_cred 2950 */ 2951 ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 2952 econnp->conn_ixa->ixa_cred = econnp->conn_cred; 2953 econnp->conn_ixa->ixa_cpid = econnp->conn_cpid; 2954 if (is_system_labeled()) 2955 econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred); 2956 2957 /* 2958 * If the caller has the process-wide flag set, then default to MAC 2959 * exempt mode. This allows read-down to unlabeled hosts. 2960 */ 2961 if (getpflags(NET_MAC_AWARE, credp) != 0) 2962 econnp->conn_mac_mode = CONN_MAC_AWARE; 2963 2964 econnp->conn_zone_is_global = lconnp->conn_zone_is_global; 2965 2966 /* 2967 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ 2968 * via soaccept()->soinheritoptions() which essentially applies 2969 * all the listener options to the new connection. The options that we 2970 * need to take care of are: 2971 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST, 2972 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER, 2973 * SO_SNDBUF, SO_RCVBUF. 2974 * 2975 * SO_RCVBUF: conn_rcvbuf is set. 2976 * SO_SNDBUF: conn_sndbuf is set. 2977 */ 2978 2979 /* Could we define a struct and use a struct copy for this? */ 2980 econnp->conn_sndbuf = lconnp->conn_sndbuf; 2981 econnp->conn_rcvbuf = lconnp->conn_rcvbuf; 2982 econnp->conn_sndlowat = lconnp->conn_sndlowat; 2983 econnp->conn_rcvlowat = lconnp->conn_rcvlowat; 2984 econnp->conn_dgram_errind = lconnp->conn_dgram_errind; 2985 econnp->conn_oobinline = lconnp->conn_oobinline; 2986 econnp->conn_debug = lconnp->conn_debug; 2987 econnp->conn_keepalive = lconnp->conn_keepalive; 2988 econnp->conn_linger = lconnp->conn_linger; 2989 econnp->conn_lingertime = lconnp->conn_lingertime; 2990 2991 /* Set the IP options */ 2992 econnp->conn_broadcast = lconnp->conn_broadcast; 2993 econnp->conn_useloopback = lconnp->conn_useloopback; 2994 econnp->conn_reuseaddr = lconnp->conn_reuseaddr; 2995 return (0); 2996 } 2997