1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/t_lock.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/buf.h> 32 #include <sys/conf.h> 33 #include <sys/cred.h> 34 #include <sys/kmem.h> 35 #include <sys/kmem_impl.h> 36 #include <sys/sysmacros.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/debug.h> 40 #include <sys/errno.h> 41 #include <sys/time.h> 42 #include <sys/file.h> 43 #include <sys/open.h> 44 #include <sys/user.h> 45 #include <sys/termios.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/strsun.h> 49 #include <sys/suntpi.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/sockio.h> 61 #include <sys/sodirect.h> 62 #include <netinet/in.h> 63 #include <sys/un.h> 64 #include <sys/strsun.h> 65 66 #include <sys/tiuser.h> 67 #define 
_SUN_TPI_VERSION 2 68 #include <sys/tihdr.h> 69 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 70 71 #include <c2/audit.h> 72 73 #include <inet/common.h> 74 #include <inet/ip.h> 75 #include <inet/ip6.h> 76 #include <inet/tcp.h> 77 #include <inet/udp_impl.h> 78 79 #include <sys/zone.h> 80 81 #include <fs/sockfs/nl7c.h> 82 #include <fs/sockfs/nl7curi.h> 83 84 #include <inet/kssl/ksslapi.h> 85 86 #include <fs/sockfs/sockcommon.h> 87 #include <fs/sockfs/socktpi.h> 88 #include <fs/sockfs/socktpi_impl.h> 89 90 /* 91 * Possible failures when memory can't be allocated. The documented behavior: 92 * 93 * 5.5: 4.X: XNET: 94 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 95 * EINTR 96 * (4.X does not document EINTR but returns it) 97 * bind: ENOSR - ENOBUFS/ENOSR 98 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 99 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 100 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 101 * (4.X getpeername and getsockname do not fail in practice) 102 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 103 * listen: - - ENOBUFS 104 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 105 * EINTR 106 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 107 * EINTR 108 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 109 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 110 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 111 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 112 * 113 * Resolution. When allocation fails: 114 * recv: return EINTR 115 * send: return EINTR 116 * connect, accept: EINTR 117 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 118 * socket, socketpair: ENOBUFS 119 * getpeername, getsockname: sleep 120 * getsockopt, setsockopt: sleep 121 */ 122 123 #ifdef SOCK_TEST 124 /* 125 * Variables that make sockfs do something other than the standard TPI 126 * for the AF_INET transports. 
127 * 128 * solisten_tpi_tcp: 129 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 130 * the transport is already bound. This is needed to avoid losing the 131 * port number should listen() do a T_UNBIND_REQ followed by a 132 * O_T_BIND_REQ. 133 * 134 * soconnect_tpi_udp: 135 * UDP and ICMP can handle a T_CONN_REQ. 136 * This is needed to make the sequence of connect(), getsockname() 137 * return the local IP address used to send packets to the connected to 138 * destination. 139 * 140 * soconnect_tpi_tcp: 141 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 142 * Set this to non-zero to send TPI conformant messages to TCP in this 143 * respect. This is a performance optimization. 144 * 145 * soaccept_tpi_tcp: 146 * TCP can handle a T_CONN_REQ without the acceptor being bound. 147 * This is a performance optimization that has been picked up in XTI. 148 * 149 * soaccept_tpi_multioptions: 150 * When inheriting SOL_SOCKET options from the listener to the accepting 151 * socket send them as a single message for AF_INET{,6}. 152 */ 153 int solisten_tpi_tcp = 0; 154 int soconnect_tpi_udp = 0; 155 int soconnect_tpi_tcp = 0; 156 int soaccept_tpi_tcp = 0; 157 int soaccept_tpi_multioptions = 1; 158 #else /* SOCK_TEST */ 159 #define soconnect_tpi_tcp 0 160 #define soconnect_tpi_udp 0 161 #define solisten_tpi_tcp 0 162 #define soaccept_tpi_tcp 0 163 #define soaccept_tpi_multioptions 1 164 #endif /* SOCK_TEST */ 165 166 #ifdef SOCK_TEST 167 extern int do_useracc; 168 extern clock_t sock_test_timelimit; 169 #endif /* SOCK_TEST */ 170 171 /* 172 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 173 * applications working. Turn on this flag to disable these checks. 
174 */ 175 int xnet_skip_checks = 0; 176 int xnet_check_print = 0; 177 int xnet_truncate_print = 0; 178 179 static void sotpi_destroy(struct sonode *); 180 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int, 181 int, int *, cred_t *cr); 182 183 static boolean_t sotpi_info_create(struct sonode *, int); 184 static void sotpi_info_init(struct sonode *); 185 static void sotpi_info_fini(struct sonode *); 186 static void sotpi_info_destroy(struct sonode *); 187 188 /* 189 * Do direct function call to the transport layer below; this would 190 * also allow the transport to utilize read-side synchronous stream 191 * interface if necessary. This is a /etc/system tunable that must 192 * not be modified on a running system. By default this is enabled 193 * for performance reasons and may be disabled for debugging purposes. 194 */ 195 boolean_t socktpi_direct = B_TRUE; 196 197 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 198 199 extern void sigintr(k_sigset_t *, int); 200 extern void sigunintr(k_sigset_t *); 201 202 /* Sockets acting as an in-kernel SSL proxy */ 203 extern mblk_t *strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *, 204 strsigset_t *, strsigset_t *, strpollset_t *); 205 extern mblk_t *strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *, 206 strsigset_t *, strsigset_t *, strpollset_t *); 207 208 static int sotpi_unbind(struct sonode *, int); 209 210 extern int sodput(sodirect_t *, mblk_t *); 211 extern void sodwakeup(sodirect_t *); 212 213 /* TPI sockfs sonode operations */ 214 int sotpi_init(struct sonode *, struct sonode *, struct cred *, 215 int); 216 static int sotpi_accept(struct sonode *, int, struct cred *, 217 struct sonode **); 218 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 219 int, struct cred *); 220 static int sotpi_listen(struct sonode *, int, struct cred *); 221 static int sotpi_connect(struct sonode *, const struct sockaddr *, 222 socklen_t, int, int, struct cred *); 223 
extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 224 struct uio *, struct cred *); 225 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 226 struct uio *, struct cred *); 227 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 228 struct cred *, mblk_t **); 229 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 230 struct uio *, void *, t_uscalar_t, int); 231 static int sodgram_direct(struct sonode *, struct sockaddr *, 232 socklen_t, struct uio *, int); 233 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 234 socklen_t *, boolean_t, struct cred *); 235 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 236 socklen_t *, struct cred *); 237 static int sotpi_shutdown(struct sonode *, int, struct cred *); 238 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 239 socklen_t *, int, struct cred *); 240 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 241 socklen_t, struct cred *); 242 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 243 int32_t *); 244 static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int, 245 struct cred *, int32_t *); 246 static int sotpi_poll(struct sonode *, short, int, short *, 247 struct pollhead **); 248 static int sotpi_close(struct sonode *, int, struct cred *); 249 250 static int i_sotpi_info_constructor(sotpi_info_t *); 251 static void i_sotpi_info_destructor(sotpi_info_t *); 252 253 sonodeops_t sotpi_sonodeops = { 254 sotpi_init, /* sop_init */ 255 sotpi_accept, /* sop_accept */ 256 sotpi_bind, /* sop_bind */ 257 sotpi_listen, /* sop_listen */ 258 sotpi_connect, /* sop_connect */ 259 sotpi_recvmsg, /* sop_recvmsg */ 260 sotpi_sendmsg, /* sop_sendmsg */ 261 sotpi_sendmblk, /* sop_sendmblk */ 262 sotpi_getpeername, /* sop_getpeername */ 263 sotpi_getsockname, /* sop_getsockname */ 264 sotpi_shutdown, /* sop_shutdown */ 265 sotpi_getsockopt, /* sop_getsockopt */ 266 sotpi_setsockopt, /* 
sop_setsockopt */ 267 sotpi_ioctl, /* sop_ioctl */ 268 sotpi_poll, /* sop_poll */ 269 sotpi_close, /* sop_close */ 270 }; 271 272 /* 273 * Return a TPI socket vnode. 274 * 275 * Note that sockets assume that the driver will clone (either itself 276 * or by using the clone driver) i.e. a socket() call will always 277 * result in a new vnode being created. 278 */ 279 280 /* 281 * Common create code for socket and accept. If tso is set the values 282 * from that node is used instead of issuing a T_INFO_REQ. 283 */ 284 285 /* ARGSUSED */ 286 static struct sonode * 287 sotpi_create(struct sockparams *sp, int family, int type, int protocol, 288 int version, int sflags, int *errorp, cred_t *cr) 289 { 290 struct sonode *so; 291 kmem_cache_t *cp; 292 int sfamily = family; 293 294 ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 295 296 if (family == AF_NCA) { 297 /* 298 * The request is for an NCA socket so for NL7C use the 299 * INET domain instead and mark NL7C_AF_NCA below. 300 */ 301 family = AF_INET; 302 /* 303 * NL7C is not supported in the non-global zone, 304 * we enforce this restriction here. 305 */ 306 if (getzoneid() != GLOBAL_ZONEID) { 307 *errorp = ENOTSUP; 308 return (NULL); 309 } 310 } 311 312 /* 313 * to be compatible with old tpi socket implementation ignore 314 * sleep flag (sflags) passed in 315 */ 316 cp = (family == AF_UNIX) ? 
socktpi_unix_cache : socktpi_cache; 317 so = kmem_cache_alloc(cp, KM_SLEEP); 318 if (so == NULL) { 319 *errorp = ENOMEM; 320 return (NULL); 321 } 322 323 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 324 sotpi_info_init(so); 325 326 if (sfamily == AF_NCA) { 327 SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA; 328 } 329 330 if (version == SOV_DEFAULT) 331 version = so_default_version; 332 333 so->so_version = (short)version; 334 *errorp = 0; 335 336 return (so); 337 } 338 339 static void 340 sotpi_destroy(struct sonode *so) 341 { 342 kmem_cache_t *cp; 343 struct sockparams *origsp; 344 345 /* 346 * If there is a new dealloc function (ie. smod_destroy_func), 347 * then it should check the correctness of the ops. 348 */ 349 350 ASSERT(so->so_ops == &sotpi_sonodeops); 351 352 origsp = SOTOTPI(so)->sti_orig_sp; 353 354 sotpi_info_fini(so); 355 356 if (so->so_state & SS_FALLBACK_COMP) { 357 /* 358 * A fallback happend, which means that a sotpi_info_t struct 359 * was allocated (as opposed to being allocated from the TPI 360 * sonode cache. Therefore we explicitly free the struct 361 * here. 362 */ 363 sotpi_info_destroy(so); 364 ASSERT(origsp != NULL); 365 366 origsp->sp_smod_info->smod_sock_destroy_func(so); 367 SOCKPARAMS_DEC_REF(origsp); 368 } else { 369 sonode_fini(so); 370 cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache : 371 socktpi_cache; 372 kmem_cache_free(cp, so); 373 } 374 } 375 376 /* ARGSUSED1 */ 377 int 378 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 379 { 380 major_t maj; 381 dev_t newdev; 382 struct vnode *vp; 383 int error = 0; 384 struct stdata *stp; 385 386 sotpi_info_t *sti = SOTOTPI(so); 387 388 dprint(1, ("sotpi_init()\n")); 389 390 /* 391 * over write the sleep flag passed in but that is ok 392 * as tpi socket does not honor sleep flag. 393 */ 394 flags |= FREAD|FWRITE; 395 396 /* 397 * Record in so_flag that it is a clone. 
398 */ 399 if (getmajor(sti->sti_dev) == clone_major) 400 so->so_flag |= SOCLONE; 401 402 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 403 (so->so_family == AF_INET || so->so_family == AF_INET6) && 404 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 405 so->so_protocol == IPPROTO_IP)) { 406 /* Tell tcp or udp that it's talking to sockets */ 407 flags |= SO_SOCKSTR; 408 409 /* 410 * Here we indicate to socktpi_open() our attempt to 411 * make direct calls between sockfs and transport. 412 * The final decision is left to socktpi_open(). 413 */ 414 sti->sti_direct = 1; 415 416 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 417 if (so->so_type == SOCK_STREAM && tso != NULL) { 418 if (SOTOTPI(tso)->sti_direct) { 419 /* 420 * Inherit sti_direct from listener and pass 421 * SO_ACCEPTOR open flag to tcp, indicating 422 * that this is an accept fast-path instance. 423 */ 424 flags |= SO_ACCEPTOR; 425 } else { 426 /* 427 * sti_direct is not set on listener, meaning 428 * that the listener has been converted from 429 * a socket to a stream. Ensure that the 430 * acceptor inherits these settings. 431 */ 432 sti->sti_direct = 0; 433 flags &= ~SO_SOCKSTR; 434 } 435 } 436 } 437 438 /* 439 * Tell local transport that it is talking to sockets. 440 */ 441 if (so->so_family == AF_UNIX) { 442 flags |= SO_SOCKSTR; 443 } 444 445 vp = SOTOV(so); 446 newdev = vp->v_rdev; 447 maj = getmajor(newdev); 448 ASSERT(STREAMSTAB(maj)); 449 450 error = stropen(vp, &newdev, flags, cr); 451 452 stp = vp->v_stream; 453 if (error == 0) { 454 if (so->so_flag & SOCLONE) 455 ASSERT(newdev != vp->v_rdev); 456 mutex_enter(&so->so_lock); 457 sti->sti_dev = newdev; 458 vp->v_rdev = newdev; 459 mutex_exit(&so->so_lock); 460 461 if (stp->sd_flag & STRISTTY) { 462 /* 463 * this is a post SVR4 tty driver - a socket can not 464 * be a controlling terminal. Fail the open. 
465 */ 466 (void) sotpi_close(so, flags, cr); 467 return (ENOTTY); /* XXX */ 468 } 469 470 ASSERT(stp->sd_wrq != NULL); 471 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 472 473 /* 474 * If caller is interested in doing direct function call 475 * interface to/from transport module, probe the module 476 * directly beneath the streamhead to see if it qualifies. 477 * 478 * We turn off the direct interface when qualifications fail. 479 * In the acceptor case, we simply turn off the sti_direct 480 * flag on the socket. We do the fallback after the accept 481 * has completed, before the new socket is returned to the 482 * application. 483 */ 484 if (sti->sti_direct) { 485 queue_t *tq = stp->sd_wrq->q_next; 486 487 /* 488 * sti_direct is currently supported and tested 489 * only for tcp/udp; this is the main reason to 490 * have the following assertions. 491 */ 492 ASSERT(so->so_family == AF_INET || 493 so->so_family == AF_INET6); 494 ASSERT(so->so_protocol == IPPROTO_UDP || 495 so->so_protocol == IPPROTO_TCP || 496 so->so_protocol == IPPROTO_IP); 497 ASSERT(so->so_type == SOCK_DGRAM || 498 so->so_type == SOCK_STREAM); 499 500 /* 501 * Abort direct call interface if the module directly 502 * underneath the stream head is not defined with the 503 * _D_DIRECT flag. This could happen in the tcp or 504 * udp case, when some other module is autopushed 505 * above it, or for some reasons the expected module 506 * isn't purely D_MP (which is the main requirement). 507 * 508 * Else, SS_DIRECT is valid. If the read-side Q has 509 * _QSODIRECT set then and uioasync is enabled then 510 * set SS_SODIRECT to enable sodirect. 511 */ 512 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 513 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 514 int rval; 515 516 /* Continue on without direct calls */ 517 sti->sti_direct = 0; 518 519 /* 520 * Cannot issue ioctl on fallback socket since 521 * there is no conn associated with the queue. 
522 * The fallback downcall will notify the proto 523 * of the change. 524 */ 525 if (!(flags & SO_ACCEPTOR) && 526 !(flags & SO_FALLBACK)) { 527 if ((error = strioctl(vp, 528 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 529 cr, &rval)) != 0) { 530 (void) sotpi_close(so, flags, 531 cr); 532 return (error); 533 } 534 } 535 } else if ((_OTHERQ(tq)->q_flag & _QSODIRECT) && 536 uioasync.enabled) { 537 /* Enable sodirect */ 538 so->so_state |= SS_SODIRECT; 539 } 540 } 541 542 if (flags & SO_FALLBACK) { 543 /* 544 * The stream created does not have a conn. 545 * do stream set up after conn has been assigned 546 */ 547 return (error); 548 } 549 if (error = so_strinit(so, tso)) { 550 (void) sotpi_close(so, flags, cr); 551 return (error); 552 } 553 554 /* Wildcard */ 555 if (so->so_protocol != so->so_sockparams->sp_protocol) { 556 int protocol = so->so_protocol; 557 /* 558 * Issue SO_PROTOTYPE setsockopt. 559 */ 560 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 561 &protocol, (t_uscalar_t)sizeof (protocol), cr); 562 if (error != 0) { 563 (void) sotpi_close(so, flags, cr); 564 /* 565 * Setsockopt often fails with ENOPROTOOPT but 566 * socket() should fail with 567 * EPROTONOSUPPORT/EPROTOTYPE. 568 */ 569 return (EPROTONOSUPPORT); 570 } 571 } 572 573 } else { 574 /* 575 * While the same socket can not be reopened (unlike specfs) 576 * the stream head sets STREOPENFAIL when the autopush fails. 577 */ 578 if ((stp != NULL) && 579 (stp->sd_flag & STREOPENFAIL)) { 580 /* 581 * Open failed part way through. 582 */ 583 mutex_enter(&stp->sd_lock); 584 stp->sd_flag &= ~STREOPENFAIL; 585 mutex_exit(&stp->sd_lock); 586 (void) sotpi_close(so, flags, cr); 587 return (error); 588 /*NOTREACHED*/ 589 } 590 ASSERT(stp == NULL); 591 } 592 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 593 "sockfs open:maj %d vp %p so %p error %d", 594 maj, vp, so, error); 595 return (error); 596 } 597 598 /* 599 * Bind the socket to an unspecified address in sockfs only. 
600 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 601 * required in all cases. 602 */ 603 static void 604 so_automatic_bind(struct sonode *so) 605 { 606 sotpi_info_t *sti = SOTOTPI(so); 607 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 608 609 ASSERT(MUTEX_HELD(&so->so_lock)); 610 ASSERT(!(so->so_state & SS_ISBOUND)); 611 ASSERT(sti->sti_unbind_mp); 612 613 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 614 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 615 sti->sti_laddr_sa->sa_family = so->so_family; 616 so->so_state |= SS_ISBOUND; 617 } 618 619 620 /* 621 * bind the socket. 622 * 623 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 624 * are passed in we allow rebinding. Note that for backwards compatibility 625 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 626 * Thus the rebinding code is currently not executed. 627 * 628 * The constraints for rebinding are: 629 * - it is a SOCK_DGRAM, or 630 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 631 * and no listen() has been done. 632 * This rebinding code was added based on some language in the XNET book 633 * about not returning EINVAL it the protocol allows rebinding. However, 634 * this language is not present in the Posix socket draft. Thus maybe the 635 * rebinding logic should be deleted from the source. 636 * 637 * A null "name" can be used to unbind the socket if: 638 * - it is a SOCK_DGRAM, or 639 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 640 * and no listen() has been done. 
641 */ 642 /* ARGSUSED */ 643 static int 644 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 645 socklen_t namelen, int backlog, int flags, struct cred *cr) 646 { 647 struct T_bind_req bind_req; 648 struct T_bind_ack *bind_ack; 649 int error = 0; 650 mblk_t *mp; 651 void *addr; 652 t_uscalar_t addrlen; 653 int unbind_on_err = 1; 654 boolean_t clear_acceptconn_on_err = B_FALSE; 655 boolean_t restore_backlog_on_err = B_FALSE; 656 int save_so_backlog; 657 t_scalar_t PRIM_type = O_T_BIND_REQ; 658 boolean_t tcp_udp_xport; 659 void *nl7c = NULL; 660 sotpi_info_t *sti = SOTOTPI(so); 661 662 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 663 (void *)so, (void *)name, namelen, backlog, flags, 664 pr_state(so->so_state, so->so_mode))); 665 666 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 667 668 if (!(flags & _SOBIND_LOCK_HELD)) { 669 mutex_enter(&so->so_lock); 670 so_lock_single(so); /* Set SOLOCKED */ 671 } else { 672 ASSERT(MUTEX_HELD(&so->so_lock)); 673 ASSERT(so->so_flag & SOLOCKED); 674 } 675 676 /* 677 * Make sure that there is a preallocated unbind_req message 678 * before binding. This message allocated when the socket is 679 * created but it might be have been consumed. 680 */ 681 if (sti->sti_unbind_mp == NULL) { 682 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 683 /* NOTE: holding so_lock while sleeping */ 684 sti->sti_unbind_mp = 685 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 686 cr); 687 } 688 689 if (flags & _SOBIND_REBIND) { 690 /* 691 * Called from solisten after doing an sotpi_unbind() or 692 * potentially without the unbind (latter for AF_INET{,6}). 
693 */ 694 ASSERT(name == NULL && namelen == 0); 695 696 if (so->so_family == AF_UNIX) { 697 ASSERT(sti->sti_ux_bound_vp); 698 addr = &sti->sti_ux_laddr; 699 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 700 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 701 "addr 0x%p, vp %p\n", 702 addrlen, 703 (void *)((struct so_ux_addr *)addr)->soua_vp, 704 (void *)sti->sti_ux_bound_vp)); 705 } else { 706 addr = sti->sti_laddr_sa; 707 addrlen = (t_uscalar_t)sti->sti_laddr_len; 708 } 709 } else if (flags & _SOBIND_UNSPEC) { 710 ASSERT(name == NULL && namelen == 0); 711 712 /* 713 * The caller checked SS_ISBOUND but not necessarily 714 * under so_lock 715 */ 716 if (so->so_state & SS_ISBOUND) { 717 /* No error */ 718 goto done; 719 } 720 721 /* Set an initial local address */ 722 switch (so->so_family) { 723 case AF_UNIX: 724 /* 725 * Use an address with same size as struct sockaddr 726 * just like BSD. 727 */ 728 sti->sti_laddr_len = 729 (socklen_t)sizeof (struct sockaddr); 730 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 731 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 732 sti->sti_laddr_sa->sa_family = so->so_family; 733 734 /* 735 * Pass down an address with the implicit bind 736 * magic number and the rest all zeros. 737 * The transport will return a unique address. 738 */ 739 sti->sti_ux_laddr.soua_vp = NULL; 740 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 741 addr = &sti->sti_ux_laddr; 742 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 743 break; 744 745 case AF_INET: 746 case AF_INET6: 747 /* 748 * An unspecified bind in TPI has a NULL address. 749 * Set the address in sockfs to have the sa_family. 750 */ 751 sti->sti_laddr_len = (so->so_family == AF_INET) ? 
752 (socklen_t)sizeof (sin_t) : 753 (socklen_t)sizeof (sin6_t); 754 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 755 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 756 sti->sti_laddr_sa->sa_family = so->so_family; 757 addr = NULL; 758 addrlen = 0; 759 break; 760 761 default: 762 /* 763 * An unspecified bind in TPI has a NULL address. 764 * Set the address in sockfs to be zero length. 765 * 766 * Can not assume there is a sa_family for all 767 * protocol families. For example, AF_X25 does not 768 * have a family field. 769 */ 770 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 771 sti->sti_laddr_len = 0; /* XXX correct? */ 772 addr = NULL; 773 addrlen = 0; 774 break; 775 } 776 777 } else { 778 if (so->so_state & SS_ISBOUND) { 779 /* 780 * If it is ok to rebind the socket, first unbind 781 * with the transport. A rebind to the NULL address 782 * is interpreted as an unbind. 783 * Note that a bind to NULL in BSD does unbind the 784 * socket but it fails with EINVAL. 785 * Note that regular sockets set SOV_SOCKBSD i.e. 786 * _SOBIND_SOCKBSD gets set here hence no type of 787 * socket does currently allow rebinding. 788 * 789 * If the name is NULL just do an unbind. 
790 */ 791 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 792 name != NULL) { 793 error = EINVAL; 794 unbind_on_err = 0; 795 eprintsoline(so, error); 796 goto done; 797 } 798 if ((so->so_mode & SM_CONNREQUIRED) && 799 (so->so_state & SS_CANTREBIND)) { 800 error = EINVAL; 801 unbind_on_err = 0; 802 eprintsoline(so, error); 803 goto done; 804 } 805 error = sotpi_unbind(so, 0); 806 if (error) { 807 eprintsoline(so, error); 808 goto done; 809 } 810 ASSERT(!(so->so_state & SS_ISBOUND)); 811 if (name == NULL) { 812 so->so_state &= 813 ~(SS_ISCONNECTED|SS_ISCONNECTING); 814 goto done; 815 } 816 } 817 818 /* X/Open requires this check */ 819 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 820 if (xnet_check_print) { 821 printf("sockfs: X/Open bind state check " 822 "caused EINVAL\n"); 823 } 824 error = EINVAL; 825 goto done; 826 } 827 828 switch (so->so_family) { 829 case AF_UNIX: 830 /* 831 * All AF_UNIX addresses are nul terminated 832 * when copied (copyin_name) in so the minimum 833 * length is 3 bytes. 834 */ 835 if (name == NULL || 836 (ssize_t)namelen <= sizeof (short) + 1) { 837 error = EISDIR; 838 eprintsoline(so, error); 839 goto done; 840 } 841 /* 842 * Verify so_family matches the bound family. 843 * BSD does not check this for AF_UNIX resulting 844 * in funny mknods. 845 */ 846 if (name->sa_family != so->so_family) { 847 error = EAFNOSUPPORT; 848 goto done; 849 } 850 break; 851 case AF_INET: 852 if (name == NULL) { 853 error = EINVAL; 854 eprintsoline(so, error); 855 goto done; 856 } 857 if ((size_t)namelen != sizeof (sin_t)) { 858 error = name->sa_family != so->so_family ? 859 EAFNOSUPPORT : EINVAL; 860 eprintsoline(so, error); 861 goto done; 862 } 863 if ((flags & _SOBIND_XPG4_2) && 864 (name->sa_family != so->so_family)) { 865 /* 866 * This check has to be made for X/Open 867 * sockets however application failures have 868 * been observed when it is applied to 869 * all sockets. 
870 */ 871 error = EAFNOSUPPORT; 872 eprintsoline(so, error); 873 goto done; 874 } 875 /* 876 * Force a zero sa_family to match so_family. 877 * 878 * Some programs like inetd(1M) don't set the 879 * family field. Other programs leave 880 * sin_family set to garbage - SunOS 4.X does 881 * not check the family field on a bind. 882 * We use the family field that 883 * was passed in to the socket() call. 884 */ 885 name->sa_family = so->so_family; 886 break; 887 888 case AF_INET6: { 889 #ifdef DEBUG 890 sin6_t *sin6 = (sin6_t *)name; 891 #endif /* DEBUG */ 892 893 if (name == NULL) { 894 error = EINVAL; 895 eprintsoline(so, error); 896 goto done; 897 } 898 if ((size_t)namelen != sizeof (sin6_t)) { 899 error = name->sa_family != so->so_family ? 900 EAFNOSUPPORT : EINVAL; 901 eprintsoline(so, error); 902 goto done; 903 } 904 if (name->sa_family != so->so_family) { 905 /* 906 * With IPv6 we require the family to match 907 * unlike in IPv4. 908 */ 909 error = EAFNOSUPPORT; 910 eprintsoline(so, error); 911 goto done; 912 } 913 #ifdef DEBUG 914 /* 915 * Verify that apps don't forget to clear 916 * sin6_scope_id etc 917 */ 918 if (sin6->sin6_scope_id != 0 && 919 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 920 zcmn_err(getzoneid(), CE_WARN, 921 "bind with uninitialized sin6_scope_id " 922 "(%d) on socket. Pid = %d\n", 923 (int)sin6->sin6_scope_id, 924 (int)curproc->p_pid); 925 } 926 if (sin6->__sin6_src_id != 0) { 927 zcmn_err(getzoneid(), CE_WARN, 928 "bind with uninitialized __sin6_src_id " 929 "(%d) on socket. Pid = %d\n", 930 (int)sin6->__sin6_src_id, 931 (int)curproc->p_pid); 932 } 933 #endif /* DEBUG */ 934 break; 935 } 936 default: 937 /* 938 * Don't do any length or sa_family check to allow 939 * non-sockaddr style addresses. 
940 */ 941 if (name == NULL) { 942 error = EINVAL; 943 eprintsoline(so, error); 944 goto done; 945 } 946 break; 947 } 948 949 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 950 error = ENAMETOOLONG; 951 eprintsoline(so, error); 952 goto done; 953 } 954 /* 955 * Save local address. 956 */ 957 sti->sti_laddr_len = (socklen_t)namelen; 958 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 959 bcopy(name, sti->sti_laddr_sa, namelen); 960 961 addr = sti->sti_laddr_sa; 962 addrlen = (t_uscalar_t)sti->sti_laddr_len; 963 switch (so->so_family) { 964 case AF_INET6: 965 case AF_INET: 966 break; 967 case AF_UNIX: { 968 struct sockaddr_un *soun = 969 (struct sockaddr_un *)sti->sti_laddr_sa; 970 struct vnode *vp, *rvp; 971 struct vattr vattr; 972 973 ASSERT(sti->sti_ux_bound_vp == NULL); 974 /* 975 * Create vnode for the specified path name. 976 * Keep vnode held with a reference in sti_ux_bound_vp. 977 * Use the vnode pointer as the address used in the 978 * bind with the transport. 979 * 980 * Use the same mode as in BSD. In particular this does 981 * not observe the umask. 982 */ 983 /* MAXPATHLEN + soun_family + nul termination */ 984 if (sti->sti_laddr_len > 985 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 986 error = ENAMETOOLONG; 987 eprintsoline(so, error); 988 goto done; 989 } 990 vattr.va_type = VSOCK; 991 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 992 vattr.va_mask = AT_TYPE|AT_MODE; 993 /* NOTE: holding so_lock */ 994 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 995 EXCL, 0, &vp, CRMKNOD, 0, 0); 996 if (error) { 997 if (error == EEXIST) 998 error = EADDRINUSE; 999 eprintsoline(so, error); 1000 goto done; 1001 } 1002 /* 1003 * Establish pointer from the underlying filesystem 1004 * vnode to the socket node. 1005 * sti_ux_bound_vp and v_stream->sd_vnode form the 1006 * cross-linkage between the underlying filesystem 1007 * node and the socket node. 
1008 */ 1009 1010 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 1011 VN_HOLD(rvp); 1012 VN_RELE(vp); 1013 vp = rvp; 1014 } 1015 1016 ASSERT(SOTOV(so)->v_stream); 1017 mutex_enter(&vp->v_lock); 1018 vp->v_stream = SOTOV(so)->v_stream; 1019 sti->sti_ux_bound_vp = vp; 1020 mutex_exit(&vp->v_lock); 1021 1022 /* 1023 * Use the vnode pointer value as a unique address 1024 * (together with the magic number to avoid conflicts 1025 * with implicit binds) in the transport provider. 1026 */ 1027 sti->sti_ux_laddr.soua_vp = 1028 (void *)sti->sti_ux_bound_vp; 1029 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1030 addr = &sti->sti_ux_laddr; 1031 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1032 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1033 addrlen, 1034 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1035 break; 1036 } 1037 } /* end switch (so->so_family) */ 1038 } 1039 1040 /* 1041 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1042 * the transport can start passing up T_CONN_IND messages 1043 * as soon as it receives the bind req and strsock_proto() 1044 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1045 */ 1046 if (flags & _SOBIND_LISTEN) { 1047 if ((so->so_state & SS_ACCEPTCONN) == 0) 1048 clear_acceptconn_on_err = B_TRUE; 1049 save_so_backlog = so->so_backlog; 1050 restore_backlog_on_err = B_TRUE; 1051 so->so_state |= SS_ACCEPTCONN; 1052 so->so_backlog = backlog; 1053 } 1054 1055 /* 1056 * If NL7C addr(s) have been configured check for addr/port match, 1057 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 1058 * 1059 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 1060 * family sockets only. If match mark as such. 
1061 */ 1062 if (nl7c_enabled && ((addr != NULL && 1063 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1064 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 1065 sti->sti_nl7c_flags == NL7C_AF_NCA)) { 1066 /* 1067 * NL7C is not supported in non-global zones, 1068 * we enforce this restriction here. 1069 */ 1070 if (so->so_zoneid == GLOBAL_ZONEID) { 1071 /* An NL7C socket, mark it */ 1072 sti->sti_nl7c_flags |= NL7C_ENABLED; 1073 if (nl7c == NULL) { 1074 /* 1075 * Was an AF_NCA bind() so add it to the 1076 * addr list for reporting purposes. 1077 */ 1078 nl7c = nl7c_add_addr(addr, addrlen); 1079 } 1080 } else 1081 nl7c = NULL; 1082 } 1083 1084 /* 1085 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1086 * for other transports we will send in a O_T_BIND_REQ. 1087 */ 1088 if (tcp_udp_xport && 1089 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1090 PRIM_type = T_BIND_REQ; 1091 1092 bind_req.PRIM_type = PRIM_type; 1093 bind_req.ADDR_length = addrlen; 1094 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1095 bind_req.CONIND_number = backlog; 1096 /* NOTE: holding so_lock while sleeping */ 1097 mp = soallocproto2(&bind_req, sizeof (bind_req), 1098 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1099 sti->sti_laddr_valid = 0; 1100 1101 /* Done using sti_laddr_sa - can drop the lock */ 1102 mutex_exit(&so->so_lock); 1103 1104 /* 1105 * Intercept the bind_req message here to check if this <address/port> 1106 * was configured as an SSL proxy server, or if another endpoint was 1107 * already configured to act as a proxy for us. 1108 * 1109 * Note, only if NL7C not enabled for this socket. 
1110 */ 1111 if (nl7c == NULL && 1112 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1113 so->so_type == SOCK_STREAM) { 1114 1115 if (sti->sti_kssl_ent != NULL) { 1116 kssl_release_ent(sti->sti_kssl_ent, so, 1117 sti->sti_kssl_type); 1118 sti->sti_kssl_ent = NULL; 1119 } 1120 1121 sti->sti_kssl_type = kssl_check_proxy(mp, so, 1122 &sti->sti_kssl_ent); 1123 switch (sti->sti_kssl_type) { 1124 case KSSL_NO_PROXY: 1125 break; 1126 1127 case KSSL_HAS_PROXY: 1128 mutex_enter(&so->so_lock); 1129 goto skip_transport; 1130 1131 case KSSL_IS_PROXY: 1132 break; 1133 } 1134 } 1135 1136 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1137 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1138 if (error) { 1139 eprintsoline(so, error); 1140 mutex_enter(&so->so_lock); 1141 goto done; 1142 } 1143 1144 mutex_enter(&so->so_lock); 1145 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1146 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1147 if (error) { 1148 eprintsoline(so, error); 1149 goto done; 1150 } 1151 skip_transport: 1152 ASSERT(mp); 1153 /* 1154 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1155 * strsock_proto while the lock was dropped above, the bind 1156 * is allowed to complete. 1157 */ 1158 1159 /* Mark as bound. This will be undone if we detect errors below. */ 1160 if (flags & _SOBIND_NOXLATE) { 1161 ASSERT(so->so_family == AF_UNIX); 1162 sti->sti_faddr_noxlate = 1; 1163 } 1164 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1165 so->so_state |= SS_ISBOUND; 1166 ASSERT(sti->sti_unbind_mp); 1167 1168 /* note that we've already set SS_ACCEPTCONN above */ 1169 1170 /* 1171 * Recompute addrlen - an unspecied bind sent down an 1172 * address of length zero but we expect the appropriate length 1173 * in return. 1174 */ 1175 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 
1176 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1177 1178 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1179 /* 1180 * The alignment restriction is really too strict but 1181 * we want enough alignment to inspect the fields of 1182 * a sockaddr_in. 1183 */ 1184 addr = sogetoff(mp, bind_ack->ADDR_offset, 1185 bind_ack->ADDR_length, 1186 __TPI_ALIGN_SIZE); 1187 if (addr == NULL) { 1188 freemsg(mp); 1189 error = EPROTO; 1190 eprintsoline(so, error); 1191 goto done; 1192 } 1193 if (!(flags & _SOBIND_UNSPEC)) { 1194 /* 1195 * Verify that the transport didn't return something we 1196 * did not want e.g. an address other than what we asked for. 1197 * 1198 * NOTE: These checks would go away if/when we switch to 1199 * using the new TPI (in which the transport would fail 1200 * the request instead of assigning a different address). 1201 * 1202 * NOTE2: For protocols that we don't know (i.e. any 1203 * other than AF_INET6, AF_INET and AF_UNIX), we 1204 * cannot know if the transport should be expected to 1205 * return the same address as that requested. 1206 * 1207 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1208 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 1209 * 1210 * For example, in the case of netatalk it may be 1211 * inappropriate for the transport to return the 1212 * requested address (as it may have allocated a local 1213 * port number in behaviour similar to that of an 1214 * AF_INET bind request with a port number of zero). 1215 * 1216 * Given the definition of O_T_BIND_REQ, where the 1217 * transport may bind to an address other than the 1218 * requested address, it's not possible to determine 1219 * whether a returned address that differs from the 1220 * requested address is a reason to fail (because the 1221 * requested address was not available) or succeed 1222 * (because the transport allocated an appropriate 1223 * address and/or port). 
1224 * 1225 * sockfs currently requires that the transport return 1226 * the requested address in the T_BIND_ACK, unless 1227 * there is code here to allow for any discrepancy. 1228 * Such code exists for AF_INET and AF_INET6. 1229 * 1230 * Netatalk chooses to return the requested address 1231 * rather than the (correct) allocated address. This 1232 * means that netatalk violates the TPI specification 1233 * (and would not function correctly if used from a 1234 * TLI application), but it does mean that it works 1235 * with sockfs. 1236 * 1237 * As noted above, using the newer XTI bind primitive 1238 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1239 * allow sockfs to be more sure about whether or not 1240 * the bind request had succeeded (as transports are 1241 * not permitted to bind to a different address than 1242 * that requested - they must return failure). 1243 * Unfortunately, support for T_BIND_REQ may not be 1244 * present in all transport implementations (netatalk, 1245 * for example, doesn't have it), making the 1246 * transition difficult. 1247 */ 1248 if (bind_ack->ADDR_length != addrlen) { 1249 /* Assumes that the requested address was in use */ 1250 freemsg(mp); 1251 error = EADDRINUSE; 1252 eprintsoline(so, error); 1253 goto done; 1254 } 1255 1256 switch (so->so_family) { 1257 case AF_INET6: 1258 case AF_INET: { 1259 sin_t *rname, *aname; 1260 1261 rname = (sin_t *)addr; 1262 aname = (sin_t *)sti->sti_laddr_sa; 1263 1264 /* 1265 * Take advantage of the alignment 1266 * of sin_port and sin6_port which fall 1267 * in the same place in their data structures. 1268 * Just use sin_port for either address family. 1269 * 1270 * This may become a problem if (heaven forbid) 1271 * there's a separate ipv6port_reserved... :-P 1272 * 1273 * Binding to port 0 has the semantics of letting 1274 * the transport bind to any port. 
1275 * 1276 * If the transport is TCP or UDP since we had sent 1277 * a T_BIND_REQ we would not get a port other than 1278 * what we asked for. 1279 */ 1280 if (tcp_udp_xport) { 1281 /* 1282 * Pick up the new port number if we bound to 1283 * port 0. 1284 */ 1285 if (aname->sin_port == 0) 1286 aname->sin_port = rname->sin_port; 1287 sti->sti_laddr_valid = 1; 1288 break; 1289 } 1290 if (aname->sin_port != 0 && 1291 aname->sin_port != rname->sin_port) { 1292 freemsg(mp); 1293 error = EADDRINUSE; 1294 eprintsoline(so, error); 1295 goto done; 1296 } 1297 /* 1298 * Pick up the new port number if we bound to port 0. 1299 */ 1300 aname->sin_port = rname->sin_port; 1301 1302 /* 1303 * Unfortunately, addresses aren't _quite_ the same. 1304 */ 1305 if (so->so_family == AF_INET) { 1306 if (aname->sin_addr.s_addr != 1307 rname->sin_addr.s_addr) { 1308 freemsg(mp); 1309 error = EADDRNOTAVAIL; 1310 eprintsoline(so, error); 1311 goto done; 1312 } 1313 } else { 1314 sin6_t *rname6 = (sin6_t *)rname; 1315 sin6_t *aname6 = (sin6_t *)aname; 1316 1317 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1318 &rname6->sin6_addr)) { 1319 freemsg(mp); 1320 error = EADDRNOTAVAIL; 1321 eprintsoline(so, error); 1322 goto done; 1323 } 1324 } 1325 break; 1326 } 1327 case AF_UNIX: 1328 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1329 freemsg(mp); 1330 error = EADDRINUSE; 1331 eprintsoline(so, error); 1332 eprintso(so, 1333 ("addrlen %d, addr 0x%x, vp %p\n", 1334 addrlen, *((int *)addr), 1335 (void *)sti->sti_ux_bound_vp)); 1336 goto done; 1337 } 1338 sti->sti_laddr_valid = 1; 1339 break; 1340 default: 1341 /* 1342 * NOTE: This assumes that addresses can be 1343 * byte-compared for equivalence. 
1344 */ 1345 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1346 freemsg(mp); 1347 error = EADDRINUSE; 1348 eprintsoline(so, error); 1349 goto done; 1350 } 1351 /* 1352 * Don't mark sti_laddr_valid, as we cannot be 1353 * sure that the returned address is the real 1354 * bound address when talking to an unknown 1355 * transport. 1356 */ 1357 break; 1358 } 1359 } else { 1360 /* 1361 * Save for returned address for getsockname. 1362 * Needed for unspecific bind unless transport supports 1363 * the TI_GETMYNAME ioctl. 1364 * Do this for AF_INET{,6} even though they do, as 1365 * caching info here is much better performance than 1366 * a TPI/STREAMS trip to the transport for getsockname. 1367 * Any which can't for some reason _must_ _not_ set 1368 * sti_laddr_valid here for the caching version of 1369 * getsockname to not break; 1370 */ 1371 switch (so->so_family) { 1372 case AF_UNIX: 1373 /* 1374 * Record the address bound with the transport 1375 * for use by socketpair. 1376 */ 1377 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1378 sti->sti_laddr_valid = 1; 1379 break; 1380 case AF_INET: 1381 case AF_INET6: 1382 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1383 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1384 sti->sti_laddr_valid = 1; 1385 break; 1386 default: 1387 /* 1388 * Don't mark sti_laddr_valid, as we cannot be 1389 * sure that the returned address is the real 1390 * bound address when talking to an unknown 1391 * transport. 
1392 */ 1393 break; 1394 } 1395 } 1396 1397 if (nl7c != NULL) { 1398 /* Register listen()er sonode pointer with NL7C */ 1399 nl7c_listener_addr(nl7c, so); 1400 } 1401 1402 freemsg(mp); 1403 1404 done: 1405 if (error) { 1406 /* reset state & backlog to values held on entry */ 1407 if (clear_acceptconn_on_err == B_TRUE) 1408 so->so_state &= ~SS_ACCEPTCONN; 1409 if (restore_backlog_on_err == B_TRUE) 1410 so->so_backlog = save_so_backlog; 1411 1412 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1413 int err; 1414 1415 err = sotpi_unbind(so, 0); 1416 /* LINTED - statement has no consequent: if */ 1417 if (err) { 1418 eprintsoline(so, error); 1419 } else { 1420 ASSERT(!(so->so_state & SS_ISBOUND)); 1421 } 1422 } 1423 } 1424 if (!(flags & _SOBIND_LOCK_HELD)) { 1425 so_unlock_single(so, SOLOCKED); 1426 mutex_exit(&so->so_lock); 1427 } else { 1428 ASSERT(MUTEX_HELD(&so->so_lock)); 1429 ASSERT(so->so_flag & SOLOCKED); 1430 } 1431 return (error); 1432 } 1433 1434 /* bind the socket */ 1435 static int 1436 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1437 int flags, struct cred *cr) 1438 { 1439 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1440 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1441 1442 flags &= ~_SOBIND_SOCKETPAIR; 1443 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1444 } 1445 1446 /* 1447 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1448 * address, or when listen needs to unbind and bind. 1449 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1450 * so that a sobind can pick them up. 
1451 */ 1452 static int 1453 sotpi_unbind(struct sonode *so, int flags) 1454 { 1455 struct T_unbind_req unbind_req; 1456 int error = 0; 1457 mblk_t *mp; 1458 sotpi_info_t *sti = SOTOTPI(so); 1459 1460 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1461 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1462 1463 ASSERT(MUTEX_HELD(&so->so_lock)); 1464 ASSERT(so->so_flag & SOLOCKED); 1465 1466 if (!(so->so_state & SS_ISBOUND)) { 1467 error = EINVAL; 1468 eprintsoline(so, error); 1469 goto done; 1470 } 1471 1472 mutex_exit(&so->so_lock); 1473 1474 /* 1475 * Flush the read and write side (except stream head read queue) 1476 * and send down T_UNBIND_REQ. 1477 */ 1478 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1479 1480 unbind_req.PRIM_type = T_UNBIND_REQ; 1481 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1482 0, _ALLOC_SLEEP, CRED()); 1483 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1484 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1485 mutex_enter(&so->so_lock); 1486 if (error) { 1487 eprintsoline(so, error); 1488 goto done; 1489 } 1490 1491 error = sowaitokack(so, T_UNBIND_REQ); 1492 if (error) { 1493 eprintsoline(so, error); 1494 goto done; 1495 } 1496 1497 /* 1498 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1499 * strsock_proto while the lock was dropped above, the unbind 1500 * is allowed to complete. 1501 */ 1502 if (!(flags & _SOUNBIND_REBIND)) { 1503 /* 1504 * Clear out bound address. 
1505 */ 1506 vnode_t *vp; 1507 1508 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1509 1510 /* Undo any SSL proxy setup */ 1511 if ((so->so_family == AF_INET || 1512 so->so_family == AF_INET6) && 1513 (so->so_type == SOCK_STREAM) && 1514 (sti->sti_kssl_ent != NULL)) { 1515 kssl_release_ent(sti->sti_kssl_ent, so, 1516 sti->sti_kssl_type); 1517 sti->sti_kssl_ent = NULL; 1518 sti->sti_kssl_type = KSSL_NO_PROXY; 1519 } 1520 sti->sti_ux_bound_vp = NULL; 1521 vn_rele_stream(vp); 1522 } 1523 /* Clear out address */ 1524 sti->sti_laddr_len = 0; 1525 } 1526 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1527 sti->sti_laddr_valid = 0; 1528 1529 done: 1530 1531 /* If the caller held the lock don't release it here */ 1532 ASSERT(MUTEX_HELD(&so->so_lock)); 1533 ASSERT(so->so_flag & SOLOCKED); 1534 1535 return (error); 1536 } 1537 1538 /* 1539 * listen on the socket. 1540 * For TPI conforming transports this has to first unbind with the transport 1541 * and then bind again using the new backlog. 1542 */ 1543 /* ARGSUSED */ 1544 int 1545 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1546 { 1547 int error = 0; 1548 sotpi_info_t *sti = SOTOTPI(so); 1549 1550 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1551 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1552 1553 if (sti->sti_serv_type == T_CLTS) 1554 return (EOPNOTSUPP); 1555 1556 /* 1557 * If the socket is ready to accept connections already, then 1558 * return without doing anything. This avoids a problem where 1559 * a second listen() call fails if a connection is pending and 1560 * leaves the socket unbound. Only when we are not unbinding 1561 * with the transport can we safely increase the backlog. 
*/
	if (so->so_state & SS_ACCEPTCONN &&
	    !((so->so_family == AF_INET || so->so_family == AF_INET6) &&
	    /*CONSTCOND*/
	    !solisten_tpi_tcp))
		return (0);

	/* A connected socket cannot be turned into a listener. */
	if (so->so_state & SS_ISCONNECTED)
		return (EINVAL);

	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	/*
	 * If the listen doesn't change the backlog we do nothing.
	 * This avoids an EPROTO error from the transport.
	 */
	if ((so->so_state & SS_ACCEPTCONN) &&
	    so->so_backlog == backlog)
		goto done;

	if (!(so->so_state & SS_ISBOUND)) {
		/*
		 * Must have been explicitly bound in the UNIX domain.
		 */
		if (so->so_family == AF_UNIX) {
			error = EINVAL;
			goto done;
		}
		/*
		 * Not bound yet: do an unspecified bind that also carries
		 * the backlog.  so_lock/SOLOCKED are already held, hence
		 * _SOBIND_LOCK_HELD.
		 */
		error = sotpi_bindlisten(so, NULL, 0, backlog,
		    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
	} else if (backlog > 0) {
		/*
		 * AF_INET{,6} hack to avoid losing the port.
		 * Assumes that all AF_INET{,6} transports can handle a
		 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI
		 * has already bound thus it is possible to avoid the unbind.
		 */
		if (!((so->so_family == AF_INET || so->so_family == AF_INET6) &&
		    /*CONSTCOND*/
		    !solisten_tpi_tcp)) {
			error = sotpi_unbind(so, _SOUNBIND_REBIND);
			if (error)
				goto done;
		}
		error = sotpi_bindlisten(so, NULL, 0, backlog,
		    _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
	} else {
		/*
		 * Already bound and backlog <= 0: only the sockfs state is
		 * updated; nothing is sent to the transport.
		 */
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = backlog;
	}
	if (error)
		goto done;
	/* Every successful path above leaves the socket accepting. */
	ASSERT(so->so_state & SS_ACCEPTCONN);
done:
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Disconnect either a specified seqno or all (-1).
 * The former is used on listening sockets only.
 *
 * When seqno == -1 sodisconnect could call sotpi_unbind.
However, 1627 * the current use of sodisconnect(seqno == -1) is only for shutdown 1628 * so there is no point (and potentially incorrect) to unbind. 1629 */ 1630 static int 1631 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1632 { 1633 struct T_discon_req discon_req; 1634 int error = 0; 1635 mblk_t *mp; 1636 1637 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1638 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1639 1640 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1641 mutex_enter(&so->so_lock); 1642 so_lock_single(so); /* Set SOLOCKED */ 1643 } else { 1644 ASSERT(MUTEX_HELD(&so->so_lock)); 1645 ASSERT(so->so_flag & SOLOCKED); 1646 } 1647 1648 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1649 error = EINVAL; 1650 eprintsoline(so, error); 1651 goto done; 1652 } 1653 1654 mutex_exit(&so->so_lock); 1655 /* 1656 * Flush the write side (unless this is a listener) 1657 * and then send down a T_DISCON_REQ. 1658 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1659 * and other messages.) 1660 */ 1661 if (!(so->so_state & SS_ACCEPTCONN)) 1662 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1663 1664 discon_req.PRIM_type = T_DISCON_REQ; 1665 discon_req.SEQ_number = seqno; 1666 mp = soallocproto1(&discon_req, sizeof (discon_req), 1667 0, _ALLOC_SLEEP, CRED()); 1668 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1669 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1670 mutex_enter(&so->so_lock); 1671 if (error) { 1672 eprintsoline(so, error); 1673 goto done; 1674 } 1675 1676 error = sowaitokack(so, T_DISCON_REQ); 1677 if (error) { 1678 eprintsoline(so, error); 1679 goto done; 1680 } 1681 /* 1682 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1683 * strsock_proto while the lock was dropped above, the disconnect 1684 * is allowed to complete. However, it is not possible to 1685 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 
1686 */ 1687 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1688 SOTOTPI(so)->sti_laddr_valid = 0; 1689 SOTOTPI(so)->sti_faddr_valid = 0; 1690 done: 1691 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1692 so_unlock_single(so, SOLOCKED); 1693 mutex_exit(&so->so_lock); 1694 } else { 1695 /* If the caller held the lock don't release it here */ 1696 ASSERT(MUTEX_HELD(&so->so_lock)); 1697 ASSERT(so->so_flag & SOLOCKED); 1698 } 1699 return (error); 1700 } 1701 1702 /* ARGSUSED */ 1703 int 1704 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1705 struct sonode **nsop) 1706 { 1707 struct T_conn_ind *conn_ind; 1708 struct T_conn_res *conn_res; 1709 int error = 0; 1710 mblk_t *mp, *ctxmp, *ack_mp; 1711 struct sonode *nso; 1712 vnode_t *nvp; 1713 void *src; 1714 t_uscalar_t srclen; 1715 void *opt; 1716 t_uscalar_t optlen; 1717 t_scalar_t PRIM_type; 1718 t_scalar_t SEQ_number; 1719 size_t sinlen; 1720 sotpi_info_t *sti = SOTOTPI(so); 1721 sotpi_info_t *nsti; 1722 1723 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1724 (void *)so, fflag, (void *)nsop, 1725 pr_state(so->so_state, so->so_mode))); 1726 1727 /* 1728 * Defer single-threading the accepting socket until 1729 * the T_CONN_IND has been received and parsed and the 1730 * new sonode has been opened. 1731 */ 1732 1733 /* Check that we are not already connected */ 1734 if ((so->so_state & SS_ACCEPTCONN) == 0) 1735 goto conn_bad; 1736 again: 1737 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1738 goto e_bad; 1739 1740 ASSERT(mp != NULL); 1741 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1742 ctxmp = mp->b_cont; 1743 1744 /* 1745 * Save SEQ_number for error paths. 
1746 */ 1747 SEQ_number = conn_ind->SEQ_number; 1748 1749 srclen = conn_ind->SRC_length; 1750 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1751 if (src == NULL) { 1752 error = EPROTO; 1753 freemsg(mp); 1754 eprintsoline(so, error); 1755 goto disconnect_unlocked; 1756 } 1757 optlen = conn_ind->OPT_length; 1758 switch (so->so_family) { 1759 case AF_INET: 1760 case AF_INET6: 1761 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1762 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1763 &opt, conn_ind->OPT_length); 1764 } else { 1765 /* 1766 * The transport (in this case TCP) hasn't sent up 1767 * a pointer to an instance for the accept fast-path. 1768 * Disable fast-path completely because the call to 1769 * sotpi_create() below would otherwise create an 1770 * incomplete TCP instance, which would lead to 1771 * problems when sockfs sends a normal T_CONN_RES 1772 * message down the new stream. 1773 */ 1774 if (sti->sti_direct) { 1775 int rval; 1776 /* 1777 * For consistency we inform tcp to disable 1778 * direct interface on the listener, though 1779 * we can certainly live without doing this 1780 * because no data will ever travel upstream 1781 * on the listening socket. 1782 */ 1783 sti->sti_direct = 0; 1784 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1785 0, 0, K_TO_K, cr, &rval); 1786 } 1787 opt = NULL; 1788 optlen = 0; 1789 } 1790 break; 1791 case AF_UNIX: 1792 default: 1793 if (optlen != 0) { 1794 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1795 __TPI_ALIGN_SIZE); 1796 if (opt == NULL) { 1797 error = EPROTO; 1798 freemsg(mp); 1799 eprintsoline(so, error); 1800 goto disconnect_unlocked; 1801 } 1802 } 1803 if (so->so_family == AF_UNIX) { 1804 if (!sti->sti_faddr_noxlate) { 1805 src = NULL; 1806 srclen = 0; 1807 } 1808 /* Extract src address from options */ 1809 if (optlen != 0) 1810 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1811 } 1812 break; 1813 } 1814 1815 /* 1816 * Create the new socket. 
1817 */ 1818 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1819 if (nso == NULL) { 1820 ASSERT(error != 0); 1821 /* 1822 * Accept can not fail with ENOBUFS. sotpi_create 1823 * sleeps waiting for memory until a signal is caught 1824 * so return EINTR. 1825 */ 1826 freemsg(mp); 1827 if (error == ENOBUFS) 1828 error = EINTR; 1829 goto e_disc_unl; 1830 } 1831 nvp = SOTOV(nso); 1832 nsti = SOTOTPI(nso); 1833 1834 /* 1835 * If the transport sent up an SSL connection context, then attach 1836 * it the new socket, and set the (sd_wputdatafunc)() and 1837 * (sd_rputdatafunc)() stream head hooks to intercept and process 1838 * SSL records. 1839 */ 1840 if (ctxmp != NULL) { 1841 /* 1842 * This kssl_ctx_t is already held for us by the transport. 1843 * So, we don't need to do a kssl_hold_ctx() here. 1844 */ 1845 nsti->sti_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1846 freemsg(ctxmp); 1847 mp->b_cont = NULL; 1848 strsetrwputdatahooks(nvp, strsock_kssl_input, 1849 strsock_kssl_output); 1850 1851 /* Disable sodirect if any */ 1852 if (nso->so_direct != NULL) { 1853 mutex_enter(nso->so_direct->sod_lockp); 1854 SOD_DISABLE(nso->so_direct); 1855 mutex_exit(nso->so_direct->sod_lockp); 1856 } 1857 } 1858 #ifdef DEBUG 1859 /* 1860 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1861 * it's inherited early to allow debugging of the accept code itself. 1862 */ 1863 nso->so_options |= so->so_options & SO_DEBUG; 1864 #endif /* DEBUG */ 1865 1866 /* 1867 * Save the SRC address from the T_CONN_IND 1868 * for getpeername to work on AF_UNIX and on transports that do not 1869 * support TI_GETPEERNAME. 1870 * 1871 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1872 * copyin_name(). 
1873 */ 1874 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1875 error = EINVAL; 1876 freemsg(mp); 1877 eprintsoline(so, error); 1878 goto disconnect_vp_unlocked; 1879 } 1880 nsti->sti_faddr_len = (socklen_t)srclen; 1881 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1882 bcopy(src, nsti->sti_faddr_sa, srclen); 1883 nsti->sti_faddr_valid = 1; 1884 1885 /* 1886 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1887 */ 1888 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1889 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1890 cred_t *cr; 1891 pid_t cpid; 1892 1893 cr = msg_getcred(mp, &cpid); 1894 if (cr != NULL) { 1895 crhold(cr); 1896 nso->so_peercred = cr; 1897 nso->so_cpid = cpid; 1898 } 1899 freemsg(mp); 1900 1901 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1902 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1903 if (mp == NULL) { 1904 /* 1905 * Accept can not fail with ENOBUFS. 1906 * A signal was caught so return EINTR. 1907 */ 1908 error = EINTR; 1909 eprintsoline(so, error); 1910 goto disconnect_vp_unlocked; 1911 } 1912 conn_res = (struct T_conn_res *)mp->b_rptr; 1913 } else { 1914 /* 1915 * For efficency reasons we use msg_extractcred; no crhold 1916 * needed since db_credp is cleared (i.e., we move the cred 1917 * from the message to so_peercred. 1918 */ 1919 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1920 1921 mp->b_rptr = DB_BASE(mp); 1922 conn_res = (struct T_conn_res *)mp->b_rptr; 1923 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1924 1925 mblk_setcred(mp, cr, curproc->p_pid); 1926 } 1927 1928 /* 1929 * New socket must be bound at least in sockfs and, except for AF_INET, 1930 * (or AF_INET6) it also has to be bound in the transport provider. 1931 * We set the local address in the sonode from the T_OK_ACK of the 1932 * T_CONN_RES. For this reason the address we bind to here isn't 1933 * important. 
1934 */ 1935 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1936 /*CONSTCOND*/ 1937 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1938 /* 1939 * Optimization for AF_INET{,6} transports 1940 * that can handle a T_CONN_RES without being bound. 1941 */ 1942 mutex_enter(&nso->so_lock); 1943 so_automatic_bind(nso); 1944 mutex_exit(&nso->so_lock); 1945 } else { 1946 /* Perform NULL bind with the transport provider. */ 1947 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1948 cr)) != 0) { 1949 ASSERT(error != ENOBUFS); 1950 freemsg(mp); 1951 eprintsoline(nso, error); 1952 goto disconnect_vp_unlocked; 1953 } 1954 } 1955 1956 /* 1957 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1958 * so that any data arriving on the new socket will cause the 1959 * appropriate signals to be delivered for the new socket. 1960 * 1961 * No other thread (except strsock_proto and strsock_misc) 1962 * can access the new socket thus we relax the locking. 1963 */ 1964 nso->so_pgrp = so->so_pgrp; 1965 nso->so_state |= so->so_state & SS_ASYNC; 1966 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1967 1968 if (nso->so_pgrp != 0) { 1969 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1970 eprintsoline(nso, error); 1971 error = 0; 1972 nso->so_pgrp = 0; 1973 } 1974 } 1975 1976 /* 1977 * Make note of the socket level options. TCP and IP level options 1978 * are already inherited. We could do all this after accept is 1979 * successful but doing it here simplifies code and no harm done 1980 * for error case. 
1981 */ 1982 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1983 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1984 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1985 nso->so_sndbuf = so->so_sndbuf; 1986 nso->so_rcvbuf = so->so_rcvbuf; 1987 if (nso->so_options & SO_LINGER) 1988 nso->so_linger = so->so_linger; 1989 1990 /* 1991 * Note that the following sti_direct code path should be 1992 * removed once we are confident that the direct sockets 1993 * do not result in any degradation. 1994 */ 1995 if (sti->sti_direct) { 1996 1997 ASSERT(opt != NULL); 1998 1999 conn_res->OPT_length = optlen; 2000 conn_res->OPT_offset = MBLKL(mp); 2001 bcopy(&opt, mp->b_wptr, optlen); 2002 mp->b_wptr += optlen; 2003 conn_res->PRIM_type = T_CONN_RES; 2004 conn_res->ACCEPTOR_id = 0; 2005 PRIM_type = T_CONN_RES; 2006 2007 /* Send down the T_CONN_RES on acceptor STREAM */ 2008 error = kstrputmsg(SOTOV(nso), mp, NULL, 2009 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2010 if (error) { 2011 mutex_enter(&so->so_lock); 2012 so_lock_single(so); 2013 eprintsoline(so, error); 2014 goto disconnect_vp; 2015 } 2016 mutex_enter(&nso->so_lock); 2017 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 2018 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2019 if (error) { 2020 mutex_exit(&nso->so_lock); 2021 mutex_enter(&so->so_lock); 2022 so_lock_single(so); 2023 eprintsoline(so, error); 2024 goto disconnect_vp; 2025 } 2026 if (nso->so_family == AF_INET) { 2027 sin_t *sin; 2028 2029 sin = (sin_t *)(ack_mp->b_rptr + 2030 sizeof (struct T_ok_ack)); 2031 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 2032 nsti->sti_laddr_len = sizeof (sin_t); 2033 } else { 2034 sin6_t *sin6; 2035 2036 sin6 = (sin6_t *)(ack_mp->b_rptr + 2037 sizeof (struct T_ok_ack)); 2038 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 2039 nsti->sti_laddr_len = sizeof (sin6_t); 2040 } 2041 freemsg(ack_mp); 2042 2043 nso->so_state |= SS_ISCONNECTED; 2044 nso->so_proto_handle = (sock_lower_handle_t)opt; 2045 
nsti->sti_laddr_valid = 1; 2046 2047 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 2048 /* 2049 * A NL7C marked listen()er so the new socket 2050 * inherits the listen()er's NL7C state, except 2051 * for NL7C_POLLIN. 2052 * 2053 * Only call NL7C to process the new socket if 2054 * the listen socket allows blocking i/o. 2055 */ 2056 nsti->sti_nl7c_flags = 2057 sti->sti_nl7c_flags & (~NL7C_POLLIN); 2058 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 2059 /* 2060 * Nonblocking accept() just make it 2061 * persist to defer processing to the 2062 * read-side syscall (e.g. read). 2063 */ 2064 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 2065 } else if (nl7c_process(nso, B_FALSE)) { 2066 /* 2067 * NL7C has completed processing on the 2068 * socket, close the socket and back to 2069 * the top to await the next T_CONN_IND. 2070 */ 2071 mutex_exit(&nso->so_lock); 2072 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 2073 cr, NULL); 2074 VN_RELE(nvp); 2075 goto again; 2076 } 2077 /* Pass the new socket out */ 2078 } 2079 2080 mutex_exit(&nso->so_lock); 2081 2082 /* 2083 * It's possible, through the use of autopush for example, 2084 * that the acceptor stream may not support sti_direct 2085 * semantics. If the new socket does not support sti_direct 2086 * we issue a _SIOCSOCKFALLBACK to inform the transport 2087 * as we would in the I_PUSH case. 2088 */ 2089 if (nsti->sti_direct == 0) { 2090 int rval; 2091 2092 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2093 0, 0, K_TO_K, cr, &rval)) != 0) { 2094 mutex_enter(&so->so_lock); 2095 so_lock_single(so); 2096 eprintsoline(so, error); 2097 goto disconnect_vp; 2098 } 2099 } 2100 2101 /* 2102 * Pass out new socket. 2103 */ 2104 if (nsop != NULL) 2105 *nsop = nso; 2106 2107 return (0); 2108 } 2109 2110 /* 2111 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2112 * which don't support the FireEngine accept fast-path. It is also 2113 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2114 * again. 
Neither sockfs nor TCP attempt to find out if some other 2115 * random module has been inserted in between (in which case we 2116 * should follow TLI accept behaviour). We blindly assume the worst 2117 * case and revert back to old behaviour i.e. TCP will not send us 2118 * any option (eager) and the accept should happen on the listener 2119 * queue. Any queued T_conn_ind have already got their options removed 2120 * by so_sock2_stream() when "sockmod" was I_POP'd. 2121 */ 2122 /* 2123 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2124 */ 2125 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2126 #ifdef _ILP32 2127 queue_t *q; 2128 2129 /* 2130 * Find read queue in driver 2131 * Can safely do this since we "own" nso/nvp. 2132 */ 2133 q = strvp2wq(nvp)->q_next; 2134 while (SAMESTR(q)) 2135 q = q->q_next; 2136 q = RD(q); 2137 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2138 #else 2139 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2140 #endif /* _ILP32 */ 2141 conn_res->PRIM_type = O_T_CONN_RES; 2142 PRIM_type = O_T_CONN_RES; 2143 } else { 2144 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2145 conn_res->PRIM_type = T_CONN_RES; 2146 PRIM_type = T_CONN_RES; 2147 } 2148 conn_res->SEQ_number = SEQ_number; 2149 conn_res->OPT_length = 0; 2150 conn_res->OPT_offset = 0; 2151 2152 mutex_enter(&so->so_lock); 2153 so_lock_single(so); /* Set SOLOCKED */ 2154 mutex_exit(&so->so_lock); 2155 2156 error = kstrputmsg(SOTOV(so), mp, NULL, 2157 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2158 mutex_enter(&so->so_lock); 2159 if (error) { 2160 eprintsoline(so, error); 2161 goto disconnect_vp; 2162 } 2163 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2164 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2165 if (error) { 2166 eprintsoline(so, error); 2167 goto disconnect_vp; 2168 } 2169 /* 2170 * If there is a sin/sin6 appended onto the T_OK_ACK use 2171 * that to set the local address. 
If this is not present 2172 * then we zero out the address and don't set the 2173 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2174 * the pathname from the listening socket. 2175 */ 2176 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 2177 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 2178 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2179 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2180 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2181 nsti->sti_laddr_len = sinlen; 2182 nsti->sti_laddr_valid = 1; 2183 } else if (nso->so_family == AF_UNIX) { 2184 ASSERT(so->so_family == AF_UNIX); 2185 nsti->sti_laddr_len = sti->sti_laddr_len; 2186 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2187 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2188 nsti->sti_laddr_len); 2189 nsti->sti_laddr_valid = 1; 2190 } else { 2191 nsti->sti_laddr_len = sti->sti_laddr_len; 2192 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2193 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2194 nsti->sti_laddr_sa->sa_family = nso->so_family; 2195 } 2196 freemsg(ack_mp); 2197 2198 so_unlock_single(so, SOLOCKED); 2199 mutex_exit(&so->so_lock); 2200 2201 nso->so_state |= SS_ISCONNECTED; 2202 2203 /* 2204 * Pass out new socket. 
*/
	if (nsop != NULL)
		*nsop = nso;

	return (0);


eproto_disc_unl:
	error = EPROTO;
e_disc_unl:
	eprintsoline(so, error);
	goto disconnect_unlocked;

pr_disc_vp_unl:
	eprintsoline(so, error);
disconnect_vp_unlocked:
	(void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
	VN_RELE(nvp);
disconnect_unlocked:
	(void) sodisconnect(so, SEQ_number, 0);
	return (error);

pr_disc_vp:
	eprintsoline(so, error);
disconnect_vp:
	/*
	 * This exit releases SOLOCKED and so_lock itself, after telling
	 * sodisconnect() that the lock is already held.
	 */
	(void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD);
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	(void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
	VN_RELE(nvp);
	return (error);

conn_bad:	/* Note: SunOS 4/BSD unconditionally returns EINVAL here */
	error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW)
	    ? EOPNOTSUPP : EINVAL;
e_bad:
	eprintsoline(so, error);
	return (error);
}

/*
 * connect a socket.
 *
 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
 * unconnect (by specifying a null address).
 *
 * Arguments:
 *	so	- socket to connect.
 *	name	- destination address. A NULL name, or a name whose family
 *		  is AF_UNSPEC, is treated as a request to unconnect.
 *	namelen	- length of name in bytes.
 *	fflag	- file flags handed to sowaitconnected(); forced to 0 on the
 *		  datagram T_CONN_REQ path so that EWOULDBLOCK is not returned.
 *	flags	- _SOCONNECT_* flags. _SOCONNECT_NOXLATE and _SOCONNECT_XPG4_2
 *		  are examined here; _SOCONNECT_DID_BIND is set internally
 *		  when this call had to bind the socket first.
 *	cr	- credentials passed on to the allocation, bind and
 *		  setsockopt helpers.
 *
 * Returns 0 on success, otherwise an errno value:
 *	EINTR		a message allocation was interrupted
 *	EOPNOTSUPP	the socket is listening (SS_ACCEPTCONN)
 *	EISCONN/EALREADY a connection-oriented socket is already connected
 *			or connecting
 *	EINVAL		no address was given for a connect, or the address
 *			length is unacceptable
 * as well as any error returned by the helpers called below.
 *
 * Locking: so_lock and SOLOCKED are acquired here. Both are dropped around
 * the sotpi_setsockopt() calls, so_lock is dropped around kstrputmsg(), and
 * SOLOCKED is released before waiting in sowaitconnected(). need_unlock
 * tracks whether SOLOCKED still has to be released at "done".
 *
 * For every error other than EINPROGRESS, EALREADY, EISCONN and EINTR the
 * SS_ISCONNECTING state is cleared and, if this call did the bind, the
 * socket is unbound again.
 */
int
sotpi_connect(struct sonode *so,
	const struct sockaddr *name,
	socklen_t namelen,
	int fflag,
	int flags,
	struct cred *cr)
{
	struct T_conn_req	conn_req;
	int			error = 0;
	mblk_t			*mp;
	void			*src;
	socklen_t		srclen;
	void			*addr;
	socklen_t		addrlen;
	boolean_t		need_unlock;
	sotpi_info_t		*sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
	    (void *)so, (void *)name, namelen, fflag, flags,
	    pr_state(so->so_state, so->so_mode)));

	/*
	 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
	 * avoid sleeping for memory with SOLOCKED held.
	 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen
	 * + sizeof (struct T_opthdr).
	 * (the AF_UNIX so_ux_addr_xlate() does not make the address
	 * exceed sti_faddr_maxlen).
	 */
	mp = soallocproto(sizeof (struct T_conn_req) +
	    2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR,
	    cr);
	if (mp == NULL) {
		/*
		 * Connect can not fail with ENOBUFS. A signal was
		 * caught so return EINTR.
		 */
		error = EINTR;
		eprintsoline(so, error);
		return (error);
	}

	mutex_enter(&so->so_lock);
	/*
	 * Make sure there is a preallocated T_unbind_req message
	 * before any binding. This message is allocated when the
	 * socket is created. Since another thread can consume
	 * so_unbind_mp by the time we return from so_lock_single(),
	 * we should check the availability of so_unbind_mp after
	 * we return from so_lock_single().
	 */

	so_lock_single(so);	/* Set SOLOCKED */
	need_unlock = B_TRUE;

	if (sti->sti_unbind_mp == NULL) {
		dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n"));
		/* NOTE: holding so_lock while sleeping */
		sti->sti_unbind_mp =
		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr);
		if (sti->sti_unbind_mp == NULL) {
			error = EINTR;
			goto done;
		}
	}

	/*
	 * Can't have done a listen before connecting.
	 */
	if (so->so_state & SS_ACCEPTCONN) {
		error = EOPNOTSUPP;
		goto done;
	}

	/*
	 * Must be bound with the transport
	 */
	if (!(so->so_state & SS_ISBOUND)) {
		if ((so->so_family == AF_INET || so->so_family == AF_INET6) &&
		    /*CONSTCOND*/
		    so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) {
			/*
			 * Optimization for AF_INET{,6} transports
			 * that can handle a T_CONN_REQ without being bound.
			 */
			so_automatic_bind(so);
		} else {
			error = sotpi_bind(so, NULL, 0,
			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
			if (error)
				goto done;
		}
		ASSERT(so->so_state & SS_ISBOUND);
		/* Remember the bind so a fatal error below can undo it */
		flags |= _SOCONNECT_DID_BIND;
	}

	/*
	 * Handle a connect to a name parameter of type AF_UNSPEC like a
	 * connect to a null address. This is the portable method to
	 * unconnect a socket.
	 */
	if ((namelen >= sizeof (sa_family_t)) &&
	    (name->sa_family == AF_UNSPEC)) {
		name = NULL;
		namelen = 0;
	}

	/*
	 * Check that we are not already connected.
	 * A connection-oriented socket cannot be reconnected.
	 * A connected connection-less socket can be
	 * - connected to a different address by a subsequent connect
	 * - "unconnected" by a connect to the NULL address
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
		ASSERT(!(flags & _SOCONNECT_DID_BIND));
		if (so->so_mode & SM_CONNREQUIRED) {
			/* Connection-oriented socket */
			error = so->so_state & SS_ISCONNECTED ?
			    EISCONN : EALREADY;
			goto done;
		}
		/* Connection-less socket */
		if (name == NULL) {
			/*
			 * Remove the connected state and clear SO_DGRAM_ERRIND
			 * since it was set when the socket was connected.
			 * If this is UDP also send down a T_DISCON_REQ.
			 */
			int val;

			if ((so->so_family == AF_INET ||
			    so->so_family == AF_INET6) &&
			    (so->so_type == SOCK_DGRAM ||
			    so->so_type == SOCK_RAW) &&
			    /*CONSTCOND*/
			    !soconnect_tpi_udp) {
				/* XXX What about implicitly unbinding here? */
				error = sodisconnect(so, -1,
				    _SODISCONNECT_LOCK_HELD);
			} else {
				so->so_state &=
				    ~(SS_ISCONNECTED | SS_ISCONNECTING);
				sti->sti_faddr_valid = 0;
				sti->sti_faddr_len = 0;
			}

			/* Remove SOLOCKED since setsockopt will grab it */
			so_unlock_single(so, SOLOCKED);
			mutex_exit(&so->so_lock);

			val = 0;
			(void) sotpi_setsockopt(so, SOL_SOCKET,
			    SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val),
			    cr);

			mutex_enter(&so->so_lock);
			so_lock_single(so);	/* Set SOLOCKED */
			goto done;
		}
	}
	ASSERT(so->so_state & SS_ISBOUND);

	/* An unconnect request on a socket that is not connected */
	if (name == NULL || namelen == 0) {
		error = EINVAL;
		goto done;
	}
	/*
	 * Mark the socket if sti_faddr_sa represents the transport level
	 * address.
	 */
	if (flags & _SOCONNECT_NOXLATE) {
		struct sockaddr_ux	*soaddr_ux;

		ASSERT(so->so_family == AF_UNIX);
		if (namelen != sizeof (struct sockaddr_ux)) {
			error = EINVAL;
			goto done;
		}
		soaddr_ux = (struct sockaddr_ux *)name;
		name = (struct sockaddr *)&soaddr_ux->sou_addr;
		namelen = sizeof (soaddr_ux->sou_addr);
		sti->sti_faddr_noxlate = 1;
	}

	/*
	 * Length and family checks.
	 */
	error = so_addr_verify(so, name, namelen);
	if (error)
		goto bad;

	/*
	 * Save foreign address. Needed for AF_UNIX as well as
	 * transport providers that do not support TI_GETPEERNAME.
	 * Also used for cached foreign address for TCP and UDP.
	 */
	if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) {
		error = EINVAL;
		goto done;
	}
	sti->sti_faddr_len = (socklen_t)namelen;
	ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
	bcopy(name, sti->sti_faddr_sa, namelen);
	sti->sti_faddr_valid = 1;

	/*
	 * Select the destination address (addr/addrlen) placed in the
	 * T_CONN_REQ and the optional source address (src/srclen) that is
	 * sent as an option.
	 */
	if (so->so_family == AF_UNIX) {
		if (sti->sti_faddr_noxlate) {
			/*
			 * Already have a transport internal address. Do not
			 * pass any (transport internal) source address.
			 */
			addr = sti->sti_faddr_sa;
			addrlen = (t_uscalar_t)sti->sti_faddr_len;
			src = NULL;
			srclen = 0;
		} else {
			/*
			 * Pass the sockaddr_un source address as an option
			 * and translate the remote address.
			 * Holding so_lock thus sti_laddr_sa can not change.
			 */
			src = sti->sti_laddr_sa;
			srclen = (t_uscalar_t)sti->sti_laddr_len;
			dprintso(so, 1,
			    ("sotpi_connect UNIX: srclen %d, src %p\n",
			    srclen, src));
			error = so_ux_addr_xlate(so,
			    sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len,
			    (flags & _SOCONNECT_XPG4_2),
			    &addr, &addrlen);
			if (error)
				goto bad;
		}
	} else {
		addr = sti->sti_faddr_sa;
		addrlen = (t_uscalar_t)sti->sti_faddr_len;
		src = NULL;
		srclen = 0;
	}
	/*
	 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND
	 * option which asks the transport provider to send T_UDERR_IND
	 * messages. These T_UDERR_IND messages are used to return connected
	 * style errors (e.g. ECONNRESET) for connected datagram sockets.
	 *
	 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets)
	 * we send down a T_CONN_REQ. This is needed to let the
	 * transport assign a local address that is consistent with
	 * the remote address. Applications depend on a getsockname()
	 * after a connect() to retrieve the "source" IP address for
	 * the connected socket. Invalidate the cached local address
	 * to force getsockname() to enquire of the transport.
	 */
	if (!(so->so_mode & SM_CONNREQUIRED)) {
		/*
		 * Datagram socket.
		 */
		int32_t val;

		/* SOLOCKED and so_lock are dropped around the setsockopt */
		so_unlock_single(so, SOLOCKED);
		mutex_exit(&so->so_lock);

		val = 1;
		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
		    &val, (t_uscalar_t)sizeof (val), cr);

		mutex_enter(&so->so_lock);
		so_lock_single(so);	/* Set SOLOCKED */
		if ((so->so_family != AF_INET && so->so_family != AF_INET6) ||
		    (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) ||
		    soconnect_tpi_udp) {
			/* No T_CONN_REQ is sent; just mark it connected */
			soisconnected(so);
			goto done;
		}
		/*
		 * Send down T_CONN_REQ etc.
		 * Clear fflag to avoid returning EWOULDBLOCK.
		 */
		fflag = 0;
		ASSERT(so->so_family != AF_UNIX);
		sti->sti_laddr_valid = 0;
	} else if (sti->sti_laddr_len != 0) {
		/*
		 * If the local address or port was "any" then it may be
		 * changed by the transport as a result of the
		 * connect. Invalidate the cached version if we have one.
		 */
		switch (so->so_family) {
		case AF_INET:
			ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t));
			if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr ==
			    INADDR_ANY ||
			    ((sin_t *)sti->sti_laddr_sa)->sin_port == 0)
				sti->sti_laddr_valid = 0;
			break;

		case AF_INET6:
			ASSERT(sti->sti_laddr_len ==
			    (socklen_t)sizeof (sin6_t));
			if (IN6_IS_ADDR_UNSPECIFIED(
			    &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) ||
			    IN6_IS_ADDR_V4MAPPED_ANY(
			    &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) ||
			    ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0)
				sti->sti_laddr_valid = 0;
			break;

		default:
			break;
		}
	}

	/*
	 * Check for failure of an earlier call
	 */
	if (so->so_error != 0)
		goto so_bad;

	/*
	 * Send down T_CONN_REQ. Message was allocated above.
	 */
	conn_req.PRIM_type = T_CONN_REQ;
	conn_req.DEST_length = addrlen;
	conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req);
	if (srclen == 0) {
		conn_req.OPT_length = 0;
		conn_req.OPT_offset = 0;
		soappendmsg(mp, &conn_req, sizeof (conn_req));
		soappendmsg(mp, addr, addrlen);
	} else {
		/*
		 * There is a AF_UNIX sockaddr_un to include as a source
		 * address option.
		 */
		struct T_opthdr toh;

		toh.level = SOL_SOCKET;
		toh.name = SO_SRCADDR;
		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
		toh.status = 0;
		conn_req.OPT_length =
		    (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen));
		conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) +
		    _TPI_ALIGN_TOPT(addrlen));

		/*
		 * Layout: T_conn_req, destination address, padding up to
		 * the option offset, option header, source address, padding.
		 */
		soappendmsg(mp, &conn_req, sizeof (conn_req));
		soappendmsg(mp, addr, addrlen);
		mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
		soappendmsg(mp, &toh, sizeof (toh));
		soappendmsg(mp, src, srclen);
		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
	}
	/*
	 * Set SS_ISCONNECTING before sending down the T_CONN_REQ
	 * in order to have the right state when the T_CONN_CON shows up.
	 */
	soisconnecting(so);
	mutex_exit(&so->so_lock);

	if (audit_active)
		audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0);

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	/* mp was handed to kstrputmsg(); make the freemsg() at done a no-op */
	mp = NULL;
	mutex_enter(&so->so_lock);
	if (error != 0)
		goto bad;

	if ((error = sowaitokack(so, T_CONN_REQ)) != 0)
		goto bad;

	/* Allow other threads to access the socket */
	so_unlock_single(so, SOLOCKED);
	need_unlock = B_FALSE;

	/*
	 * Wait until we get a T_CONN_CON or an error
	 */
	if ((error = sowaitconnected(so, fflag, 0)) != 0) {
		/* The error handling at done runs with SOLOCKED held */
		so_lock_single(so);	/* Set SOLOCKED */
		need_unlock = B_TRUE;
	}

done:
	/* mp is still set only if the T_CONN_REQ was never sent */
	freemsg(mp);
	switch (error) {
	case EINPROGRESS:
	case EALREADY:
	case EISCONN:
	case EINTR:
		/* Non-fatal errors */
		sti->sti_laddr_valid = 0;
		/* FALLTHRU */
	case 0:
		break;
	default:
		ASSERT(need_unlock);
		/*
		 * Fatal errors: clear SS_ISCONNECTING in case it was set,
		 * and invalidate local-address cache
		 */
		so->so_state &= ~SS_ISCONNECTING;
		sti->sti_laddr_valid = 0;
		/* A discon_ind might have already unbound us */
		if ((flags & _SOCONNECT_DID_BIND) &&
		    (so->so_state & SS_ISBOUND)) {
			int err;

			err = sotpi_unbind(so, 0);
			/* LINTED - statement has no conseq */
			if (err) {
				eprintsoline(so, err);
			}
		}
		break;
	}
	if (need_unlock)
		so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);

so_bad:	error = sogeterr(so, B_TRUE);
bad:	eprintsoline(so, error);
	goto done;
}

/*
 * Shut down one or both directions of a connected socket.
 *
 * how selects the direction: 0 stops further receives, 1 stops further
 * sends and 2 stops both. Any other value fails with EINVAL.
 *
 * A socket that is not connected yields ENOTCONN, unless xnet_skip_checks
 * is set, in which case nothing is done and 0 is returned.
 *
 * Only the state bits that this call newly set (state_change) trigger
 * stream head updates and transport messages, so a repeated shutdown()
 * does not send duplicates.
 *
 * Returns 0 or an errno value (including errors from sodisconnect() and
 * kstrputmsg()).
 */
/* ARGSUSED */
int
sotpi_shutdown(struct sonode *so, int how, struct cred *cr)
{
	struct T_ordrel_req	ordrel_req;
	mblk_t			*mp;
	uint_t			old_state, state_change;
	int			error = 0;
	sotpi_info_t		*sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n",
	    (void *)so, how, pr_state(so->so_state, so->so_mode)));

	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	/*
	 * SunOS 4.X has no check for datagram sockets.
	 * 5.X checks that it is connected (ENOTCONN)
	 * X/Open requires that we check the connected state.
	 */
	if (!(so->so_state & SS_ISCONNECTED)) {
		if (!xnet_skip_checks) {
			error = ENOTCONN;
			if (xnet_check_print) {
				printf("sockfs: X/Open shutdown check "
				    "caused ENOTCONN\n");
			}
		}
		goto done;
	}
	/*
	 * Record the current state and then perform any state changes.
	 * Then use the difference between the old and new states to
	 * determine which messages need to be sent.
	 * This prevents e.g. duplicate T_ORDREL_REQ when there are
	 * duplicate calls to shutdown().
	 */
	old_state = so->so_state;

	switch (how) {
	case 0:
		socantrcvmore(so);
		break;
	case 1:
		socantsendmore(so);
		break;
	case 2:
		socantsendmore(so);
		socantrcvmore(so);
		break;
	default:
		error = EINVAL;
		goto done;
	}

	/*
	 * Assumes that the SS_CANT* flags are never cleared in the above code.
	 */
	state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) -
	    (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE));
	ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0);

	/* Every case below that breaks out re-acquires so_lock first */
	switch (state_change) {
	case 0:
		dprintso(so, 1,
		    ("sotpi_shutdown: nothing to send in state 0x%x\n",
		    so->so_state));
		goto done;

	case SS_CANTRCVMORE:
		mutex_exit(&so->so_lock);
		strseteof(SOTOV(so), 1);
		/*
		 * strseteof takes care of read side wakeups,
		 * pollwakeups, and signals.
		 */
		/*
		 * Get the read lock before flushing data to avoid problems
		 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
		 */
		mutex_enter(&so->so_lock);
		(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
		mutex_exit(&so->so_lock);

		/* Flush read side queue */
		strflushrq(SOTOV(so), FLUSHALL);

		mutex_enter(&so->so_lock);
		so_unlock_read(so);		/* Clear SOREADLOCKED */
		break;

	case SS_CANTSENDMORE:
		mutex_exit(&so->so_lock);
		strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
		mutex_enter(&so->so_lock);
		break;

	case SS_CANTSENDMORE|SS_CANTRCVMORE:
		mutex_exit(&so->so_lock);
		strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
		strseteof(SOTOV(so), 1);
		/*
		 * strseteof takes care of read side wakeups,
		 * pollwakeups, and signals.
		 */
		/*
		 * Get the read lock before flushing data to avoid problems
		 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
		 */
		mutex_enter(&so->so_lock);
		(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
		mutex_exit(&so->so_lock);

		/* Flush read side queue */
		strflushrq(SOTOV(so), FLUSHALL);

		mutex_enter(&so->so_lock);
		so_unlock_read(so);		/* Clear SOREADLOCKED */
		break;
	}

	ASSERT(MUTEX_HELD(&so->so_lock));

	/*
	 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them
	 * was set due to this call and the new state has both of them set:
	 *	Send the AF_UNIX close indication
	 *	For T_COTS send a discon_ind
	 *
	 * If cantsend was set due to this call:
	 *	For T_COTSORD send an ordrel_ind
	 *
	 * Note that for T_CLTS there is no message sent here.
	 */
	if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE|SS_CANTSENDMORE)) {
		/*
		 * For SunOS 4.X compatibility we tell the other end
		 * that we are unable to receive at this point.
		 */
		if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS)
			so_unix_close(so);

		if (sti->sti_serv_type == T_COTS)
			error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD);
	}
	if ((state_change & SS_CANTSENDMORE) &&
	    (sti->sti_serv_type == T_COTS_ORD)) {
		/* Send an orderly release */
		ordrel_req.PRIM_type = T_ORDREL_REQ;

		/* so_lock is dropped for the allocation and the putmsg */
		mutex_exit(&so->so_lock);
		mp = soallocproto1(&ordrel_req, sizeof (ordrel_req),
		    0, _ALLOC_SLEEP, cr);
		/*
		 * Send down the T_ORDREL_REQ even if there is flow control.
		 * This prevents shutdown from blocking.
		 * Note that there is no T_OK_ACK for ordrel_req.
		 */
		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
		mutex_enter(&so->so_lock);
		if (error) {
			eprintsoline(so, error);
			goto done;
		}
	}

done:
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send
 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer
 * that we have closed.
 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length
 * T_UNITDATA_REQ containing the same option.
 *
 * For SOCK_DGRAM half-connections (somebody connected to this end
 * but this end is not connect) we don't know where to send any
 * SO_UNIX_CLOSE.
 *
 * We have to ignore stream head errors just in case there has been
 * a shutdown(output).
 * Ignore any flow control to try to get the message more quickly to the peer.
 * While locally ignoring flow control solves the problem when there
 * is only the loopback transport on the stream it would not provide
 * the correct AF_UNIX socket semantics when one or more modules have
 * been pushed.
2874 */ 2875 void 2876 so_unix_close(struct sonode *so) 2877 { 2878 int error; 2879 struct T_opthdr toh; 2880 mblk_t *mp; 2881 sotpi_info_t *sti = SOTOTPI(so); 2882 2883 ASSERT(MUTEX_HELD(&so->so_lock)); 2884 2885 ASSERT(so->so_family == AF_UNIX); 2886 2887 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2888 (SS_ISCONNECTED|SS_ISBOUND)) 2889 return; 2890 2891 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2892 (void *)so, pr_state(so->so_state, so->so_mode))); 2893 2894 toh.level = SOL_SOCKET; 2895 toh.name = SO_UNIX_CLOSE; 2896 2897 /* zero length + header */ 2898 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2899 toh.status = 0; 2900 2901 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2902 struct T_optdata_req tdr; 2903 2904 tdr.PRIM_type = T_OPTDATA_REQ; 2905 tdr.DATA_flag = 0; 2906 2907 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2908 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2909 2910 /* NOTE: holding so_lock while sleeping */ 2911 mp = soallocproto2(&tdr, sizeof (tdr), 2912 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2913 } else { 2914 struct T_unitdata_req tudr; 2915 void *addr; 2916 socklen_t addrlen; 2917 void *src; 2918 socklen_t srclen; 2919 struct T_opthdr toh2; 2920 t_scalar_t size; 2921 2922 /* Connecteded DGRAM socket */ 2923 2924 /* 2925 * For AF_UNIX the destination address is translated to 2926 * an internal name and the source address is passed as 2927 * an option. 2928 */ 2929 /* 2930 * Length and family checks. 2931 */ 2932 error = so_addr_verify(so, sti->sti_faddr_sa, 2933 (t_uscalar_t)sti->sti_faddr_len); 2934 if (error) { 2935 eprintsoline(so, error); 2936 return; 2937 } 2938 if (sti->sti_faddr_noxlate) { 2939 /* 2940 * Already have a transport internal address. Do not 2941 * pass any (transport internal) source address. 
2942 */ 2943 addr = sti->sti_faddr_sa; 2944 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2945 src = NULL; 2946 srclen = 0; 2947 } else { 2948 /* 2949 * Pass the sockaddr_un source address as an option 2950 * and translate the remote address. 2951 * Holding so_lock thus sti_laddr_sa can not change. 2952 */ 2953 src = sti->sti_laddr_sa; 2954 srclen = (socklen_t)sti->sti_laddr_len; 2955 dprintso(so, 1, 2956 ("so_ux_close: srclen %d, src %p\n", 2957 srclen, src)); 2958 error = so_ux_addr_xlate(so, 2959 sti->sti_faddr_sa, 2960 (socklen_t)sti->sti_faddr_len, 0, 2961 &addr, &addrlen); 2962 if (error) { 2963 eprintsoline(so, error); 2964 return; 2965 } 2966 } 2967 tudr.PRIM_type = T_UNITDATA_REQ; 2968 tudr.DEST_length = addrlen; 2969 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2970 if (srclen == 0) { 2971 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2972 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2973 _TPI_ALIGN_TOPT(addrlen)); 2974 2975 size = tudr.OPT_offset + tudr.OPT_length; 2976 /* NOTE: holding so_lock while sleeping */ 2977 mp = soallocproto2(&tudr, sizeof (tudr), 2978 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2979 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2980 soappendmsg(mp, &toh, sizeof (toh)); 2981 } else { 2982 /* 2983 * There is a AF_UNIX sockaddr_un to include as a 2984 * source address option. 
2985 */ 2986 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2987 _TPI_ALIGN_TOPT(srclen)); 2988 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2989 _TPI_ALIGN_TOPT(addrlen)); 2990 2991 toh2.level = SOL_SOCKET; 2992 toh2.name = SO_SRCADDR; 2993 toh2.len = (t_uscalar_t)(srclen + 2994 sizeof (struct T_opthdr)); 2995 toh2.status = 0; 2996 2997 size = tudr.OPT_offset + tudr.OPT_length; 2998 2999 /* NOTE: holding so_lock while sleeping */ 3000 mp = soallocproto2(&tudr, sizeof (tudr), 3001 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 3002 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3003 soappendmsg(mp, &toh, sizeof (toh)); 3004 soappendmsg(mp, &toh2, sizeof (toh2)); 3005 soappendmsg(mp, src, srclen); 3006 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3007 } 3008 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3009 } 3010 mutex_exit(&so->so_lock); 3011 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 3012 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 3013 mutex_enter(&so->so_lock); 3014 } 3015 3016 /* 3017 * Called by sotpi_recvmsg when reading a non-zero amount of data. 3018 * In addition, the caller typically verifies that there is some 3019 * potential state to clear by checking 3020 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 3021 * before calling this routine. 3022 * Note that such a check can be made without holding so_lock since 3023 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 3024 * decrements sti_oobsigcnt. 3025 * 3026 * When data is read *after* the point that all pending 3027 * oob data has been consumed the oob indication is cleared. 3028 * 3029 * This logic keeps select/poll returning POLLRDBAND and 3030 * SIOCATMARK returning true until we have read past 3031 * the mark. 
3032 */ 3033 static void 3034 sorecv_update_oobstate(struct sonode *so) 3035 { 3036 sotpi_info_t *sti = SOTOTPI(so); 3037 3038 mutex_enter(&so->so_lock); 3039 ASSERT(so_verify_oobstate(so)); 3040 dprintso(so, 1, 3041 ("sorecv_update_oobstate: counts %d/%d state %s\n", 3042 sti->sti_oobsigcnt, 3043 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 3044 if (sti->sti_oobsigcnt == 0) { 3045 /* No more pending oob indications */ 3046 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 3047 freemsg(so->so_oobmsg); 3048 so->so_oobmsg = NULL; 3049 } 3050 ASSERT(so_verify_oobstate(so)); 3051 mutex_exit(&so->so_lock); 3052 } 3053 3054 /* 3055 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 3056 */ 3057 static int 3058 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 3059 { 3060 sotpi_info_t *sti = SOTOTPI(so); 3061 int error = 0; 3062 mblk_t *tmp = NULL; 3063 mblk_t *pmp = NULL; 3064 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 3065 3066 ASSERT(nmp != NULL); 3067 3068 while (nmp != NULL && uiop->uio_resid > 0) { 3069 ssize_t n; 3070 3071 if (DB_TYPE(nmp) == M_DATA) { 3072 /* 3073 * We have some data, uiomove up to resid bytes. 3074 */ 3075 n = MIN(MBLKL(nmp), uiop->uio_resid); 3076 if (n > 0) 3077 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 3078 nmp->b_rptr += n; 3079 if (nmp->b_rptr == nmp->b_wptr) { 3080 pmp = nmp; 3081 nmp = nmp->b_cont; 3082 } 3083 if (error) 3084 break; 3085 } else { 3086 /* 3087 * We only handle data, save for caller to handle. 
3088 */ 3089 if (pmp != NULL) { 3090 pmp->b_cont = nmp->b_cont; 3091 } 3092 nmp->b_cont = NULL; 3093 if (*rmp == NULL) { 3094 *rmp = nmp; 3095 } else { 3096 tmp->b_cont = nmp; 3097 } 3098 nmp = nmp->b_cont; 3099 tmp = nmp; 3100 } 3101 } 3102 if (pmp != NULL) { 3103 /* Free any mblk_t(s) which we have consumed */ 3104 pmp->b_cont = NULL; 3105 freemsg(sti->sti_nl7c_rcv_mp); 3106 } 3107 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3108 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3109 if (error == 0) { 3110 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3111 3112 error = p->r_v.r_v2; 3113 p->r_v.r_v2 = 0; 3114 } 3115 rp->r_vals = sti->sti_nl7c_rcv_rval; 3116 sti->sti_nl7c_rcv_rval = 0; 3117 } else { 3118 /* More mblk_t(s) to process so no rval to return */ 3119 rp->r_vals = 0; 3120 } 3121 return (error); 3122 } 3123 /* 3124 * Receive the next message on the queue. 3125 * If msg_controllen is non-zero when called the caller is interested in 3126 * any received control info (options). 3127 * If msg_namelen is non-zero when called the caller is interested in 3128 * any received source address. 3129 * The routine returns with msg_control and msg_name pointing to 3130 * kmem_alloc'ed memory which the caller has to free. 
3131 */ 3132 /* ARGSUSED */ 3133 int 3134 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3135 struct cred *cr) 3136 { 3137 union T_primitives *tpr; 3138 mblk_t *mp; 3139 uchar_t pri; 3140 int pflag, opflag; 3141 void *control; 3142 t_uscalar_t controllen; 3143 t_uscalar_t namelen; 3144 int so_state = so->so_state; /* Snapshot */ 3145 ssize_t saved_resid; 3146 rval_t rval; 3147 int flags; 3148 clock_t timout; 3149 int error = 0; 3150 int reterr = 0; 3151 struct uio *suiop = NULL; 3152 sotpi_info_t *sti = SOTOTPI(so); 3153 3154 flags = msg->msg_flags; 3155 msg->msg_flags = 0; 3156 3157 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3158 (void *)so, (void *)msg, flags, 3159 pr_state(so->so_state, so->so_mode), so->so_error)); 3160 3161 if (so->so_version == SOV_STREAM) { 3162 so_update_attrs(so, SOACC); 3163 /* The imaginary "sockmod" has been popped - act as a stream */ 3164 return (strread(SOTOV(so), uiop, cr)); 3165 } 3166 3167 /* 3168 * If we are not connected because we have never been connected 3169 * we return ENOTCONN. If we have been connected (but are no longer 3170 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3171 * the EOF. 3172 * 3173 * An alternative would be to post an ENOTCONN error in stream head 3174 * (read+write) and clear it when we're connected. However, that error 3175 * would cause incorrect poll/select behavior! 3176 */ 3177 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3178 (so->so_mode & SM_CONNREQUIRED)) { 3179 return (ENOTCONN); 3180 } 3181 3182 /* 3183 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3184 * after checking that the read queue is empty) and returns zero. 3185 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3186 * is zero. 
3187 */ 3188 3189 if (flags & MSG_OOB) { 3190 /* Check that the transport supports OOB */ 3191 if (!(so->so_mode & SM_EXDATA)) 3192 return (EOPNOTSUPP); 3193 so_update_attrs(so, SOACC); 3194 return (sorecvoob(so, msg, uiop, flags, 3195 (so->so_options & SO_OOBINLINE))); 3196 } 3197 3198 so_update_attrs(so, SOACC); 3199 3200 /* 3201 * Set msg_controllen and msg_namelen to zero here to make it 3202 * simpler in the cases that no control or name is returned. 3203 */ 3204 controllen = msg->msg_controllen; 3205 namelen = msg->msg_namelen; 3206 msg->msg_controllen = 0; 3207 msg->msg_namelen = 0; 3208 3209 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3210 namelen, controllen)); 3211 3212 mutex_enter(&so->so_lock); 3213 /* 3214 * If an NL7C enabled socket and not waiting for write data. 3215 */ 3216 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3217 NL7C_ENABLED) { 3218 if (sti->sti_nl7c_uri) { 3219 /* Close uri processing for a previous request */ 3220 nl7c_close(so); 3221 } 3222 if ((so_state & SS_CANTRCVMORE) && 3223 sti->sti_nl7c_rcv_mp == NULL) { 3224 /* Nothing to process, EOF */ 3225 mutex_exit(&so->so_lock); 3226 return (0); 3227 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3228 /* Persistent NL7C socket, try to process request */ 3229 boolean_t ret; 3230 3231 ret = nl7c_process(so, 3232 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3233 rval.r_vals = sti->sti_nl7c_rcv_rval; 3234 error = rval.r_v.r_v2; 3235 if (error) { 3236 /* Error of some sort, return it */ 3237 mutex_exit(&so->so_lock); 3238 return (error); 3239 } 3240 if (sti->sti_nl7c_flags && 3241 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3242 /* 3243 * Still an NL7C socket and no data 3244 * to pass up to the caller. 3245 */ 3246 mutex_exit(&so->so_lock); 3247 if (ret) { 3248 /* EOF */ 3249 return (0); 3250 } else { 3251 /* Need more data */ 3252 return (EAGAIN); 3253 } 3254 } 3255 } else { 3256 /* 3257 * Not persistent so no further NL7C processing. 
3258 */ 3259 sti->sti_nl7c_flags = 0; 3260 } 3261 } 3262 /* 3263 * Only one reader is allowed at any given time. This is needed 3264 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3265 * 3266 * This is slightly different that BSD behavior in that it fails with 3267 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3268 * is single-threaded using sblock(), which is dropped while waiting 3269 * for data to appear. The difference shows up e.g. if one 3270 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3271 * does use nonblocking io and different threads are reading each 3272 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3273 * in this case as long as the read queue doesn't get empty. 3274 * In this implementation the thread using nonblocking io can 3275 * get an EWOULDBLOCK error due to the blocking thread executing 3276 * e.g. in the uiomove in kstrgetmsg. 3277 * This difference is not believed to be significant. 3278 */ 3279 /* Set SOREADLOCKED */ 3280 error = so_lock_read_intr(so, 3281 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3282 mutex_exit(&so->so_lock); 3283 if (error) 3284 return (error); 3285 3286 /* 3287 * Tell kstrgetmsg to not inspect the stream head errors until all 3288 * queued data has been consumed. 3289 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3290 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3291 * 3292 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3293 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3294 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 
3295 */ 3296 pflag = MSG_ANY | MSG_DELAYERROR; 3297 if (flags & MSG_PEEK) { 3298 pflag |= MSG_IPEEK; 3299 flags &= ~MSG_WAITALL; 3300 } 3301 if (so->so_mode & SM_ATOMIC) 3302 pflag |= MSG_DISCARDTAIL; 3303 3304 if (flags & MSG_DONTWAIT) 3305 timout = 0; 3306 else 3307 timout = -1; 3308 opflag = pflag; 3309 3310 suiop = sod_rcv_init(so, flags, &uiop); 3311 retry: 3312 saved_resid = uiop->uio_resid; 3313 pri = 0; 3314 mp = NULL; 3315 if (sti->sti_nl7c_rcv_mp != NULL) { 3316 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3317 error = nl7c_sorecv(so, &mp, uiop, &rval); 3318 } else { 3319 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3320 timout, &rval); 3321 } 3322 if (error != 0) { 3323 /* kstrgetmsg returns ETIME when timeout expires */ 3324 if (error == ETIME) 3325 error = EWOULDBLOCK; 3326 goto out; 3327 } 3328 /* 3329 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3330 * For non-datagrams MOREDATA is used to set MSG_EOR. 3331 */ 3332 ASSERT(!(rval.r_val1 & MORECTL)); 3333 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3334 msg->msg_flags |= MSG_TRUNC; 3335 3336 if (mp == NULL) { 3337 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3338 /* 3339 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3340 * The draft Posix socket spec states that the mark should 3341 * not be cleared when peeking. We follow the latter. 3342 */ 3343 if ((so->so_state & 3344 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3345 (uiop->uio_resid != saved_resid) && 3346 !(flags & MSG_PEEK)) { 3347 sorecv_update_oobstate(so); 3348 } 3349 3350 mutex_enter(&so->so_lock); 3351 /* Set MSG_EOR based on MOREDATA */ 3352 if (!(rval.r_val1 & MOREDATA)) { 3353 if (so->so_state & SS_SAVEDEOR) { 3354 msg->msg_flags |= MSG_EOR; 3355 so->so_state &= ~SS_SAVEDEOR; 3356 } 3357 } 3358 /* 3359 * If some data was received (i.e. not EOF) and the 3360 * read/recv* has not been satisfied wait for some more. 
3361 */ 3362 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3363 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3364 mutex_exit(&so->so_lock); 3365 pflag = opflag | MSG_NOMARK; 3366 goto retry; 3367 } 3368 goto out_locked; 3369 } 3370 3371 /* strsock_proto has already verified length and alignment */ 3372 tpr = (union T_primitives *)mp->b_rptr; 3373 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3374 3375 switch (tpr->type) { 3376 case T_DATA_IND: { 3377 if ((so->so_state & 3378 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3379 (uiop->uio_resid != saved_resid) && 3380 !(flags & MSG_PEEK)) { 3381 sorecv_update_oobstate(so); 3382 } 3383 3384 /* 3385 * Set msg_flags to MSG_EOR based on 3386 * MORE_flag and MOREDATA. 3387 */ 3388 mutex_enter(&so->so_lock); 3389 so->so_state &= ~SS_SAVEDEOR; 3390 if (!(tpr->data_ind.MORE_flag & 1)) { 3391 if (!(rval.r_val1 & MOREDATA)) 3392 msg->msg_flags |= MSG_EOR; 3393 else 3394 so->so_state |= SS_SAVEDEOR; 3395 } 3396 freemsg(mp); 3397 /* 3398 * If some data was received (i.e. not EOF) and the 3399 * read/recv* has not been satisfied wait for some more. 
3400 */ 3401 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3402 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3403 mutex_exit(&so->so_lock); 3404 pflag = opflag | MSG_NOMARK; 3405 goto retry; 3406 } 3407 goto out_locked; 3408 } 3409 case T_UNITDATA_IND: { 3410 void *addr; 3411 t_uscalar_t addrlen; 3412 void *abuf; 3413 t_uscalar_t optlen; 3414 void *opt; 3415 3416 if ((so->so_state & 3417 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3418 (uiop->uio_resid != saved_resid) && 3419 !(flags & MSG_PEEK)) { 3420 sorecv_update_oobstate(so); 3421 } 3422 3423 if (namelen != 0) { 3424 /* Caller wants source address */ 3425 addrlen = tpr->unitdata_ind.SRC_length; 3426 addr = sogetoff(mp, 3427 tpr->unitdata_ind.SRC_offset, 3428 addrlen, 1); 3429 if (addr == NULL) { 3430 freemsg(mp); 3431 error = EPROTO; 3432 eprintsoline(so, error); 3433 goto out; 3434 } 3435 if (so->so_family == AF_UNIX) { 3436 /* 3437 * Can not use the transport level address. 3438 * If there is a SO_SRCADDR option carrying 3439 * the socket level address it will be 3440 * extracted below. 3441 */ 3442 addr = NULL; 3443 addrlen = 0; 3444 } 3445 } 3446 optlen = tpr->unitdata_ind.OPT_length; 3447 if (optlen != 0) { 3448 t_uscalar_t ncontrollen; 3449 3450 /* 3451 * Extract any source address option. 3452 * Determine how large cmsg buffer is needed. 3453 */ 3454 opt = sogetoff(mp, 3455 tpr->unitdata_ind.OPT_offset, 3456 optlen, __TPI_ALIGN_SIZE); 3457 3458 if (opt == NULL) { 3459 freemsg(mp); 3460 error = EPROTO; 3461 eprintsoline(so, error); 3462 goto out; 3463 } 3464 if (so->so_family == AF_UNIX) 3465 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3466 ncontrollen = so_cmsglen(mp, opt, optlen, 3467 !(flags & MSG_XPG4_2)); 3468 if (controllen != 0) 3469 controllen = ncontrollen; 3470 else if (ncontrollen != 0) 3471 msg->msg_flags |= MSG_CTRUNC; 3472 } else { 3473 controllen = 0; 3474 } 3475 3476 if (namelen != 0) { 3477 /* 3478 * Return address to caller. 
3479 * Caller handles truncation if length 3480 * exceeds msg_namelen. 3481 * NOTE: AF_UNIX NUL termination is ensured by 3482 * the sender's copyin_name(). 3483 */ 3484 abuf = kmem_alloc(addrlen, KM_SLEEP); 3485 3486 bcopy(addr, abuf, addrlen); 3487 msg->msg_name = abuf; 3488 msg->msg_namelen = addrlen; 3489 } 3490 3491 if (controllen != 0) { 3492 /* 3493 * Return control msg to caller. 3494 * Caller handles truncation if length 3495 * exceeds msg_controllen. 3496 */ 3497 control = kmem_zalloc(controllen, KM_SLEEP); 3498 3499 error = so_opt2cmsg(mp, opt, optlen, 3500 !(flags & MSG_XPG4_2), 3501 control, controllen); 3502 if (error) { 3503 freemsg(mp); 3504 if (msg->msg_namelen != 0) 3505 kmem_free(msg->msg_name, 3506 msg->msg_namelen); 3507 kmem_free(control, controllen); 3508 eprintsoline(so, error); 3509 goto out; 3510 } 3511 msg->msg_control = control; 3512 msg->msg_controllen = controllen; 3513 } 3514 3515 freemsg(mp); 3516 goto out; 3517 } 3518 case T_OPTDATA_IND: { 3519 struct T_optdata_req *tdr; 3520 void *opt; 3521 t_uscalar_t optlen; 3522 3523 if ((so->so_state & 3524 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3525 (uiop->uio_resid != saved_resid) && 3526 !(flags & MSG_PEEK)) { 3527 sorecv_update_oobstate(so); 3528 } 3529 3530 tdr = (struct T_optdata_req *)mp->b_rptr; 3531 optlen = tdr->OPT_length; 3532 if (optlen != 0) { 3533 t_uscalar_t ncontrollen; 3534 /* 3535 * Determine how large cmsg buffer is needed. 3536 */ 3537 opt = sogetoff(mp, 3538 tpr->optdata_ind.OPT_offset, 3539 optlen, __TPI_ALIGN_SIZE); 3540 3541 if (opt == NULL) { 3542 freemsg(mp); 3543 error = EPROTO; 3544 eprintsoline(so, error); 3545 goto out; 3546 } 3547 3548 ncontrollen = so_cmsglen(mp, opt, optlen, 3549 !(flags & MSG_XPG4_2)); 3550 if (controllen != 0) 3551 controllen = ncontrollen; 3552 else if (ncontrollen != 0) 3553 msg->msg_flags |= MSG_CTRUNC; 3554 } else { 3555 controllen = 0; 3556 } 3557 3558 if (controllen != 0) { 3559 /* 3560 * Return control msg to caller. 
3561 * Caller handles truncation if length 3562 * exceeds msg_controllen. 3563 */ 3564 control = kmem_zalloc(controllen, KM_SLEEP); 3565 3566 error = so_opt2cmsg(mp, opt, optlen, 3567 !(flags & MSG_XPG4_2), 3568 control, controllen); 3569 if (error) { 3570 freemsg(mp); 3571 kmem_free(control, controllen); 3572 eprintsoline(so, error); 3573 goto out; 3574 } 3575 msg->msg_control = control; 3576 msg->msg_controllen = controllen; 3577 } 3578 3579 /* 3580 * Set msg_flags to MSG_EOR based on 3581 * DATA_flag and MOREDATA. 3582 */ 3583 mutex_enter(&so->so_lock); 3584 so->so_state &= ~SS_SAVEDEOR; 3585 if (!(tpr->data_ind.MORE_flag & 1)) { 3586 if (!(rval.r_val1 & MOREDATA)) 3587 msg->msg_flags |= MSG_EOR; 3588 else 3589 so->so_state |= SS_SAVEDEOR; 3590 } 3591 freemsg(mp); 3592 /* 3593 * If some data was received (i.e. not EOF) and the 3594 * read/recv* has not been satisfied wait for some more. 3595 * Not possible to wait if control info was received. 3596 */ 3597 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3598 controllen == 0 && 3599 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3600 mutex_exit(&so->so_lock); 3601 pflag = opflag | MSG_NOMARK; 3602 goto retry; 3603 } 3604 goto out_locked; 3605 } 3606 case T_EXDATA_IND: { 3607 dprintso(so, 1, 3608 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3609 "state %s\n", 3610 sti->sti_oobsigcnt, sti->sti_oobcnt, 3611 saved_resid - uiop->uio_resid, 3612 pr_state(so->so_state, so->so_mode))); 3613 /* 3614 * kstrgetmsg handles MSGMARK so there is nothing to 3615 * inspect in the T_EXDATA_IND. 3616 * strsock_proto makes the stream head queue the T_EXDATA_IND 3617 * as a separate message with no M_DATA component. Furthermore, 3618 * the stream head does not consolidate M_DATA messages onto 3619 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3620 * remains a message by itself. This is needed since MSGMARK 3621 * marks both the whole message as well as the last byte 3622 * of the message. 
3623 */ 3624 freemsg(mp); 3625 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3626 if (flags & MSG_PEEK) { 3627 /* 3628 * Even though we are peeking we consume the 3629 * T_EXDATA_IND thereby moving the mark information 3630 * to SS_RCVATMARK. Then the oob code below will 3631 * retry the peeking kstrgetmsg. 3632 * Note that the stream head read queue is 3633 * never flushed without holding SOREADLOCKED 3634 * thus the T_EXDATA_IND can not disappear 3635 * underneath us. 3636 */ 3637 dprintso(so, 1, 3638 ("sotpi_recvmsg: consume EXDATA_IND " 3639 "counts %d/%d state %s\n", 3640 sti->sti_oobsigcnt, 3641 sti->sti_oobcnt, 3642 pr_state(so->so_state, so->so_mode))); 3643 3644 pflag = MSG_ANY | MSG_DELAYERROR; 3645 if (so->so_mode & SM_ATOMIC) 3646 pflag |= MSG_DISCARDTAIL; 3647 3648 pri = 0; 3649 mp = NULL; 3650 3651 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3652 &pri, &pflag, (clock_t)-1, &rval); 3653 ASSERT(uiop->uio_resid == saved_resid); 3654 3655 if (error) { 3656 #ifdef SOCK_DEBUG 3657 if (error != EWOULDBLOCK && error != EINTR) { 3658 eprintsoline(so, error); 3659 } 3660 #endif /* SOCK_DEBUG */ 3661 goto out; 3662 } 3663 ASSERT(mp); 3664 tpr = (union T_primitives *)mp->b_rptr; 3665 ASSERT(tpr->type == T_EXDATA_IND); 3666 freemsg(mp); 3667 } /* end "if (flags & MSG_PEEK)" */ 3668 3669 /* 3670 * Decrement the number of queued and pending oob. 3671 * 3672 * SS_RCVATMARK is cleared when we read past a mark. 3673 * SS_HAVEOOBDATA is cleared when we've read past the 3674 * last mark. 3675 * SS_OOBPEND is cleared if we've read past the last 3676 * mark and no (new) SIGURG has been posted. 
3677 */ 3678 mutex_enter(&so->so_lock); 3679 ASSERT(so_verify_oobstate(so)); 3680 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3681 ASSERT(sti->sti_oobsigcnt > 0); 3682 sti->sti_oobsigcnt--; 3683 ASSERT(sti->sti_oobcnt > 0); 3684 sti->sti_oobcnt--; 3685 /* 3686 * Since the T_EXDATA_IND has been removed from the stream 3687 * head, but we have not read data past the mark, 3688 * sockfs needs to track that the socket is still at the mark. 3689 * 3690 * Since no data was received call kstrgetmsg again to wait 3691 * for data. 3692 */ 3693 so->so_state |= SS_RCVATMARK; 3694 mutex_exit(&so->so_lock); 3695 dprintso(so, 1, 3696 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3697 sti->sti_oobsigcnt, sti->sti_oobcnt, 3698 pr_state(so->so_state, so->so_mode))); 3699 pflag = opflag; 3700 goto retry; 3701 } 3702 default: 3703 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3704 (void *)so, tpr->type, (void *)mp); 3705 ASSERT(0); 3706 freemsg(mp); 3707 error = EPROTO; 3708 eprintsoline(so, error); 3709 goto out; 3710 } 3711 /* NOTREACHED */ 3712 out: 3713 mutex_enter(&so->so_lock); 3714 out_locked: 3715 if (so->so_direct != NULL) { 3716 mutex_enter(so->so_direct->sod_lockp); 3717 reterr = sod_rcv_done(so, suiop, uiop); 3718 mutex_exit(so->so_direct->sod_lockp); 3719 } 3720 if (reterr != 0 && error == 0) 3721 error = reterr; 3722 so_unlock_read(so); /* Clear SOREADLOCKED */ 3723 mutex_exit(&so->so_lock); 3724 return (error); 3725 } 3726 3727 /* 3728 * Sending data with options on a datagram socket. 3729 * Assumes caller has verified that SS_ISBOUND etc. are set. 
3730 */ 3731 static int 3732 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3733 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3734 { 3735 struct T_unitdata_req tudr; 3736 mblk_t *mp; 3737 int error; 3738 void *addr; 3739 socklen_t addrlen; 3740 void *src; 3741 socklen_t srclen; 3742 ssize_t len; 3743 int size; 3744 struct T_opthdr toh; 3745 struct fdbuf *fdbuf; 3746 t_uscalar_t optlen; 3747 void *fds; 3748 int fdlen; 3749 sotpi_info_t *sti = SOTOTPI(so); 3750 3751 ASSERT(name && namelen); 3752 ASSERT(control && controllen); 3753 3754 len = uiop->uio_resid; 3755 if (len > (ssize_t)sti->sti_tidu_size) { 3756 return (EMSGSIZE); 3757 } 3758 3759 /* 3760 * For AF_UNIX the destination address is translated to an internal 3761 * name and the source address is passed as an option. 3762 * Also, file descriptors are passed as file pointers in an 3763 * option. 3764 */ 3765 3766 /* 3767 * Length and family checks. 3768 */ 3769 error = so_addr_verify(so, name, namelen); 3770 if (error) { 3771 eprintsoline(so, error); 3772 return (error); 3773 } 3774 if (so->so_family == AF_UNIX) { 3775 if (sti->sti_faddr_noxlate) { 3776 /* 3777 * Already have a transport internal address. Do not 3778 * pass any (transport internal) source address. 3779 */ 3780 addr = name; 3781 addrlen = namelen; 3782 src = NULL; 3783 srclen = 0; 3784 } else { 3785 /* 3786 * Pass the sockaddr_un source address as an option 3787 * and translate the remote address. 3788 * 3789 * Note that this code does not prevent sti_laddr_sa 3790 * from changing while it is being used. Thus 3791 * if an unbind+bind occurs concurrently with this 3792 * send the peer might see a partially new and a 3793 * partially old "from" address. 
3794 */ 3795 src = sti->sti_laddr_sa; 3796 srclen = (t_uscalar_t)sti->sti_laddr_len; 3797 dprintso(so, 1, 3798 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3799 srclen, src)); 3800 error = so_ux_addr_xlate(so, name, namelen, 3801 (flags & MSG_XPG4_2), 3802 &addr, &addrlen); 3803 if (error) { 3804 eprintsoline(so, error); 3805 return (error); 3806 } 3807 } 3808 } else { 3809 addr = name; 3810 addrlen = namelen; 3811 src = NULL; 3812 srclen = 0; 3813 } 3814 optlen = so_optlen(control, controllen, 3815 !(flags & MSG_XPG4_2)); 3816 tudr.PRIM_type = T_UNITDATA_REQ; 3817 tudr.DEST_length = addrlen; 3818 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3819 if (srclen != 0) 3820 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3821 _TPI_ALIGN_TOPT(srclen)); 3822 else 3823 tudr.OPT_length = optlen; 3824 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3825 _TPI_ALIGN_TOPT(addrlen)); 3826 3827 size = tudr.OPT_offset + tudr.OPT_length; 3828 3829 /* 3830 * File descriptors only when SM_FDPASSING set. 3831 */ 3832 error = so_getfdopt(control, controllen, 3833 !(flags & MSG_XPG4_2), &fds, &fdlen); 3834 if (error) 3835 return (error); 3836 if (fdlen != -1) { 3837 if (!(so->so_mode & SM_FDPASSING)) 3838 return (EOPNOTSUPP); 3839 3840 error = fdbuf_create(fds, fdlen, &fdbuf); 3841 if (error) 3842 return (error); 3843 mp = fdbuf_allocmsg(size, fdbuf); 3844 } else { 3845 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3846 if (mp == NULL) { 3847 /* 3848 * Caught a signal waiting for memory. 3849 * Let send* return EINTR. 
3850 */ 3851 return (EINTR); 3852 } 3853 } 3854 soappendmsg(mp, &tudr, sizeof (tudr)); 3855 soappendmsg(mp, addr, addrlen); 3856 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3857 3858 if (fdlen != -1) { 3859 ASSERT(fdbuf != NULL); 3860 toh.level = SOL_SOCKET; 3861 toh.name = SO_FILEP; 3862 toh.len = fdbuf->fd_size + 3863 (t_uscalar_t)sizeof (struct T_opthdr); 3864 toh.status = 0; 3865 soappendmsg(mp, &toh, sizeof (toh)); 3866 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3867 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3868 } 3869 if (srclen != 0) { 3870 /* 3871 * There is a AF_UNIX sockaddr_un to include as a source 3872 * address option. 3873 */ 3874 toh.level = SOL_SOCKET; 3875 toh.name = SO_SRCADDR; 3876 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3877 toh.status = 0; 3878 soappendmsg(mp, &toh, sizeof (toh)); 3879 soappendmsg(mp, src, srclen); 3880 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3881 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3882 } 3883 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3884 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3885 /* At most 3 bytes left in the message */ 3886 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3887 ASSERT(MBLKL(mp) <= (ssize_t)size); 3888 3889 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3890 if (audit_active) 3891 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3892 3893 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3894 #ifdef SOCK_DEBUG 3895 if (error) { 3896 eprintsoline(so, error); 3897 } 3898 #endif /* SOCK_DEBUG */ 3899 return (error); 3900 } 3901 3902 /* 3903 * Sending data with options on a connected stream socket. 3904 * Assumes caller has verified that SS_ISCONNECTED is set. 
3905 */ 3906 static int 3907 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3908 t_uscalar_t controllen, int flags) 3909 { 3910 struct T_optdata_req tdr; 3911 mblk_t *mp; 3912 int error; 3913 ssize_t iosize; 3914 int size; 3915 struct fdbuf *fdbuf; 3916 t_uscalar_t optlen; 3917 void *fds; 3918 int fdlen; 3919 struct T_opthdr toh; 3920 sotpi_info_t *sti = SOTOTPI(so); 3921 3922 dprintso(so, 1, 3923 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3924 3925 /* 3926 * Has to be bound and connected. However, since no locks are 3927 * held the state could have changed after sotpi_sendmsg checked it 3928 * thus it is not possible to ASSERT on the state. 3929 */ 3930 3931 /* Options on connection-oriented only when SM_OPTDATA set. */ 3932 if (!(so->so_mode & SM_OPTDATA)) 3933 return (EOPNOTSUPP); 3934 3935 do { 3936 /* 3937 * Set the MORE flag if uio_resid does not fit in this 3938 * message or if the caller passed in "more". 3939 * Error for transports with zero tidu_size. 3940 */ 3941 tdr.PRIM_type = T_OPTDATA_REQ; 3942 iosize = sti->sti_tidu_size; 3943 if (iosize <= 0) 3944 return (EMSGSIZE); 3945 if (uiop->uio_resid > iosize) { 3946 tdr.DATA_flag = 1; 3947 } else { 3948 if (more) 3949 tdr.DATA_flag = 1; 3950 else 3951 tdr.DATA_flag = 0; 3952 iosize = uiop->uio_resid; 3953 } 3954 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3955 tdr.DATA_flag, iosize)); 3956 3957 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3958 tdr.OPT_length = optlen; 3959 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3960 3961 size = (int)sizeof (tdr) + optlen; 3962 /* 3963 * File descriptors only when SM_FDPASSING set. 
3964 */ 3965 error = so_getfdopt(control, controllen, 3966 !(flags & MSG_XPG4_2), &fds, &fdlen); 3967 if (error) 3968 return (error); 3969 if (fdlen != -1) { 3970 if (!(so->so_mode & SM_FDPASSING)) 3971 return (EOPNOTSUPP); 3972 3973 error = fdbuf_create(fds, fdlen, &fdbuf); 3974 if (error) 3975 return (error); 3976 mp = fdbuf_allocmsg(size, fdbuf); 3977 } else { 3978 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3979 if (mp == NULL) { 3980 /* 3981 * Caught a signal waiting for memory. 3982 * Let send* return EINTR. 3983 */ 3984 return (EINTR); 3985 } 3986 } 3987 soappendmsg(mp, &tdr, sizeof (tdr)); 3988 3989 if (fdlen != -1) { 3990 ASSERT(fdbuf != NULL); 3991 toh.level = SOL_SOCKET; 3992 toh.name = SO_FILEP; 3993 toh.len = fdbuf->fd_size + 3994 (t_uscalar_t)sizeof (struct T_opthdr); 3995 toh.status = 0; 3996 soappendmsg(mp, &toh, sizeof (toh)); 3997 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3998 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3999 } 4000 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 4001 /* At most 3 bytes left in the message */ 4002 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 4003 ASSERT(MBLKL(mp) <= (ssize_t)size); 4004 4005 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4006 4007 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4008 0, MSG_BAND, 0); 4009 if (error) { 4010 eprintsoline(so, error); 4011 return (error); 4012 } 4013 control = NULL; 4014 if (uiop->uio_resid > 0) { 4015 /* 4016 * Recheck for fatal errors. Fail write even though 4017 * some data have been written. This is consistent 4018 * with strwrite semantics and BSD sockets semantics. 
4019 */ 4020 if (so->so_state & SS_CANTSENDMORE) { 4021 eprintsoline(so, error); 4022 return (EPIPE); 4023 } 4024 if (so->so_error != 0) { 4025 mutex_enter(&so->so_lock); 4026 error = sogeterr(so, B_TRUE); 4027 mutex_exit(&so->so_lock); 4028 if (error != 0) { 4029 eprintsoline(so, error); 4030 return (error); 4031 } 4032 } 4033 } 4034 } while (uiop->uio_resid > 0); 4035 return (0); 4036 } 4037 4038 /* 4039 * Sending data on a datagram socket. 4040 * Assumes caller has verified that SS_ISBOUND etc. are set. 4041 * 4042 * For AF_UNIX the destination address is translated to an internal 4043 * name and the source address is passed as an option. 4044 */ 4045 int 4046 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 4047 struct uio *uiop, int flags) 4048 { 4049 struct T_unitdata_req tudr; 4050 mblk_t *mp; 4051 int error; 4052 void *addr; 4053 socklen_t addrlen; 4054 void *src; 4055 socklen_t srclen; 4056 ssize_t len; 4057 sotpi_info_t *sti = SOTOTPI(so); 4058 4059 ASSERT(name != NULL && namelen != 0); 4060 4061 len = uiop->uio_resid; 4062 if (len > sti->sti_tidu_size) { 4063 error = EMSGSIZE; 4064 goto done; 4065 } 4066 4067 /* Length and family checks */ 4068 error = so_addr_verify(so, name, namelen); 4069 if (error != 0) 4070 goto done; 4071 4072 if (sti->sti_direct) 4073 return (sodgram_direct(so, name, namelen, uiop, flags)); 4074 4075 if (so->so_family == AF_UNIX) { 4076 if (sti->sti_faddr_noxlate) { 4077 /* 4078 * Already have a transport internal address. Do not 4079 * pass any (transport internal) source address. 4080 */ 4081 addr = name; 4082 addrlen = namelen; 4083 src = NULL; 4084 srclen = 0; 4085 } else { 4086 /* 4087 * Pass the sockaddr_un source address as an option 4088 * and translate the remote address. 4089 * 4090 * Note that this code does not prevent sti_laddr_sa 4091 * from changing while it is being used. 
Thus 4092 * if an unbind+bind occurs concurrently with this 4093 * send the peer might see a partially new and a 4094 * partially old "from" address. 4095 */ 4096 src = sti->sti_laddr_sa; 4097 srclen = (socklen_t)sti->sti_laddr_len; 4098 dprintso(so, 1, 4099 ("sosend_dgram UNIX: srclen %d, src %p\n", 4100 srclen, src)); 4101 error = so_ux_addr_xlate(so, name, namelen, 4102 (flags & MSG_XPG4_2), 4103 &addr, &addrlen); 4104 if (error) { 4105 eprintsoline(so, error); 4106 goto done; 4107 } 4108 } 4109 } else { 4110 addr = name; 4111 addrlen = namelen; 4112 src = NULL; 4113 srclen = 0; 4114 } 4115 tudr.PRIM_type = T_UNITDATA_REQ; 4116 tudr.DEST_length = addrlen; 4117 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4118 if (srclen == 0) { 4119 tudr.OPT_length = 0; 4120 tudr.OPT_offset = 0; 4121 4122 mp = soallocproto2(&tudr, sizeof (tudr), 4123 addr, addrlen, 0, _ALLOC_INTR, CRED()); 4124 if (mp == NULL) { 4125 /* 4126 * Caught a signal waiting for memory. 4127 * Let send* return EINTR. 4128 */ 4129 error = EINTR; 4130 goto done; 4131 } 4132 } else { 4133 /* 4134 * There is a AF_UNIX sockaddr_un to include as a source 4135 * address option. 4136 */ 4137 struct T_opthdr toh; 4138 ssize_t size; 4139 4140 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4141 _TPI_ALIGN_TOPT(srclen)); 4142 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4143 _TPI_ALIGN_TOPT(addrlen)); 4144 4145 toh.level = SOL_SOCKET; 4146 toh.name = SO_SRCADDR; 4147 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4148 toh.status = 0; 4149 4150 size = tudr.OPT_offset + tudr.OPT_length; 4151 mp = soallocproto2(&tudr, sizeof (tudr), 4152 addr, addrlen, size, _ALLOC_INTR, CRED()); 4153 if (mp == NULL) { 4154 /* 4155 * Caught a signal waiting for memory. 4156 * Let send* return EINTR. 
4157 */ 4158 error = EINTR; 4159 goto done; 4160 } 4161 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4162 soappendmsg(mp, &toh, sizeof (toh)); 4163 soappendmsg(mp, src, srclen); 4164 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4165 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4166 } 4167 4168 if (audit_active) 4169 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4170 4171 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4172 done: 4173 #ifdef SOCK_DEBUG 4174 if (error) { 4175 eprintsoline(so, error); 4176 } 4177 #endif /* SOCK_DEBUG */ 4178 return (error); 4179 } 4180 4181 /* 4182 * Sending data on a connected stream socket. 4183 * Assumes caller has verified that SS_ISCONNECTED is set. 4184 */ 4185 int 4186 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4187 int sflag) 4188 { 4189 struct T_data_req tdr; 4190 mblk_t *mp; 4191 int error; 4192 ssize_t iosize; 4193 sotpi_info_t *sti = SOTOTPI(so); 4194 4195 dprintso(so, 1, 4196 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4197 (void *)so, uiop->uio_resid, prim, sflag)); 4198 4199 /* 4200 * Has to be bound and connected. However, since no locks are 4201 * held the state could have changed after sotpi_sendmsg checked it 4202 * thus it is not possible to ASSERT on the state. 4203 */ 4204 4205 do { 4206 /* 4207 * Set the MORE flag if uio_resid does not fit in this 4208 * message or if the caller passed in "more". 4209 * Error for transports with zero tidu_size. 
4210 */ 4211 tdr.PRIM_type = prim; 4212 iosize = sti->sti_tidu_size; 4213 if (iosize <= 0) 4214 return (EMSGSIZE); 4215 if (uiop->uio_resid > iosize) { 4216 tdr.MORE_flag = 1; 4217 } else { 4218 if (more) 4219 tdr.MORE_flag = 1; 4220 else 4221 tdr.MORE_flag = 0; 4222 iosize = uiop->uio_resid; 4223 } 4224 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4225 prim, tdr.MORE_flag, iosize)); 4226 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4227 if (mp == NULL) { 4228 /* 4229 * Caught a signal waiting for memory. 4230 * Let send* return EINTR. 4231 */ 4232 return (EINTR); 4233 } 4234 4235 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4236 0, sflag | MSG_BAND, 0); 4237 if (error) { 4238 eprintsoline(so, error); 4239 return (error); 4240 } 4241 if (uiop->uio_resid > 0) { 4242 /* 4243 * Recheck for fatal errors. Fail write even though 4244 * some data have been written. This is consistent 4245 * with strwrite semantics and BSD sockets semantics. 4246 */ 4247 if (so->so_state & SS_CANTSENDMORE) { 4248 eprintsoline(so, error); 4249 return (EPIPE); 4250 } 4251 if (so->so_error != 0) { 4252 mutex_enter(&so->so_lock); 4253 error = sogeterr(so, B_TRUE); 4254 mutex_exit(&so->so_lock); 4255 if (error != 0) { 4256 eprintsoline(so, error); 4257 return (error); 4258 } 4259 } 4260 } 4261 } while (uiop->uio_resid > 0); 4262 return (0); 4263 } 4264 4265 /* 4266 * Check the state for errors and call the appropriate send function. 4267 * 4268 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4269 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4270 * after sending the message. 
4271 */ 4272 static int 4273 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4274 struct cred *cr) 4275 { 4276 int so_state; 4277 int so_mode; 4278 int error; 4279 struct sockaddr *name; 4280 t_uscalar_t namelen; 4281 int dontroute; 4282 int flags; 4283 sotpi_info_t *sti = SOTOTPI(so); 4284 4285 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4286 (void *)so, (void *)msg, msg->msg_flags, 4287 pr_state(so->so_state, so->so_mode), so->so_error)); 4288 4289 if (so->so_version == SOV_STREAM) { 4290 /* The imaginary "sockmod" has been popped - act as a stream */ 4291 so_update_attrs(so, SOMOD); 4292 return (strwrite(SOTOV(so), uiop, cr)); 4293 } 4294 4295 mutex_enter(&so->so_lock); 4296 so_state = so->so_state; 4297 4298 if (so_state & SS_CANTSENDMORE) { 4299 mutex_exit(&so->so_lock); 4300 return (EPIPE); 4301 } 4302 4303 if (so->so_error != 0) { 4304 error = sogeterr(so, B_TRUE); 4305 if (error != 0) { 4306 mutex_exit(&so->so_lock); 4307 return (error); 4308 } 4309 } 4310 4311 name = (struct sockaddr *)msg->msg_name; 4312 namelen = msg->msg_namelen; 4313 4314 so_mode = so->so_mode; 4315 4316 if (name == NULL) { 4317 if (!(so_state & SS_ISCONNECTED)) { 4318 mutex_exit(&so->so_lock); 4319 if (so_mode & SM_CONNREQUIRED) 4320 return (ENOTCONN); 4321 else 4322 return (EDESTADDRREQ); 4323 } 4324 if (so_mode & SM_CONNREQUIRED) { 4325 name = NULL; 4326 namelen = 0; 4327 } else { 4328 /* 4329 * Note that this code does not prevent sti_faddr_sa 4330 * from changing while it is being used. Thus 4331 * if an "unconnect"+connect occurs concurrently with 4332 * this send the datagram might be delivered to a 4333 * garbaled address. 
4334 */ 4335 ASSERT(sti->sti_faddr_sa); 4336 name = sti->sti_faddr_sa; 4337 namelen = (t_uscalar_t)sti->sti_faddr_len; 4338 } 4339 } else { 4340 if (!(so_state & SS_ISCONNECTED) && 4341 (so_mode & SM_CONNREQUIRED)) { 4342 /* Required but not connected */ 4343 mutex_exit(&so->so_lock); 4344 return (ENOTCONN); 4345 } 4346 /* 4347 * Ignore the address on connection-oriented sockets. 4348 * Just like BSD this code does not generate an error for 4349 * TCP (a CONNREQUIRED socket) when sending to an address 4350 * passed in with sendto/sendmsg. Instead the data is 4351 * delivered on the connection as if no address had been 4352 * supplied. 4353 */ 4354 if ((so_state & SS_ISCONNECTED) && 4355 !(so_mode & SM_CONNREQUIRED)) { 4356 mutex_exit(&so->so_lock); 4357 return (EISCONN); 4358 } 4359 if (!(so_state & SS_ISBOUND)) { 4360 so_lock_single(so); /* Set SOLOCKED */ 4361 error = sotpi_bind(so, NULL, 0, 4362 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4363 so_unlock_single(so, SOLOCKED); 4364 if (error) { 4365 mutex_exit(&so->so_lock); 4366 eprintsoline(so, error); 4367 return (error); 4368 } 4369 } 4370 /* 4371 * Handle delayed datagram errors. These are only queued 4372 * when the application sets SO_DGRAM_ERRIND. 4373 * Return the error if we are sending to the address 4374 * that was returned in the last T_UDERROR_IND. 4375 * If sending to some other address discard the delayed 4376 * error indication. 
4377 */ 4378 if (sti->sti_delayed_error) { 4379 struct T_uderror_ind *tudi; 4380 void *addr; 4381 t_uscalar_t addrlen; 4382 boolean_t match = B_FALSE; 4383 4384 ASSERT(sti->sti_eaddr_mp); 4385 error = sti->sti_delayed_error; 4386 sti->sti_delayed_error = 0; 4387 tudi = 4388 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4389 addrlen = tudi->DEST_length; 4390 addr = sogetoff(sti->sti_eaddr_mp, 4391 tudi->DEST_offset, addrlen, 1); 4392 ASSERT(addr); /* Checked by strsock_proto */ 4393 switch (so->so_family) { 4394 case AF_INET: { 4395 /* Compare just IP address and port */ 4396 sin_t *sin1 = (sin_t *)name; 4397 sin_t *sin2 = (sin_t *)addr; 4398 4399 if (addrlen == sizeof (sin_t) && 4400 namelen == addrlen && 4401 sin1->sin_port == sin2->sin_port && 4402 sin1->sin_addr.s_addr == 4403 sin2->sin_addr.s_addr) 4404 match = B_TRUE; 4405 break; 4406 } 4407 case AF_INET6: { 4408 /* Compare just IP address and port. Not flow */ 4409 sin6_t *sin1 = (sin6_t *)name; 4410 sin6_t *sin2 = (sin6_t *)addr; 4411 4412 if (addrlen == sizeof (sin6_t) && 4413 namelen == addrlen && 4414 sin1->sin6_port == sin2->sin6_port && 4415 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4416 &sin2->sin6_addr)) 4417 match = B_TRUE; 4418 break; 4419 } 4420 case AF_UNIX: 4421 default: 4422 if (namelen == addrlen && 4423 bcmp(name, addr, namelen) == 0) 4424 match = B_TRUE; 4425 } 4426 if (match) { 4427 freemsg(sti->sti_eaddr_mp); 4428 sti->sti_eaddr_mp = NULL; 4429 mutex_exit(&so->so_lock); 4430 #ifdef DEBUG 4431 dprintso(so, 0, 4432 ("sockfs delayed error %d for %s\n", 4433 error, 4434 pr_addr(so->so_family, name, namelen))); 4435 #endif /* DEBUG */ 4436 return (error); 4437 } 4438 freemsg(sti->sti_eaddr_mp); 4439 sti->sti_eaddr_mp = NULL; 4440 } 4441 } 4442 mutex_exit(&so->so_lock); 4443 4444 flags = msg->msg_flags; 4445 dontroute = 0; 4446 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4447 uint32_t val; 4448 4449 val = 1; 4450 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4451 
&val, (t_uscalar_t)sizeof (val), cr); 4452 if (error) 4453 return (error); 4454 dontroute = 1; 4455 } 4456 4457 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4458 error = EOPNOTSUPP; 4459 goto done; 4460 } 4461 if (msg->msg_controllen != 0) { 4462 if (!(so_mode & SM_CONNREQUIRED)) { 4463 so_update_attrs(so, SOMOD); 4464 error = sosend_dgramcmsg(so, name, namelen, uiop, 4465 msg->msg_control, msg->msg_controllen, flags); 4466 } else { 4467 if (flags & MSG_OOB) { 4468 /* Can't generate T_EXDATA_REQ with options */ 4469 error = EOPNOTSUPP; 4470 goto done; 4471 } 4472 so_update_attrs(so, SOMOD); 4473 error = sosend_svccmsg(so, uiop, 4474 !(flags & MSG_EOR), 4475 msg->msg_control, msg->msg_controllen, 4476 flags); 4477 } 4478 goto done; 4479 } 4480 4481 so_update_attrs(so, SOMOD); 4482 if (!(so_mode & SM_CONNREQUIRED)) { 4483 /* 4484 * If there is no SO_DONTROUTE to turn off return immediately 4485 * from send_dgram. This can allow tail-call optimizations. 4486 */ 4487 if (!dontroute) { 4488 return (sosend_dgram(so, name, namelen, uiop, flags)); 4489 } 4490 error = sosend_dgram(so, name, namelen, uiop, flags); 4491 } else { 4492 t_scalar_t prim; 4493 int sflag; 4494 4495 /* Ignore msg_name in the connected state */ 4496 if (flags & MSG_OOB) { 4497 prim = T_EXDATA_REQ; 4498 /* 4499 * Send down T_EXDATA_REQ even if there is flow 4500 * control for data. 4501 */ 4502 sflag = MSG_IGNFLOW; 4503 } else { 4504 if (so_mode & SM_BYTESTREAM) { 4505 /* Byte stream transport - use write */ 4506 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4507 4508 /* Send M_DATA messages */ 4509 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4510 (error = nl7c_data(so, uiop)) >= 0) { 4511 /* NL7C consumed the data */ 4512 return (error); 4513 } 4514 /* 4515 * If there is no SO_DONTROUTE to turn off, 4516 * sti_direct is on, and there is no flow 4517 * control, we can take the fast path. 
4518 */ 4519 if (!dontroute && sti->sti_direct != 0 && 4520 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4521 return (sostream_direct(so, uiop, 4522 NULL, cr)); 4523 } 4524 error = strwrite(SOTOV(so), uiop, cr); 4525 goto done; 4526 } 4527 prim = T_DATA_REQ; 4528 sflag = 0; 4529 } 4530 /* 4531 * If there is no SO_DONTROUTE to turn off return immediately 4532 * from sosend_svc. This can allow tail-call optimizations. 4533 */ 4534 if (!dontroute) 4535 return (sosend_svc(so, uiop, prim, 4536 !(flags & MSG_EOR), sflag)); 4537 error = sosend_svc(so, uiop, prim, 4538 !(flags & MSG_EOR), sflag); 4539 } 4540 ASSERT(dontroute); 4541 done: 4542 if (dontroute) { 4543 uint32_t val; 4544 4545 val = 0; 4546 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4547 &val, (t_uscalar_t)sizeof (val), cr); 4548 } 4549 return (error); 4550 } 4551 4552 /* 4553 * kstrwritemp() has very similar semantics as that of strwrite(). 4554 * The main difference is it obtains mblks from the caller and also 4555 * does not do any copy as done in strwrite() from user buffers to 4556 * kernel buffers. 4557 * 4558 * Currently, this routine is used by sendfile to send data allocated 4559 * within the kernel without any copying. This interface does not use the 4560 * synchronous stream interface as synch. stream interface implies 4561 * copying. 4562 */ 4563 int 4564 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4565 { 4566 struct stdata *stp; 4567 struct queue *wqp; 4568 mblk_t *newmp; 4569 char waitflag; 4570 int tempmode; 4571 int error = 0; 4572 int done = 0; 4573 struct sonode *so; 4574 boolean_t direct; 4575 4576 ASSERT(vp->v_stream); 4577 stp = vp->v_stream; 4578 4579 so = VTOSO(vp); 4580 direct = _SOTOTPI(so)->sti_direct; 4581 4582 /* 4583 * This is the sockfs direct fast path. canputnext() need 4584 * not be accurate so we don't grab the sd_lock here. If 4585 * we get flow-controlled, we grab sd_lock just before the 4586 * do..while loop below to emulate what strwrite() does. 
4587 */ 4588 wqp = stp->sd_wrq; 4589 if (canputnext(wqp) && direct && 4590 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4591 return (sostream_direct(so, NULL, mp, CRED())); 4592 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4593 /* Fast check of flags before acquiring the lock */ 4594 mutex_enter(&stp->sd_lock); 4595 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4596 mutex_exit(&stp->sd_lock); 4597 if (error != 0) { 4598 if (!(stp->sd_flag & STPLEX) && 4599 (stp->sd_wput_opt & SW_SIGPIPE)) { 4600 error = EPIPE; 4601 } 4602 return (error); 4603 } 4604 } 4605 4606 waitflag = WRITEWAIT; 4607 if (stp->sd_flag & OLDNDELAY) 4608 tempmode = fmode & ~FNDELAY; 4609 else 4610 tempmode = fmode; 4611 4612 mutex_enter(&stp->sd_lock); 4613 do { 4614 if (canputnext(wqp)) { 4615 mutex_exit(&stp->sd_lock); 4616 if (stp->sd_wputdatafunc != NULL) { 4617 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4618 NULL, NULL, NULL); 4619 if (newmp == NULL) { 4620 /* The caller will free mp */ 4621 return (ECOMM); 4622 } 4623 mp = newmp; 4624 } 4625 putnext(wqp, mp); 4626 return (0); 4627 } 4628 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4629 &done); 4630 } while (error == 0 && !done); 4631 4632 mutex_exit(&stp->sd_lock); 4633 /* 4634 * EAGAIN tells the application to try again. ENOMEM 4635 * is returned only if the memory allocation size 4636 * exceeds the physical limits of the system. ENOMEM 4637 * can't be true here. 
4638 */ 4639 if (error == ENOMEM) 4640 error = EAGAIN; 4641 return (error); 4642 } 4643 4644 /* ARGSUSED */ 4645 static int 4646 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4647 struct cred *cr, mblk_t **mpp) 4648 { 4649 int error; 4650 4651 if (so->so_family != AF_INET && so->so_family != AF_INET6) 4652 return (EAFNOSUPPORT); 4653 4654 if (so->so_state & SS_CANTSENDMORE) 4655 return (EPIPE); 4656 4657 if (so->so_type != SOCK_STREAM) 4658 return (EOPNOTSUPP); 4659 4660 if ((so->so_state & SS_ISCONNECTED) == 0) 4661 return (ENOTCONN); 4662 4663 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4664 if (error == 0) 4665 *mpp = NULL; 4666 return (error); 4667 } 4668 4669 /* 4670 * Sending data on a datagram socket. 4671 * Assumes caller has verified that SS_ISBOUND etc. are set. 4672 */ 4673 /* ARGSUSED */ 4674 static int 4675 sodgram_direct(struct sonode *so, struct sockaddr *name, 4676 socklen_t namelen, struct uio *uiop, int flags) 4677 { 4678 struct T_unitdata_req tudr; 4679 mblk_t *mp = NULL; 4680 int error = 0; 4681 void *addr; 4682 socklen_t addrlen; 4683 ssize_t len; 4684 struct stdata *stp = SOTOV(so)->v_stream; 4685 int so_state; 4686 queue_t *udp_wq; 4687 boolean_t connected; 4688 mblk_t *mpdata = NULL; 4689 sotpi_info_t *sti = SOTOTPI(so); 4690 4691 ASSERT(name != NULL && namelen != 0); 4692 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4693 ASSERT(!(so->so_mode & SM_EXDATA)); 4694 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4695 ASSERT(SOTOV(so)->v_type == VSOCK); 4696 4697 /* Caller checked for proper length */ 4698 len = uiop->uio_resid; 4699 ASSERT(len <= sti->sti_tidu_size); 4700 4701 /* Length and family checks have been done by caller */ 4702 ASSERT(name->sa_family == so->so_family); 4703 ASSERT(so->so_family == AF_INET || 4704 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4705 ASSERT(so->so_family == AF_INET6 || 4706 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4707 4708 addr = name; 4709 addrlen 
= namelen; 4710 4711 if (stp->sd_sidp != NULL && 4712 (error = straccess(stp, JCWRITE)) != 0) 4713 goto done; 4714 4715 so_state = so->so_state; 4716 4717 connected = so_state & SS_ISCONNECTED; 4718 if (!connected) { 4719 tudr.PRIM_type = T_UNITDATA_REQ; 4720 tudr.DEST_length = addrlen; 4721 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4722 tudr.OPT_length = 0; 4723 tudr.OPT_offset = 0; 4724 4725 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4726 _ALLOC_INTR, CRED()); 4727 if (mp == NULL) { 4728 /* 4729 * Caught a signal waiting for memory. 4730 * Let send* return EINTR. 4731 */ 4732 error = EINTR; 4733 goto done; 4734 } 4735 } 4736 4737 /* 4738 * For UDP we don't break up the copyin into smaller pieces 4739 * as in the TCP case. That means if ENOMEM is returned by 4740 * mcopyinuio() then the uio vector has not been modified at 4741 * all and we fallback to either strwrite() or kstrputmsg() 4742 * below. Note also that we never generate priority messages 4743 * from here. 4744 */ 4745 udp_wq = stp->sd_wrq->q_next; 4746 if (canput(udp_wq) && 4747 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4748 ASSERT(DB_TYPE(mpdata) == M_DATA); 4749 ASSERT(uiop->uio_resid == 0); 4750 if (!connected) 4751 linkb(mp, mpdata); 4752 else 4753 mp = mpdata; 4754 if (audit_active) 4755 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4756 4757 udp_wput(udp_wq, mp); 4758 return (0); 4759 } 4760 4761 ASSERT(mpdata == NULL); 4762 if (error != 0 && error != ENOMEM) { 4763 freemsg(mp); 4764 return (error); 4765 } 4766 4767 /* 4768 * For connected, let strwrite() handle the blocking case. 4769 * Otherwise we fall thru and use kstrputmsg(). 
4770 */ 4771 if (connected) 4772 return (strwrite(SOTOV(so), uiop, CRED())); 4773 4774 if (audit_active) 4775 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4776 4777 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4778 done: 4779 #ifdef SOCK_DEBUG 4780 if (error != 0) { 4781 eprintsoline(so, error); 4782 } 4783 #endif /* SOCK_DEBUG */ 4784 return (error); 4785 } 4786 4787 int 4788 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4789 { 4790 struct stdata *stp = SOTOV(so)->v_stream; 4791 ssize_t iosize, rmax, maxblk; 4792 queue_t *tcp_wq = stp->sd_wrq->q_next; 4793 mblk_t *newmp; 4794 int error = 0, wflag = 0; 4795 4796 ASSERT(so->so_mode & SM_BYTESTREAM); 4797 ASSERT(SOTOV(so)->v_type == VSOCK); 4798 4799 if (stp->sd_sidp != NULL && 4800 (error = straccess(stp, JCWRITE)) != 0) 4801 return (error); 4802 4803 if (uiop == NULL) { 4804 /* 4805 * kstrwritemp() should have checked sd_flag and 4806 * flow-control before coming here. If we end up 4807 * here it means that we can simply pass down the 4808 * data to tcp. 
4809 */ 4810 ASSERT(mp != NULL); 4811 if (stp->sd_wputdatafunc != NULL) { 4812 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4813 NULL, NULL, NULL); 4814 if (newmp == NULL) { 4815 /* The caller will free mp */ 4816 return (ECOMM); 4817 } 4818 mp = newmp; 4819 } 4820 tcp_wput(tcp_wq, mp); 4821 return (0); 4822 } 4823 4824 /* Fallback to strwrite() to do proper error handling */ 4825 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4826 return (strwrite(SOTOV(so), uiop, cr)); 4827 4828 rmax = stp->sd_qn_maxpsz; 4829 ASSERT(rmax >= 0 || rmax == INFPSZ); 4830 if (rmax == 0 || uiop->uio_resid <= 0) 4831 return (0); 4832 4833 if (rmax == INFPSZ) 4834 rmax = uiop->uio_resid; 4835 4836 maxblk = stp->sd_maxblk; 4837 4838 for (;;) { 4839 iosize = MIN(uiop->uio_resid, rmax); 4840 4841 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4842 if (mp == NULL) { 4843 /* 4844 * Fallback to strwrite() for ENOMEM; if this 4845 * is our first time in this routine and the uio 4846 * vector has not been modified, we will end up 4847 * calling strwrite() without any flag set. 4848 */ 4849 if (error == ENOMEM) 4850 goto slow_send; 4851 else 4852 return (error); 4853 } 4854 ASSERT(uiop->uio_resid >= 0); 4855 /* 4856 * If mp is non-NULL and ENOMEM is set, it means that 4857 * mcopyinuio() was able to break down some of the user 4858 * data into one or more mblks. Send the partial data 4859 * to tcp and let the rest be handled in strwrite(). 
4860 */ 4861 ASSERT(error == 0 || error == ENOMEM); 4862 if (stp->sd_wputdatafunc != NULL) { 4863 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4864 NULL, NULL, NULL); 4865 if (newmp == NULL) { 4866 /* The caller will free mp */ 4867 return (ECOMM); 4868 } 4869 mp = newmp; 4870 } 4871 tcp_wput(tcp_wq, mp); 4872 4873 wflag |= NOINTR; 4874 4875 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4876 ASSERT(error == 0); 4877 break; 4878 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4879 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4880 slow_send: 4881 /* 4882 * We were able to send down partial data using 4883 * the direct call interface, but are now relying 4884 * on strwrite() to handle the non-fastpath cases. 4885 * If the socket is blocking we will sleep in 4886 * strwaitq() until write is permitted, otherwise, 4887 * we will need to return the amount of bytes 4888 * written so far back to the app. This is the 4889 * reason why we pass NOINTR flag to strwrite() 4890 * for non-blocking socket, because we don't want 4891 * to return EAGAIN when portion of the user data 4892 * has actually been sent down. 4893 */ 4894 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4895 } 4896 } 4897 return (0); 4898 } 4899 4900 /* 4901 * Update sti_faddr by asking the transport (unless AF_UNIX). 
4902 */ 4903 /* ARGSUSED */ 4904 int 4905 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4906 boolean_t accept, struct cred *cr) 4907 { 4908 struct strbuf strbuf; 4909 int error = 0, res; 4910 void *addr; 4911 t_uscalar_t addrlen; 4912 k_sigset_t smask; 4913 sotpi_info_t *sti = SOTOTPI(so); 4914 4915 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4916 (void *)so, pr_state(so->so_state, so->so_mode))); 4917 4918 ASSERT(*namelen > 0); 4919 mutex_enter(&so->so_lock); 4920 so_lock_single(so); /* Set SOLOCKED */ 4921 4922 if (accept) { 4923 bcopy(sti->sti_faddr_sa, name, 4924 MIN(*namelen, sti->sti_faddr_len)); 4925 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4926 goto done; 4927 } 4928 4929 if (!(so->so_state & SS_ISCONNECTED)) { 4930 error = ENOTCONN; 4931 goto done; 4932 } 4933 /* Added this check for X/Open */ 4934 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4935 error = EINVAL; 4936 if (xnet_check_print) { 4937 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4938 } 4939 goto done; 4940 } 4941 4942 if (sti->sti_faddr_valid) { 4943 bcopy(sti->sti_faddr_sa, name, 4944 MIN(*namelen, sti->sti_faddr_len)); 4945 *namelen = sti->sti_faddr_noxlate ? 
0: sti->sti_faddr_len; 4946 goto done; 4947 } 4948 4949 #ifdef DEBUG 4950 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4951 pr_addr(so->so_family, sti->sti_faddr_sa, 4952 (t_uscalar_t)sti->sti_faddr_len))); 4953 #endif /* DEBUG */ 4954 4955 if (so->so_family == AF_UNIX) { 4956 /* Transport has different name space - return local info */ 4957 if (sti->sti_faddr_noxlate) 4958 *namelen = 0; 4959 error = 0; 4960 goto done; 4961 } 4962 4963 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4964 4965 ASSERT(sti->sti_faddr_sa); 4966 /* Allocate local buffer to use with ioctl */ 4967 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4968 mutex_exit(&so->so_lock); 4969 addr = kmem_alloc(addrlen, KM_SLEEP); 4970 4971 /* 4972 * Issue TI_GETPEERNAME with signals masked. 4973 * Put the result in sti_faddr_sa so that getpeername works after 4974 * a shutdown(output). 4975 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4976 * back to the socket. 4977 */ 4978 strbuf.buf = addr; 4979 strbuf.maxlen = addrlen; 4980 strbuf.len = 0; 4981 4982 sigintr(&smask, 0); 4983 res = 0; 4984 ASSERT(cr); 4985 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4986 0, K_TO_K, cr, &res); 4987 sigunintr(&smask); 4988 4989 mutex_enter(&so->so_lock); 4990 /* 4991 * If there is an error record the error in so_error put don't fail 4992 * the getpeername. Instead fallback on the recorded 4993 * sti->sti_faddr_sa. 4994 */ 4995 if (error) { 4996 /* 4997 * Various stream head errors can be returned to the ioctl. 4998 * However, it is impossible to determine which ones of 4999 * these are really socket level errors that were incorrectly 5000 * consumed by the ioctl. Thus this code silently ignores the 5001 * error - to code explicitly does not reinstate the error 5002 * using soseterror(). 5003 * Experiments have shows that at least this set of 5004 * errors are reported and should not be reinstated on the 5005 * socket: 5006 * EINVAL E.g. 
if an I_LINK was in effect when 5007 * getpeername was called. 5008 * EPIPE The ioctl error semantics prefer the write 5009 * side error over the read side error. 5010 * ENOTCONN The transport just got disconnected but 5011 * sockfs had not yet seen the T_DISCON_IND 5012 * when issuing the ioctl. 5013 */ 5014 error = 0; 5015 } else if (res == 0 && strbuf.len > 0 && 5016 (so->so_state & SS_ISCONNECTED)) { 5017 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 5018 sti->sti_faddr_len = (socklen_t)strbuf.len; 5019 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 5020 sti->sti_faddr_valid = 1; 5021 5022 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 5023 *namelen = sti->sti_faddr_len; 5024 } 5025 kmem_free(addr, addrlen); 5026 #ifdef DEBUG 5027 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 5028 pr_addr(so->so_family, sti->sti_faddr_sa, 5029 (t_uscalar_t)sti->sti_faddr_len))); 5030 #endif /* DEBUG */ 5031 done: 5032 so_unlock_single(so, SOLOCKED); 5033 mutex_exit(&so->so_lock); 5034 return (error); 5035 } 5036 5037 /* 5038 * Update sti_laddr by asking the transport (unless AF_UNIX). 
5039 */ 5040 int 5041 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 5042 struct cred *cr) 5043 { 5044 struct strbuf strbuf; 5045 int error = 0, res; 5046 void *addr; 5047 t_uscalar_t addrlen; 5048 k_sigset_t smask; 5049 sotpi_info_t *sti = SOTOTPI(so); 5050 5051 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 5052 (void *)so, pr_state(so->so_state, so->so_mode))); 5053 5054 ASSERT(*namelen > 0); 5055 mutex_enter(&so->so_lock); 5056 so_lock_single(so); /* Set SOLOCKED */ 5057 5058 #ifdef DEBUG 5059 5060 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 5061 pr_addr(so->so_family, sti->sti_laddr_sa, 5062 (t_uscalar_t)sti->sti_laddr_len))); 5063 #endif /* DEBUG */ 5064 if (sti->sti_laddr_valid) { 5065 bcopy(sti->sti_laddr_sa, name, 5066 MIN(*namelen, sti->sti_laddr_len)); 5067 *namelen = sti->sti_laddr_len; 5068 goto done; 5069 } 5070 5071 if (so->so_family == AF_UNIX) { 5072 /* Transport has different name space - return local info */ 5073 error = 0; 5074 *namelen = 0; 5075 goto done; 5076 } 5077 if (!(so->so_state & SS_ISBOUND)) { 5078 /* If not bound, then nothing to return. */ 5079 error = 0; 5080 goto done; 5081 } 5082 5083 /* Allocate local buffer to use with ioctl */ 5084 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 5085 mutex_exit(&so->so_lock); 5086 addr = kmem_alloc(addrlen, KM_SLEEP); 5087 5088 /* 5089 * Issue TI_GETMYNAME with signals masked. 5090 * Put the result in sti_laddr_sa so that getsockname works after 5091 * a shutdown(output). 5092 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5093 * back to the socket. 
5094 */ 5095 strbuf.buf = addr; 5096 strbuf.maxlen = addrlen; 5097 strbuf.len = 0; 5098 5099 sigintr(&smask, 0); 5100 res = 0; 5101 ASSERT(cr); 5102 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5103 0, K_TO_K, cr, &res); 5104 sigunintr(&smask); 5105 5106 mutex_enter(&so->so_lock); 5107 /* 5108 * If there is an error record the error in so_error put don't fail 5109 * the getsockname. Instead fallback on the recorded 5110 * sti->sti_laddr_sa. 5111 */ 5112 if (error) { 5113 /* 5114 * Various stream head errors can be returned to the ioctl. 5115 * However, it is impossible to determine which ones of 5116 * these are really socket level errors that were incorrectly 5117 * consumed by the ioctl. Thus this code silently ignores the 5118 * error - to code explicitly does not reinstate the error 5119 * using soseterror(). 5120 * Experiments have shows that at least this set of 5121 * errors are reported and should not be reinstated on the 5122 * socket: 5123 * EINVAL E.g. if an I_LINK was in effect when 5124 * getsockname was called. 5125 * EPIPE The ioctl error semantics prefer the write 5126 * side error over the read side error. 5127 */ 5128 error = 0; 5129 } else if (res == 0 && strbuf.len > 0 && 5130 (so->so_state & SS_ISBOUND)) { 5131 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5132 sti->sti_laddr_len = (socklen_t)strbuf.len; 5133 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5134 sti->sti_laddr_valid = 1; 5135 5136 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5137 *namelen = sti->sti_laddr_len; 5138 } 5139 kmem_free(addr, addrlen); 5140 #ifdef DEBUG 5141 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5142 pr_addr(so->so_family, sti->sti_laddr_sa, 5143 (t_uscalar_t)sti->sti_laddr_len))); 5144 #endif /* DEBUG */ 5145 done: 5146 so_unlock_single(so, SOLOCKED); 5147 mutex_exit(&so->so_lock); 5148 return (error); 5149 } 5150 5151 /* 5152 * Get socket options. 
For SOL_SOCKET options some options are handled 5153 * by the sockfs while others use the value recorded in the sonode as a 5154 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5155 * 5156 * On the return most *optlenp bytes are copied to optval. 5157 */ 5158 /* ARGSUSED */ 5159 int 5160 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5161 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5162 { 5163 struct T_optmgmt_req optmgmt_req; 5164 struct T_optmgmt_ack *optmgmt_ack; 5165 struct opthdr oh; 5166 struct opthdr *opt_res; 5167 mblk_t *mp = NULL; 5168 int error = 0; 5169 void *option = NULL; /* Set if fallback value */ 5170 t_uscalar_t maxlen = *optlenp; 5171 t_uscalar_t len; 5172 uint32_t value; 5173 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5174 struct timeval32 tmo_val32; 5175 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5176 5177 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5178 (void *)so, level, option_name, optval, (void *)optlenp, 5179 pr_state(so->so_state, so->so_mode))); 5180 5181 mutex_enter(&so->so_lock); 5182 so_lock_single(so); /* Set SOLOCKED */ 5183 5184 /* 5185 * Check for SOL_SOCKET options. 5186 * Certain SOL_SOCKET options are returned directly whereas 5187 * others only provide a default (fallback) value should 5188 * the T_SVR4_OPTMGMT_REQ fail. 
5189 */ 5190 if (level == SOL_SOCKET) { 5191 /* Check parameters */ 5192 switch (option_name) { 5193 case SO_TYPE: 5194 case SO_ERROR: 5195 case SO_DEBUG: 5196 case SO_ACCEPTCONN: 5197 case SO_REUSEADDR: 5198 case SO_KEEPALIVE: 5199 case SO_DONTROUTE: 5200 case SO_BROADCAST: 5201 case SO_USELOOPBACK: 5202 case SO_OOBINLINE: 5203 case SO_SNDBUF: 5204 case SO_RCVBUF: 5205 #ifdef notyet 5206 case SO_SNDLOWAT: 5207 case SO_RCVLOWAT: 5208 #endif /* notyet */ 5209 case SO_DOMAIN: 5210 case SO_DGRAM_ERRIND: 5211 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5212 error = EINVAL; 5213 eprintsoline(so, error); 5214 goto done2; 5215 } 5216 break; 5217 case SO_RCVTIMEO: 5218 case SO_SNDTIMEO: 5219 if (get_udatamodel() == DATAMODEL_NONE || 5220 get_udatamodel() == DATAMODEL_NATIVE) { 5221 if (maxlen < sizeof (struct timeval)) { 5222 error = EINVAL; 5223 eprintsoline(so, error); 5224 goto done2; 5225 } 5226 } else { 5227 if (maxlen < sizeof (struct timeval32)) { 5228 error = EINVAL; 5229 eprintsoline(so, error); 5230 goto done2; 5231 } 5232 5233 } 5234 break; 5235 case SO_LINGER: 5236 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5237 error = EINVAL; 5238 eprintsoline(so, error); 5239 goto done2; 5240 } 5241 break; 5242 case SO_SND_BUFINFO: 5243 if (maxlen < (t_uscalar_t) 5244 sizeof (struct so_snd_bufinfo)) { 5245 error = EINVAL; 5246 eprintsoline(so, error); 5247 goto done2; 5248 } 5249 break; 5250 } 5251 5252 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5253 5254 switch (option_name) { 5255 case SO_TYPE: 5256 value = so->so_type; 5257 option = &value; 5258 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5259 5260 case SO_ERROR: 5261 value = sogeterr(so, B_TRUE); 5262 option = &value; 5263 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5264 5265 case SO_ACCEPTCONN: 5266 if (so->so_state & SS_ACCEPTCONN) 5267 value = SO_ACCEPTCONN; 5268 else 5269 value = 0; 5270 #ifdef DEBUG 5271 if (value) { 5272 dprintso(so, 1, 5273 ("sotpi_getsockopt: 0x%x is 
set\n", 5274 option_name)); 5275 } else { 5276 dprintso(so, 1, 5277 ("sotpi_getsockopt: 0x%x not set\n", 5278 option_name)); 5279 } 5280 #endif /* DEBUG */ 5281 option = &value; 5282 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5283 5284 case SO_DEBUG: 5285 case SO_REUSEADDR: 5286 case SO_KEEPALIVE: 5287 case SO_DONTROUTE: 5288 case SO_BROADCAST: 5289 case SO_USELOOPBACK: 5290 case SO_OOBINLINE: 5291 case SO_DGRAM_ERRIND: 5292 value = (so->so_options & option_name); 5293 #ifdef DEBUG 5294 if (value) { 5295 dprintso(so, 1, 5296 ("sotpi_getsockopt: 0x%x is set\n", 5297 option_name)); 5298 } else { 5299 dprintso(so, 1, 5300 ("sotpi_getsockopt: 0x%x not set\n", 5301 option_name)); 5302 } 5303 #endif /* DEBUG */ 5304 option = &value; 5305 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5306 5307 /* 5308 * The following options are only returned by sockfs when the 5309 * T_SVR4_OPTMGMT_REQ fails. 5310 */ 5311 case SO_LINGER: 5312 option = &so->so_linger; 5313 len = (t_uscalar_t)sizeof (struct linger); 5314 break; 5315 case SO_SNDBUF: { 5316 ssize_t lvalue; 5317 5318 /* 5319 * If the option has not been set then get a default 5320 * value from the read queue. This value is 5321 * returned if the transport fails 5322 * the T_SVR4_OPTMGMT_REQ. 5323 */ 5324 lvalue = so->so_sndbuf; 5325 if (lvalue == 0) { 5326 mutex_exit(&so->so_lock); 5327 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5328 QHIWAT, 0, &lvalue); 5329 mutex_enter(&so->so_lock); 5330 dprintso(so, 1, 5331 ("got SO_SNDBUF %ld from q\n", lvalue)); 5332 } 5333 value = (int)lvalue; 5334 option = &value; 5335 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5336 break; 5337 } 5338 case SO_RCVBUF: { 5339 ssize_t lvalue; 5340 5341 /* 5342 * If the option has not been set then get a default 5343 * value from the read queue. This value is 5344 * returned if the transport fails 5345 * the T_SVR4_OPTMGMT_REQ. 
5346 * 5347 * XXX If SO_RCVBUF has been set and this is an 5348 * XPG 4.2 application then do not ask the transport 5349 * since the transport might adjust the value and not 5350 * return exactly what was set by the application. 5351 * For non-XPG 4.2 application we return the value 5352 * that the transport is actually using. 5353 */ 5354 lvalue = so->so_rcvbuf; 5355 if (lvalue == 0) { 5356 mutex_exit(&so->so_lock); 5357 (void) strqget(RD(strvp2wq(SOTOV(so))), 5358 QHIWAT, 0, &lvalue); 5359 mutex_enter(&so->so_lock); 5360 dprintso(so, 1, 5361 ("got SO_RCVBUF %ld from q\n", lvalue)); 5362 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5363 value = (int)lvalue; 5364 option = &value; 5365 goto copyout; /* skip asking transport */ 5366 } 5367 value = (int)lvalue; 5368 option = &value; 5369 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5370 break; 5371 } 5372 case SO_DOMAIN: 5373 value = so->so_family; 5374 option = &value; 5375 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5376 5377 #ifdef notyet 5378 /* 5379 * We do not implement the semantics of these options 5380 * thus we shouldn't implement the options either. 
5381 */ 5382 case SO_SNDLOWAT: 5383 value = so->so_sndlowat; 5384 option = &value; 5385 break; 5386 case SO_RCVLOWAT: 5387 value = so->so_rcvlowat; 5388 option = &value; 5389 break; 5390 #endif /* notyet */ 5391 case SO_SNDTIMEO: 5392 case SO_RCVTIMEO: { 5393 clock_t val; 5394 5395 if (option_name == SO_RCVTIMEO) 5396 val = drv_hztousec(so->so_rcvtimeo); 5397 else 5398 val = drv_hztousec(so->so_sndtimeo); 5399 tmo_val.tv_sec = val / (1000 * 1000); 5400 tmo_val.tv_usec = val % (1000 * 1000); 5401 if (get_udatamodel() == DATAMODEL_NONE || 5402 get_udatamodel() == DATAMODEL_NATIVE) { 5403 option = &tmo_val; 5404 len = sizeof (struct timeval); 5405 } else { 5406 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5407 option = &tmo_val32; 5408 len = sizeof (struct timeval32); 5409 } 5410 break; 5411 } 5412 case SO_SND_BUFINFO: { 5413 snd_bufinfo.sbi_wroff = 5414 (so->so_proto_props).sopp_wroff; 5415 snd_bufinfo.sbi_maxblk = 5416 (so->so_proto_props).sopp_maxblk; 5417 snd_bufinfo.sbi_maxpsz = 5418 (so->so_proto_props).sopp_maxpsz; 5419 snd_bufinfo.sbi_tail = 5420 (so->so_proto_props).sopp_tail; 5421 option = &snd_bufinfo; 5422 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5423 break; 5424 } 5425 } 5426 } 5427 5428 mutex_exit(&so->so_lock); 5429 5430 /* Send request */ 5431 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5432 optmgmt_req.MGMT_flags = T_CHECK; 5433 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5434 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5435 5436 oh.level = level; 5437 oh.name = option_name; 5438 oh.len = maxlen; 5439 5440 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5441 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5442 /* Let option management work in the presence of data flow control */ 5443 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5444 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5445 mp = NULL; 5446 mutex_enter(&so->so_lock); 5447 if (error) { 5448 eprintsoline(so, error); 5449 goto done2; 
5450 } 5451 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5452 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5453 if (error) { 5454 if (option != NULL) { 5455 /* We have a fallback value */ 5456 error = 0; 5457 goto copyout; 5458 } 5459 eprintsoline(so, error); 5460 goto done2; 5461 } 5462 ASSERT(mp); 5463 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5464 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5465 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5466 if (opt_res == NULL) { 5467 if (option != NULL) { 5468 /* We have a fallback value */ 5469 error = 0; 5470 goto copyout; 5471 } 5472 error = EPROTO; 5473 eprintsoline(so, error); 5474 goto done; 5475 } 5476 option = &opt_res[1]; 5477 5478 /* check to ensure that the option is within bounds */ 5479 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5480 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5481 if (option != NULL) { 5482 /* We have a fallback value */ 5483 error = 0; 5484 goto copyout; 5485 } 5486 error = EPROTO; 5487 eprintsoline(so, error); 5488 goto done; 5489 } 5490 5491 len = opt_res->len; 5492 5493 copyout: { 5494 t_uscalar_t size = MIN(len, maxlen); 5495 bcopy(option, optval, size); 5496 bcopy(&size, optlenp, sizeof (size)); 5497 } 5498 done: 5499 freemsg(mp); 5500 done2: 5501 so_unlock_single(so, SOLOCKED); 5502 mutex_exit(&so->so_lock); 5503 5504 return (error); 5505 } 5506 5507 /* 5508 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5509 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5510 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5511 * setsockopt has to work even if the transport does not support the option. 
5512 */ 5513 /* ARGSUSED */ 5514 int 5515 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5516 const void *optval, t_uscalar_t optlen, struct cred *cr) 5517 { 5518 struct T_optmgmt_req optmgmt_req; 5519 struct opthdr oh; 5520 mblk_t *mp; 5521 int error = 0; 5522 boolean_t handled = B_FALSE; 5523 5524 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5525 (void *)so, level, option_name, optval, optlen, 5526 pr_state(so->so_state, so->so_mode))); 5527 5528 /* X/Open requires this check */ 5529 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5530 if (xnet_check_print) 5531 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5532 return (EINVAL); 5533 } 5534 5535 mutex_enter(&so->so_lock); 5536 so_lock_single(so); /* Set SOLOCKED */ 5537 mutex_exit(&so->so_lock); 5538 5539 /* 5540 * For SOCKET or TCP level options, try to set it here itself 5541 * provided socket has not been popped and we know the tcp 5542 * structure (stored in so_priv). 5543 */ 5544 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5545 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5546 (so->so_version == SOV_SOCKSTREAM) && 5547 (so->so_proto_handle != NULL)) { 5548 tcp_t *tcp = (tcp_t *)so->so_proto_handle; 5549 boolean_t onoff; 5550 5551 #define intvalue (*(int32_t *)optval) 5552 5553 switch (level) { 5554 case SOL_SOCKET: 5555 switch (option_name) { /* Check length param */ 5556 case SO_DEBUG: 5557 case SO_REUSEADDR: 5558 case SO_DONTROUTE: 5559 case SO_BROADCAST: 5560 case SO_USELOOPBACK: 5561 case SO_OOBINLINE: 5562 case SO_DGRAM_ERRIND: 5563 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5564 error = EINVAL; 5565 eprintsoline(so, error); 5566 mutex_enter(&so->so_lock); 5567 goto done2; 5568 } 5569 ASSERT(optval); 5570 onoff = intvalue != 0; 5571 handled = B_TRUE; 5572 break; 5573 case SO_SNDTIMEO: 5574 case SO_RCVTIMEO: 5575 if (get_udatamodel() == DATAMODEL_NONE || 5576 get_udatamodel() == DATAMODEL_NATIVE) { 5577 if (optlen != 
5578 sizeof (struct timeval)) { 5579 error = EINVAL; 5580 eprintsoline(so, error); 5581 mutex_enter(&so->so_lock); 5582 goto done2; 5583 } 5584 } else { 5585 if (optlen != 5586 sizeof (struct timeval32)) { 5587 error = EINVAL; 5588 eprintsoline(so, error); 5589 mutex_enter(&so->so_lock); 5590 goto done2; 5591 } 5592 } 5593 ASSERT(optval); 5594 handled = B_TRUE; 5595 break; 5596 case SO_LINGER: 5597 if (optlen != 5598 (t_uscalar_t)sizeof (struct linger)) { 5599 error = EINVAL; 5600 eprintsoline(so, error); 5601 mutex_enter(&so->so_lock); 5602 goto done2; 5603 } 5604 ASSERT(optval); 5605 handled = B_TRUE; 5606 break; 5607 } 5608 5609 switch (option_name) { /* Do actions */ 5610 case SO_LINGER: { 5611 struct linger *lgr = (struct linger *)optval; 5612 5613 if (lgr->l_onoff) { 5614 tcp->tcp_linger = 1; 5615 tcp->tcp_lingertime = lgr->l_linger; 5616 so->so_linger.l_onoff = SO_LINGER; 5617 so->so_options |= SO_LINGER; 5618 } else { 5619 tcp->tcp_linger = 0; 5620 tcp->tcp_lingertime = 0; 5621 so->so_linger.l_onoff = 0; 5622 so->so_options &= ~SO_LINGER; 5623 } 5624 so->so_linger.l_linger = lgr->l_linger; 5625 handled = B_TRUE; 5626 break; 5627 } 5628 case SO_SNDTIMEO: 5629 case SO_RCVTIMEO: { 5630 struct timeval tl; 5631 clock_t val; 5632 5633 if (get_udatamodel() == DATAMODEL_NONE || 5634 get_udatamodel() == DATAMODEL_NATIVE) 5635 bcopy(&tl, (struct timeval *)optval, 5636 sizeof (struct timeval)); 5637 else 5638 TIMEVAL32_TO_TIMEVAL(&tl, 5639 (struct timeval32 *)optval); 5640 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5641 if (option_name == SO_RCVTIMEO) 5642 so->so_rcvtimeo = drv_usectohz(val); 5643 else 5644 so->so_sndtimeo = drv_usectohz(val); 5645 break; 5646 } 5647 5648 case SO_DEBUG: 5649 tcp->tcp_debug = onoff; 5650 #ifdef SOCK_TEST 5651 if (intvalue & 2) 5652 sock_test_timelimit = 10 * hz; 5653 else 5654 sock_test_timelimit = 0; 5655 5656 if (intvalue & 4) 5657 do_useracc = 0; 5658 else 5659 do_useracc = 1; 5660 #endif /* SOCK_TEST */ 5661 break; 5662 case 
SO_DONTROUTE: 5663 /* 5664 * SO_DONTROUTE, SO_USELOOPBACK and 5665 * SO_BROADCAST are only of interest to IP. 5666 * We track them here only so 5667 * that we can report their current value. 5668 */ 5669 tcp->tcp_dontroute = onoff; 5670 if (onoff) 5671 so->so_options |= option_name; 5672 else 5673 so->so_options &= ~option_name; 5674 break; 5675 case SO_USELOOPBACK: 5676 tcp->tcp_useloopback = onoff; 5677 if (onoff) 5678 so->so_options |= option_name; 5679 else 5680 so->so_options &= ~option_name; 5681 break; 5682 case SO_BROADCAST: 5683 tcp->tcp_broadcast = onoff; 5684 if (onoff) 5685 so->so_options |= option_name; 5686 else 5687 so->so_options &= ~option_name; 5688 break; 5689 case SO_REUSEADDR: 5690 tcp->tcp_reuseaddr = onoff; 5691 if (onoff) 5692 so->so_options |= option_name; 5693 else 5694 so->so_options &= ~option_name; 5695 break; 5696 case SO_OOBINLINE: 5697 tcp->tcp_oobinline = onoff; 5698 if (onoff) 5699 so->so_options |= option_name; 5700 else 5701 so->so_options &= ~option_name; 5702 break; 5703 case SO_DGRAM_ERRIND: 5704 tcp->tcp_dgram_errind = onoff; 5705 if (onoff) 5706 so->so_options |= option_name; 5707 else 5708 so->so_options &= ~option_name; 5709 break; 5710 } 5711 break; 5712 case IPPROTO_TCP: 5713 switch (option_name) { 5714 case TCP_NODELAY: 5715 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5716 error = EINVAL; 5717 eprintsoline(so, error); 5718 mutex_enter(&so->so_lock); 5719 goto done2; 5720 } 5721 ASSERT(optval); 5722 tcp->tcp_naglim = intvalue ? 
1 : tcp->tcp_mss; 5723 handled = B_TRUE; 5724 break; 5725 } 5726 break; 5727 default: 5728 handled = B_FALSE; 5729 break; 5730 } 5731 } 5732 5733 if (handled) { 5734 mutex_enter(&so->so_lock); 5735 goto done2; 5736 } 5737 5738 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5739 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5740 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5741 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5742 5743 oh.level = level; 5744 oh.name = option_name; 5745 oh.len = optlen; 5746 5747 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5748 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5749 /* Let option management work in the presence of data flow control */ 5750 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5751 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5752 mp = NULL; 5753 mutex_enter(&so->so_lock); 5754 if (error) { 5755 eprintsoline(so, error); 5756 goto done2; 5757 } 5758 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5759 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5760 if (error) { 5761 eprintsoline(so, error); 5762 goto done; 5763 } 5764 ASSERT(mp); 5765 /* No need to verify T_optmgmt_ack */ 5766 freemsg(mp); 5767 done: 5768 /* 5769 * Check for SOL_SOCKET options and record their values. 5770 * If we know about a SOL_SOCKET parameter and the transport 5771 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5772 * EPROTO) we let the setsockopt succeed. 
5773 */ 5774 if (level == SOL_SOCKET) { 5775 /* Check parameters */ 5776 switch (option_name) { 5777 case SO_DEBUG: 5778 case SO_REUSEADDR: 5779 case SO_KEEPALIVE: 5780 case SO_DONTROUTE: 5781 case SO_BROADCAST: 5782 case SO_USELOOPBACK: 5783 case SO_OOBINLINE: 5784 case SO_SNDBUF: 5785 case SO_RCVBUF: 5786 #ifdef notyet 5787 case SO_SNDLOWAT: 5788 case SO_RCVLOWAT: 5789 #endif /* notyet */ 5790 case SO_DGRAM_ERRIND: 5791 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5792 error = EINVAL; 5793 eprintsoline(so, error); 5794 goto done2; 5795 } 5796 ASSERT(optval); 5797 handled = B_TRUE; 5798 break; 5799 case SO_SNDTIMEO: 5800 case SO_RCVTIMEO: 5801 if (get_udatamodel() == DATAMODEL_NONE || 5802 get_udatamodel() == DATAMODEL_NATIVE) { 5803 if (optlen != sizeof (struct timeval)) { 5804 error = EINVAL; 5805 eprintsoline(so, error); 5806 goto done2; 5807 } 5808 } else { 5809 if (optlen != sizeof (struct timeval32)) { 5810 error = EINVAL; 5811 eprintsoline(so, error); 5812 goto done2; 5813 } 5814 } 5815 ASSERT(optval); 5816 handled = B_TRUE; 5817 break; 5818 case SO_LINGER: 5819 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5820 error = EINVAL; 5821 eprintsoline(so, error); 5822 goto done2; 5823 } 5824 ASSERT(optval); 5825 handled = B_TRUE; 5826 break; 5827 } 5828 5829 #define intvalue (*(int32_t *)optval) 5830 5831 switch (option_name) { 5832 case SO_TYPE: 5833 case SO_ERROR: 5834 case SO_ACCEPTCONN: 5835 /* Can't be set */ 5836 error = ENOPROTOOPT; 5837 goto done2; 5838 case SO_LINGER: { 5839 struct linger *l = (struct linger *)optval; 5840 5841 so->so_linger.l_linger = l->l_linger; 5842 if (l->l_onoff) { 5843 so->so_linger.l_onoff = SO_LINGER; 5844 so->so_options |= SO_LINGER; 5845 } else { 5846 so->so_linger.l_onoff = 0; 5847 so->so_options &= ~SO_LINGER; 5848 } 5849 break; 5850 } 5851 5852 case SO_DEBUG: 5853 #ifdef SOCK_TEST 5854 if (intvalue & 2) 5855 sock_test_timelimit = 10 * hz; 5856 else 5857 sock_test_timelimit = 0; 5858 5859 if (intvalue & 4) 5860 
do_useracc = 0; 5861 else 5862 do_useracc = 1; 5863 #endif /* SOCK_TEST */ 5864 /* FALLTHRU */ 5865 case SO_REUSEADDR: 5866 case SO_KEEPALIVE: 5867 case SO_DONTROUTE: 5868 case SO_BROADCAST: 5869 case SO_USELOOPBACK: 5870 case SO_OOBINLINE: 5871 case SO_DGRAM_ERRIND: 5872 if (intvalue != 0) { 5873 dprintso(so, 1, 5874 ("socket_setsockopt: setting 0x%x\n", 5875 option_name)); 5876 so->so_options |= option_name; 5877 } else { 5878 dprintso(so, 1, 5879 ("socket_setsockopt: clearing 0x%x\n", 5880 option_name)); 5881 so->so_options &= ~option_name; 5882 } 5883 break; 5884 /* 5885 * The following options are only returned by us when the 5886 * transport layer fails. 5887 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5888 * since the transport might adjust the value and not 5889 * return exactly what was set by the application. 5890 */ 5891 case SO_SNDBUF: 5892 so->so_sndbuf = intvalue; 5893 break; 5894 case SO_RCVBUF: 5895 so->so_rcvbuf = intvalue; 5896 break; 5897 case SO_RCVPSH: 5898 so->so_rcv_timer_interval = intvalue; 5899 break; 5900 #ifdef notyet 5901 /* 5902 * We do not implement the semantics of these options 5903 * thus we shouldn't implement the options either. 
5904 */ 5905 case SO_SNDLOWAT: 5906 so->so_sndlowat = intvalue; 5907 break; 5908 case SO_RCVLOWAT: 5909 so->so_rcvlowat = intvalue; 5910 break; 5911 #endif /* notyet */ 5912 case SO_SNDTIMEO: 5913 case SO_RCVTIMEO: { 5914 struct timeval tl; 5915 clock_t val; 5916 5917 if (get_udatamodel() == DATAMODEL_NONE || 5918 get_udatamodel() == DATAMODEL_NATIVE) 5919 bcopy(&tl, (struct timeval *)optval, 5920 sizeof (struct timeval)); 5921 else 5922 TIMEVAL32_TO_TIMEVAL(&tl, 5923 (struct timeval32 *)optval); 5924 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5925 if (option_name == SO_RCVTIMEO) 5926 so->so_rcvtimeo = drv_usectohz(val); 5927 else 5928 so->so_sndtimeo = drv_usectohz(val); 5929 break; 5930 } 5931 } 5932 #undef intvalue 5933 5934 if (error) { 5935 if ((error == ENOPROTOOPT || error == EPROTO || 5936 error == EINVAL) && handled) { 5937 dprintso(so, 1, 5938 ("setsockopt: ignoring error %d for 0x%x\n", 5939 error, option_name)); 5940 error = 0; 5941 } 5942 } 5943 } 5944 done2: 5945 so_unlock_single(so, SOLOCKED); 5946 mutex_exit(&so->so_lock); 5947 return (error); 5948 } 5949 5950 /* 5951 * sotpi_close() is called when the last open reference goes away. 
 *
 * Returns the result of strclose() when the vnode still has a stream
 * attached, otherwise 0.
 */
/* ARGSUSED */
int
sotpi_close(struct sonode *so, int flag, struct cred *cr)
{
	struct vnode *vp = SOTOV(so);
	dev_t dev;
	int error = 0;
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_close(%p, %x) %s\n",
	    (void *)vp, flag, pr_state(so->so_state, so->so_mode)));

	/* Saved up front; used for ddi_rele_driver() after the stream is gone */
	dev = sti->sti_dev;

	ASSERT(STREAMSTAB(getmajor(dev)));

	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	ASSERT(so_verify_oobstate(so));

	if (sti->sti_nl7c_flags & NL7C_ENABLED) {
		sti->sti_nl7c_flags = 0;
		nl7c_close(so);
	}

	if (vp->v_stream != NULL) {
		vnode_t *ux_vp;

		if (so->so_family == AF_UNIX) {
			/* Could avoid this when CANTSENDMORE for !dgram */
			so_unix_close(so);
		}

		/* so_lock is dropped across the stream teardown below */
		mutex_exit(&so->so_lock);
		/*
		 * Disassemble the linkage from the AF_UNIX underlying file
		 * system vnode to this socket (by atomically clearing
		 * v_stream in vn_rele_stream) before strclose clears sd_vnode
		 * and frees the stream head.
		 */
		if ((ux_vp = sti->sti_ux_bound_vp) != NULL) {
			ASSERT(ux_vp->v_stream);
			sti->sti_ux_bound_vp = NULL;
			vn_rele_stream(ux_vp);
		}
		if (so->so_family == AF_INET || so->so_family == AF_INET6) {
			/* Remove the data hooks and release any kssl state */
			strsetrwputdatahooks(SOTOV(so), NULL, NULL);
			if (sti->sti_kssl_ent != NULL) {
				kssl_release_ent(sti->sti_kssl_ent, so,
				    sti->sti_kssl_type);
				sti->sti_kssl_ent = NULL;
			}
			if (sti->sti_kssl_ctx != NULL) {
				kssl_release_ctx(sti->sti_kssl_ctx);
				sti->sti_kssl_ctx = NULL;
			}
			sti->sti_kssl_type = KSSL_NO_PROXY;
		}
		error = strclose(vp, flag, cr);
		vp->v_stream = NULL;
		mutex_enter(&so->so_lock);
	}

	/*
	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
	 */
	so_flush_discon_ind(so);

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);

	/*
	 * Needed for STREAMs.
	 * Decrement the device driver's reference count for streams
	 * opened via the clone dip. The driver was held in clone_open().
	 * The absence of clone_close() forces this asymmetry.
	 */
	if (so->so_flag & SOCLONE)
		ddi_rele_driver(getmajor(dev));

	return (error);
}

/*
 * ioctl entry point for TPI sockets.
 *
 * The first switch rejects ioctls that are never supported, routes the
 * module-list ioctls to socktpi_plumbioctl() under sti_plumb_lock, and
 * hands everything to strioctl() when the socket is in SOV_STREAM mode.
 * The second switch handles the socket-specific ioctls; anything it does
 * not recognize is passed to strioctl(), except I_* STREAMS ioctls on a
 * SOV_SOCKBSD socket, which fail with EOPNOTSUPP.
 *
 * Returns 0 or an errno value.
 */
static int
sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	struct vnode *vp = SOTOV(so);
	sotpi_info_t *sti = SOTOTPI(so);
	int error = 0;

	dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n",
	    cmd, arg, pr_state(so->so_state, so->so_mode)));

	switch (cmd) {
	case SIOCSQPTR:
		/*
		 * SIOCSQPTR is valid only when helper stream is created
		 * by the protocol.
		 */
	case _I_INSERT:
	case _I_REMOVE:
		/*
		 * Since there's no compelling reason to support these ioctls
		 * on sockets, and doing so would increase the complexity
		 * markedly, prevent it.
		 */
		return (EOPNOTSUPP);

	case I_FIND:
	case I_LIST:
	case I_LOOK:
	case I_POP:
	case I_PUSH:
		/*
		 * To prevent races and inconsistencies between the actual
		 * state of the stream and the state according to the sonode,
		 * we serialize all operations which modify or operate on the
		 * list of modules on the socket's stream.
		 */
		mutex_enter(&sti->sti_plumb_lock);
		error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp);
		mutex_exit(&sti->sti_plumb_lock);
		return (error);

	default:
		if (so->so_version != SOV_STREAM)
			break;

		/*
		 * The imaginary "sockmod" has been popped; act as a stream.
		 */
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}

	ASSERT(so->so_version != SOV_STREAM);

	/*
	 * Process socket-specific ioctls.
	 */
	switch (cmd) {
	case FIONBIO: {
		int32_t value;

		if (so_copyin((void *)arg, &value, sizeof (int32_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		if (value) {
			so->so_state |= SS_NDELAY;
		} else {
			so->so_state &= ~SS_NDELAY;
		}
		mutex_exit(&so->so_lock);
		return (0);
	}

	case FIOASYNC: {
		int32_t value;

		if (so_copyin((void *)arg, &value, sizeof (int32_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		/*
		 * SS_ASYNC flag not already set correctly?
		 * (!value != !(so->so_state & SS_ASYNC))
		 * but some engineers find that too hard to read.
		 */
		if (value == 0 && (so->so_state & SS_ASYNC) != 0 ||
		    value != 0 && (so->so_state & SS_ASYNC) == 0)
			error = so_flip_async(so, vp, mode, cr);
		mutex_exit(&so->so_lock);
		return (error);
	}

	case SIOCSPGRP:
	case FIOSETOWN: {
		pid_t pgrp;

		if (so_copyin((void *)arg, &pgrp, sizeof (pid_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp));
		/* Any change? */
		if (pgrp != so->so_pgrp)
			error = so_set_siggrp(so, vp, pgrp, mode, cr);
		mutex_exit(&so->so_lock);
		return (error);
	}
	case SIOCGPGRP:
	case FIOGETOWN:
		/* so_pgrp is read here without taking so_lock */
		if (so_copyout(&so->so_pgrp, (void *)arg,
		    sizeof (pid_t), (mode & (int)FKIOCTL)))
			return (EFAULT);
		return (0);

	case SIOCATMARK: {
		int retval;
		uint_t so_state;

		/*
		 * strwaitmark has a finite timeout after which it
		 * returns -1 if the mark state is undetermined.
		 * In order to avoid any race between the mark state
		 * in sockfs and the mark state in the stream head this
		 * routine loops until the mark state can be determined
		 * (or the urgent data indication has been removed by some
		 * other thread).
		 */
		do {
			/* Take a fresh snapshot of so_state on every pass */
			mutex_enter(&so->so_lock);
			so_state = so->so_state;
			mutex_exit(&so->so_lock);
			if (so_state & SS_RCVATMARK) {
				retval = 1;
			} else if (!(so_state & SS_OOBPEND)) {
				/*
				 * No SIGURG has been generated -- there is no
				 * pending or present urgent data. Thus can't
				 * possibly be at the mark.
				 */
				retval = 0;
			} else {
				/*
				 * Have the stream head wait until there is
				 * either some messages on the read queue, or
				 * STRATMARK or STRNOTATMARK gets set. The
				 * STRNOTATMARK flag is used so that the
				 * transport can send up a MSGNOTMARKNEXT
				 * M_DATA to indicate that it is not
				 * at the mark and additional data is not about
				 * to be sent upstream.
				 *
				 * If the mark state is undetermined this will
				 * return -1 and we will loop rechecking the
				 * socket state.
				 */
				retval = strwaitmark(vp);
			}
		} while (retval == -1);

		if (so_copyout(&retval, (void *)arg, sizeof (int),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);
		return (0);
	}

	case I_FDINSERT:
	case I_SENDFD:
	case I_RECVFD:
	case I_ATMARK:
	case _SIOCSOCKFALLBACK:
		/*
		 * These ioctls do not apply to sockets. I_FDINSERT can be
		 * used to send M_PROTO messages without modifying the socket
		 * state. I_SENDFD/RECVFD should not be used for socket file
		 * descriptor passing since they assume a twisted stream.
		 * SIOCATMARK must be used instead of I_ATMARK.
		 *
		 * _SIOCSOCKFALLBACK from an application should never be
		 * processed. It is only generated by socktpi_open() or
		 * in response to I_POP or I_PUSH.
		 */
#ifdef DEBUG
		zcmn_err(getzoneid(), CE_WARN,
		    "Unsupported STREAMS ioctl 0x%x on socket. "
		    "Pid = %d\n", cmd, curproc->p_pid);
#endif /* DEBUG */
		return (EOPNOTSUPP);

	case _I_GETPEERCRED:
		/* Only accepted from in-kernel callers (FKIOCTL set) */
		if ((mode & FKIOCTL) == 0)
			return (EINVAL);

		mutex_enter(&so->so_lock);
		if ((so->so_mode & SM_CONNREQUIRED) == 0) {
			error = ENOTSUP;
		} else if ((so->so_state & SS_ISCONNECTED) == 0) {
			error = ENOTCONN;
		} else if (so->so_peercred != NULL) {
			/* arg is dereferenced directly as a kernel pointer */
			k_peercred_t *kp = (k_peercred_t *)arg;
			kp->pc_cr = so->so_peercred;
			kp->pc_cpid = so->so_cpid;
			/* A hold is taken on the cred handed to the caller */
			crhold(so->so_peercred);
		} else {
			error = EINVAL;
		}
		mutex_exit(&so->so_lock);
		return (error);

	default:
		/*
		 * Do the higher-order bits of the ioctl cmd indicate
		 * that it is an I_* streams ioctl?
		 */
		if ((cmd & 0xffffff00U) == STR &&
		    so->so_version == SOV_SOCKBSD) {
#ifdef DEBUG
			zcmn_err(getzoneid(), CE_WARN,
			    "Unsupported STREAMS ioctl 0x%x on socket. "
			    "Pid = %d\n", cmd, curproc->p_pid);
#endif /* DEBUG */
			return (EOPNOTSUPP);
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}
}

/*
 * Handle plumbing-related ioctls.
 *
 * Called with sti_plumb_lock held for I_PUSH, I_POP, I_LIST, I_LOOK and
 * I_FIND; any other cmd panics.  The socket presents an imaginary
 * "sockmod" module to the caller: it is reported at position sti_pushcnt
 * in the module list, and popping it (I_POP with sti_pushcnt == 0)
 * switches the socket to STREAMS mode via so_sock2stream().
 */
static int
socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	static const char sockmod_name[] = "sockmod";
	struct sonode *so = VTOSO(vp);
	char mname[FMNAMESZ + 1];
	int error;
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));

	if (so->so_version == SOV_SOCKBSD)
		return (EOPNOTSUPP);

	if (so->so_version == SOV_STREAM) {
		/*
		 * The imaginary "sockmod" has been popped - act as a stream.
		 * If this is a push of sockmod then change back to a socket.
		 */
		if (cmd == I_PUSH) {
			/* Kernel callers (FKIOCTL) pass a kernel string */
			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
			    (void *)arg, mname, sizeof (mname), NULL);

			if (error == 0 && strcmp(mname, sockmod_name) == 0) {
				dprintso(so, 0, ("socktpi_ioctl: going to "
				    "socket version\n"));
				so_stream2sock(so);
				return (0);
			}
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}

	switch (cmd) {
	case I_PUSH:
		if (sti->sti_direct) {
			/*
			 * Issue _SIOCSOCKFALLBACK with SOLOCKED held and
			 * clear sti_direct only if it succeeds.
			 */
			mutex_enter(&so->so_lock);
			so_lock_single(so);
			mutex_exit(&so->so_lock);

			error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
			    cr, rvalp);

			mutex_enter(&so->so_lock);
			if (error == 0)
				sti->sti_direct = 0;
			so_unlock_single(so, SOLOCKED);
			mutex_exit(&so->so_lock);

			if (error != 0)
				return (error);
		}

		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error == 0)
			sti->sti_pushcnt++;
		return (error);

	case I_POP:
		if (sti->sti_pushcnt == 0) {
			/* Emulate sockmod being popped */
			dprintso(so, 0,
			    ("socktpi_ioctl: going to STREAMS version\n"));
			return (so_sock2stream(so));
		}

		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error == 0)
			sti->sti_pushcnt--;
		return (error);

	case I_LIST: {
		struct str_mlist *kmlistp, *umlistp;
		struct str_list kstrlist;
		ssize_t kstrlistsize;
		int i, nmods;

		STRUCT_DECL(str_list, ustrlist);
		STRUCT_INIT(ustrlist, mode);

		/* No buffer supplied: only the module count is returned */
		if (arg == NULL) {
			error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
			if (error == 0)
				(*rvalp)++;	/* Add one for sockmod */
			return (error);
		}

		error = so_copyin((void *)arg, STRUCT_BUF(ustrlist),
		    STRUCT_SIZE(ustrlist), mode & FKIOCTL);
		if (error != 0)
			return (error);

		nmods = STRUCT_FGET(ustrlist, sl_nmods);
		if (nmods <= 0)
			return (EINVAL);
		/*
		 * Ceiling nmods at nstrpush to prevent someone from
		 * maliciously consuming lots of kernel memory.
		 */
		nmods = MIN(nmods, nstrpush);

		/* One extra slot so that sockmod can be inserted below */
		kstrlistsize = (nmods + 1) * sizeof (struct str_mlist);
		kstrlist.sl_nmods = nmods;
		kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP);

		error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K,
		    cr, rvalp);
		if (error != 0)
			goto done;

		/*
		 * Considering the module list as a 0-based array of sl_nmods
		 * modules, sockmod should conceptually exist at slot
		 * sti_pushcnt. Insert sockmod at this location by sliding all
		 * of the module names after sti_pushcnt over by one. We know
		 * that there will be room to do this since we allocated
		 * sl_modlist with an additional slot.
		 */
		for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--)
			kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1];

		(void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name);
		kstrlist.sl_nmods++;

		/*
		 * Copy all of the entries out to ustrlist.
		 */
		kmlistp = kstrlist.sl_modlist;
		umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
		for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
			error = so_copyout(kmlistp++, umlistp++,
			    sizeof (struct str_mlist), mode & FKIOCTL);
			if (error != 0)
				goto done;
		}

		/* Write the number of entries copied out to the start of arg */
		error = so_copyout(&i, (void *)arg, sizeof (int32_t),
		    mode & FKIOCTL);
		if (error == 0)
			*rvalp = 0;
	done:
		kmem_free(kstrlist.sl_modlist, kstrlistsize);
		return (error);
	}
	case I_LOOK:
		/* With nothing pushed, the topmost module is sockmod itself */
		if (sti->sti_pushcnt == 0) {
			return (so_copyout(sockmod_name, (void *)arg,
			    sizeof (sockmod_name), mode & FKIOCTL));
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));

	case I_FIND:
		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error && error != EINVAL)
			return (error);

		/* if not found and string was sockmod return 1 */
		if (*rvalp == 0 || error == EINVAL) {
			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
			    (void *)arg, mname, sizeof (mname), NULL);
			if (error == ENAMETOOLONG)
				error = EINVAL;

			if (error == 0 && strcmp(mname, sockmod_name) == 0)
				*rvalp = 1;
		}
		return (error);

	default:
		panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
		break;
	}

	return (0);
}

/*
 * Wrapper around the streams poll routine that implements socket poll
 * semantics.
 * The sockfs never calls pollwakeup itself - the stream head takes care
 * of all pollwakeups. Since sockfs never holds so_lock when calling the
 * stream head there can never be a deadlock due to holding so_lock across
 * pollwakeup and acquiring so_lock in this routine.
 *
 * However, since the performance of VOP_POLL is critical we avoid
 * acquiring so_lock here.
 * This is based on two assumptions:
 *  - The poll implementation holds locks to serialize the VOP_POLL call
 *    and a pollwakeup for the same pollhead. This ensures that should
 *    e.g. so_state change during a socktpi_poll call the pollwakeup
 *    (which strsock_* and strrput conspire to issue) is issued after
 *    the state change. Thus the pollwakeup will block until VOP_POLL has
 *    returned and then wake up poll and have it call VOP_POLL again.
 *  - The reading of so_state without holding so_lock does not result in
 *    stale data that is older than the latest state change that has dropped
 *    so_lock. This is ensured by the mutex_exit issuing the appropriate
 *    memory barrier to force the data into the coherency domain.
 */
static int
sotpi_poll(
    struct sonode *so,
    short events,
    int anyyet,
    short *reventsp,
    struct pollhead **phpp)
{
	short origevents = events;
	struct vnode *vp = SOTOV(so);
	int error;
	int so_state = so->so_state;	/* snapshot */
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n",
	    (void *)vp, pr_state(so_state, so->so_mode), so->so_error));

	ASSERT(vp->v_type == VSOCK);
	ASSERT(vp->v_stream != NULL);

	if (so->so_version == SOV_STREAM) {
		/* The imaginary "sockmod" has been popped - act as a stream */
		return (strpoll(vp->v_stream, events, anyyet,
		    reventsp, phpp));
	}

	if (!(so_state & SS_ISCONNECTED) &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		/* Not connected yet - turn off write side events */
		events &= ~(POLLOUT|POLLWRBAND);
	}
	/*
	 * Check for errors without calling strpoll if the caller wants them.
	 * In sockets the errors are represented as input/output events
	 * and there is no need to ask the stream head for this information.
	 *
	 * origevents is used here so that POLLOUT is still reported even
	 * though it may have been masked out of events above.
	 */
	if (so->so_error != 0 &&
	    ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
		return (0);
	}
	/*
	 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages.
	 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA
	 * will not trigger a POLLIN event with POLLRDDATA set.
	 * The handling of urgent data (causing POLLRDBAND) is done by
	 * inspecting SS_OOBPEND below.
	 */
	events |= POLLRDDATA;

	/*
	 * After shutdown(output) a stream head write error is set.
	 * However, we should not return output events.
	 */
	events |= POLLNOERR;
	error = strpoll(vp->v_stream, events, anyyet,
	    reventsp, phpp);
	if (error)
		return (error);

	ASSERT(!(*reventsp & POLLERR));

	/*
	 * Notes on T_CONN_IND handling for sockets.
	 *
	 * If strpoll() returned without events, SR_POLLIN is guaranteed
	 * to be set, ensuring any subsequent strrput() runs pollwakeup().
	 *
	 * Since the so_lock is not held, soqueueconnind() may have run
	 * and a T_CONN_IND may be waiting. We now check for any queued
	 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events
	 * to ensure poll returns.
	 *
	 * However:
	 * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
	 * when strrput() does run for an arriving M_PROTO with T_CONN_IND
	 * the following actions will occur; taken together they ensure the
	 * syscall will return.
	 *
	 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if
	 *    the accept() was run on a non-blocking socket sowaitconnind()
	 *    may have already returned EWOULDBLOCK, so not be waiting to
	 *    process the message. Additionally socktpi_poll() has probably
	 *    proceeded past the sti_conn_ind_head check below.
	 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
	 *    this thread, however that could occur before poll_common()
	 *    has entered cv_wait.
	 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
	 *
	 * Before proceeding to cv_wait() in poll_common() for an event,
	 * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
	 * and if set, re-calls strpoll() to ensure the late arriving
	 * T_CONN_IND is recognized, and pollsys() returns.
	 */

	if (sti->sti_conn_ind_head != NULL)
		*reventsp |= (POLLIN|POLLRDNORM) & events;

	/* so_state is re-read here rather than using the entry snapshot */
	if (so->so_state & SS_OOBPEND)
		*reventsp |= POLLRDBAND & events;

	if (sti->sti_nl7c_rcv_mp != NULL) {
		*reventsp |= (POLLIN|POLLRDNORM) & events;
	}
	/* Record on the NL7C state that a read event is being reported */
	if ((sti->sti_nl7c_flags & NL7C_ENABLED) &&
	    ((POLLIN|POLLRDNORM) & *reventsp)) {
		sti->sti_nl7c_flags |= NL7C_POLLIN;
	}

	return (0);
}

/*
 * kmem cache constructor for socktpi_cache (see socktpi_init()).
 * Constructs the embedded sonode first, then the TPI-specific info; if the
 * latter fails the sonode is destructed again and the error is returned.
 * so_priv is pointed at the embedded sotpi_info in either case.
 */
/*ARGSUSED*/
static int
socktpi_constructor(void *buf, void *cdrarg, int kmflags)
{
	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
	int error = 0;

	error = sonode_constructor(buf, cdrarg, kmflags);
	if (error != 0)
		return (error);

	error = i_sotpi_info_constructor(&st->st_info);
	if (error != 0)
		sonode_destructor(buf, cdrarg);

	st->st_sonode.so_priv = &st->st_info;

	return (error);
}

/*
 * kmem cache destructor for socktpi_cache; undoes socktpi_constructor()
 * in reverse order.
 */
/*ARGSUSED1*/
static void
socktpi_destructor(void *buf, void *cdrarg)
{
	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;

	ASSERT(st->st_sonode.so_priv == &st->st_info);
	st->st_sonode.so_priv = NULL;

	i_sotpi_info_destructor(&st->st_info);
	sonode_destructor(buf, cdrarg);
}

/*
 * kmem cache constructor for socktpi_unix_cache.  In addition to the
 * regular construction, the sonode is inserted at the head of the
 * doubly-linked socklist.sl_list under socklist.sl_lock.
 */
static int
socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags)
{
	int retval;

	if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) {
		struct sonode *so = (struct sonode *)buf;
		sotpi_info_t *sti = SOTOTPI(so);

		mutex_enter(&socklist.sl_lock);

		sti->sti_next_so = socklist.sl_list;
		sti->sti_prev_so = NULL;
		if (sti->sti_next_so != NULL)
			SOTOTPI(sti->sti_next_so)->sti_prev_so = so;
		socklist.sl_list = so;

		mutex_exit(&socklist.sl_lock);

	}
	return (retval);
}

/*
 * kmem cache destructor for socktpi_unix_cache.  Unlinks the sonode from
 * socklist.sl_list under socklist.sl_lock, then runs the regular
 * destructor.
 */
static void
socktpi_unix_destructor(void *buf, void *cdrarg)
{
	struct sonode *so = (struct sonode *)buf;
	sotpi_info_t *sti = SOTOTPI(so);

	mutex_enter(&socklist.sl_lock);

	if (sti->sti_next_so != NULL)
		SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so;
	if (sti->sti_prev_so != NULL)
		SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so;
	else
		socklist.sl_list = sti->sti_next_so;	/* was the list head */

	mutex_exit(&socklist.sl_lock);

	socktpi_destructor(buf, cdrarg);
}

/*
 * Create the two sonode kmem caches used by socktpi.  Always returns 0.
 */
int
socktpi_init(void)
{
	/*
	 * Create sonode caches.  We create a special one for AF_UNIX so
	 * that we can track them for netstat(1m).
	 */
	socktpi_cache = kmem_cache_create("socktpi_cache",
	    sizeof (struct sotpi_sonode), 0, socktpi_constructor,
	    socktpi_destructor, NULL, NULL, NULL, 0);

	socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache",
	    sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor,
	    socktpi_unix_destructor, NULL, NULL, NULL, 0);

	return (0);
}

/*
 * Given a non-TPI sonode, allocate and prep it to be ready for TPI.
 *
 * Caller must still update state and mode using sotpi_update_state().
6678 */ 6679 int 6680 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 6681 boolean_t *direct, queue_t **qp, struct cred *cr) 6682 { 6683 sotpi_info_t *sti; 6684 struct sockparams *origsp = so->so_sockparams; 6685 sock_lower_handle_t handle = so->so_proto_handle; 6686 struct stdata *stp; 6687 struct vnode *vp; 6688 queue_t *q; 6689 int error = 0; 6690 6691 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6692 SS_FALLBACK_PENDING); 6693 ASSERT(SOCK_IS_NONSTR(so)); 6694 6695 *qp = NULL; 6696 *direct = B_FALSE; 6697 so->so_sockparams = newsp; 6698 /* 6699 * Allocate and initalize fields required by TPI. 6700 */ 6701 (void) sotpi_info_create(so, KM_SLEEP); 6702 sotpi_info_init(so); 6703 6704 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) { 6705 sotpi_info_fini(so); 6706 sotpi_info_destroy(so); 6707 return (error); 6708 } 6709 ASSERT(handle == so->so_proto_handle); 6710 sti = SOTOTPI(so); 6711 if (sti->sti_direct != 0) 6712 *direct = B_TRUE; 6713 6714 /* 6715 * When it comes to urgent data we have two cases to deal with; 6716 * (1) The oob byte has already arrived, or (2) the protocol has 6717 * notified that oob data is pending, but it has not yet arrived. 6718 * 6719 * For (1) all we need to do is send a T_EXDATA_IND to indicate were 6720 * in the byte stream the oob byte is. For (2) we have to send a 6721 * SIGURG (M_PCSIG), followed by a zero-length mblk indicating whether 6722 * the oob byte will be the next byte from the protocol. 6723 * 6724 * So in the worst case we need two mblks, one for the signal, another 6725 * for mark indication. In that case we use the exdata_mp for the sig. 6726 */ 6727 sti->sti_exdata_mp = allocb_wait(sizeof (struct T_exdata_ind), BPRI_MED, 6728 STR_NOSIG, NULL); 6729 sti->sti_urgmark_mp = allocb_wait(0, BPRI_MED, STR_NOSIG, NULL); 6730 6731 /* 6732 * Keep the original sp around so we can properly dispose of the 6733 * sonode when the socket is being closed. 
6734 */ 6735 sti->sti_orig_sp = origsp; 6736 6737 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 6738 so_alloc_addr(so, so->so_max_addr_len); 6739 6740 /* 6741 * If the application has done a SIOCSPGRP, make sure the 6742 * STREAM head is aware. This needs to take place before 6743 * the protocol start sending up messages. Otherwise we 6744 * might miss to generate SIGPOLL. 6745 * 6746 * It is possible that the application will receive duplicate 6747 * signals if some were already generated for either data or 6748 * connection indications. 6749 */ 6750 if (so->so_pgrp != 0) { 6751 if (so_set_events(so, so->so_vnode, cr) != 0) 6752 so->so_pgrp = 0; 6753 } 6754 6755 /* 6756 * Determine which queue to use. 6757 */ 6758 vp = SOTOV(so); 6759 stp = vp->v_stream; 6760 ASSERT(stp != NULL); 6761 q = stp->sd_wrq->q_next; 6762 6763 /* 6764 * Skip any modules that may have been auto pushed when the device 6765 * was opened 6766 */ 6767 while (q->q_next != NULL) 6768 q = q->q_next; 6769 *qp = _RD(q); 6770 6771 /* This is now a STREAMS sockets */ 6772 so->so_not_str = B_FALSE; 6773 6774 return (error); 6775 } 6776 6777 /* 6778 * Revert a TPI sonode. It is only allowed to revert the sonode during 6779 * the fallback process. 6780 */ 6781 void 6782 sotpi_revert_sonode(struct sonode *so, struct cred *cr) 6783 { 6784 vnode_t *vp = SOTOV(so); 6785 6786 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6787 SS_FALLBACK_PENDING); 6788 ASSERT(!SOCK_IS_NONSTR(so)); 6789 ASSERT(vp->v_stream != NULL); 6790 6791 if (SOTOTPI(so)->sti_exdata_mp != NULL) { 6792 freeb(SOTOTPI(so)->sti_exdata_mp); 6793 SOTOTPI(so)->sti_exdata_mp = NULL; 6794 } 6795 6796 if (SOTOTPI(so)->sti_urgmark_mp != NULL) { 6797 freeb(SOTOTPI(so)->sti_urgmark_mp); 6798 SOTOTPI(so)->sti_urgmark_mp = NULL; 6799 } 6800 6801 strclean(vp); 6802 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr); 6803 6804 /* 6805 * Restore the original sockparams. 
The caller is responsible for 6806 * dropping the ref to the new sp. 6807 */ 6808 so->so_sockparams = SOTOTPI(so)->sti_orig_sp; 6809 6810 sotpi_info_fini(so); 6811 sotpi_info_destroy(so); 6812 6813 /* This is no longer a STREAMS sockets */ 6814 so->so_not_str = B_TRUE; 6815 } 6816 6817 void 6818 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 6819 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 6820 socklen_t faddrlen, short opts) 6821 { 6822 sotpi_info_t *sti = SOTOTPI(so); 6823 6824 so_proc_tcapability_ack(so, tcap); 6825 6826 so->so_options |= opts; 6827 6828 /* 6829 * Determine whether the foreign and local address are valid 6830 */ 6831 if (laddrlen != 0) { 6832 ASSERT(laddrlen <= sti->sti_laddr_maxlen); 6833 sti->sti_laddr_len = laddrlen; 6834 bcopy(laddr, sti->sti_laddr_sa, laddrlen); 6835 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 6836 } 6837 6838 if (faddrlen != 0) { 6839 ASSERT(faddrlen <= sti->sti_faddr_maxlen); 6840 sti->sti_faddr_len = faddrlen; 6841 bcopy(faddr, sti->sti_faddr_sa, faddrlen); 6842 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 6843 } 6844 6845 } 6846 6847 /* 6848 * Allocate enough space to cache the local and foreign addresses. 6849 */ 6850 void 6851 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6852 { 6853 sotpi_info_t *sti = SOTOTPI(so); 6854 6855 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6856 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6857 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6858 P2ROUNDUP(maxlen, KMEM_ALIGN); 6859 so->so_max_addr_len = sti->sti_laddr_maxlen; 6860 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6861 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6862 + sti->sti_laddr_maxlen); 6863 6864 if (so->so_family == AF_UNIX) { 6865 /* 6866 * Initialize AF_UNIX related fields. 
6867 */ 6868 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6869 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6870 } 6871 } 6872 6873 6874 sotpi_info_t * 6875 sotpi_sototpi(struct sonode *so) 6876 { 6877 sotpi_info_t *sti; 6878 6879 ASSERT(so != NULL); 6880 6881 sti = (sotpi_info_t *)so->so_priv; 6882 6883 ASSERT(sti != NULL); 6884 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6885 6886 return (sti); 6887 } 6888 6889 static int 6890 i_sotpi_info_constructor(sotpi_info_t *sti) 6891 { 6892 sti->sti_magic = SOTPI_INFO_MAGIC; 6893 sti->sti_ack_mp = NULL; 6894 sti->sti_discon_ind_mp = NULL; 6895 sti->sti_ux_bound_vp = NULL; 6896 sti->sti_unbind_mp = NULL; 6897 6898 sti->sti_conn_ind_head = NULL; 6899 sti->sti_conn_ind_tail = NULL; 6900 6901 sti->sti_laddr_sa = NULL; 6902 sti->sti_faddr_sa = NULL; 6903 6904 sti->sti_nl7c_flags = 0; 6905 sti->sti_nl7c_uri = NULL; 6906 sti->sti_nl7c_rcv_mp = NULL; 6907 6908 sti->sti_exdata_mp = NULL; 6909 sti->sti_urgmark_mp = NULL; 6910 6911 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6912 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6913 6914 return (0); 6915 } 6916 6917 static void 6918 i_sotpi_info_destructor(sotpi_info_t *sti) 6919 { 6920 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6921 ASSERT(sti->sti_ack_mp == NULL); 6922 ASSERT(sti->sti_discon_ind_mp == NULL); 6923 ASSERT(sti->sti_ux_bound_vp == NULL); 6924 ASSERT(sti->sti_unbind_mp == NULL); 6925 6926 ASSERT(sti->sti_conn_ind_head == NULL); 6927 ASSERT(sti->sti_conn_ind_tail == NULL); 6928 6929 ASSERT(sti->sti_laddr_sa == NULL); 6930 ASSERT(sti->sti_faddr_sa == NULL); 6931 6932 ASSERT(sti->sti_nl7c_flags == 0); 6933 ASSERT(sti->sti_nl7c_uri == NULL); 6934 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6935 6936 ASSERT(sti->sti_exdata_mp == NULL); 6937 ASSERT(sti->sti_urgmark_mp == NULL); 6938 6939 mutex_destroy(&sti->sti_plumb_lock); 6940 cv_destroy(&sti->sti_ack_cv); 6941 } 6942 6943 /* 6944 * Creates and attaches TPI information to the given sonode 
6945 */ 6946 static boolean_t 6947 sotpi_info_create(struct sonode *so, int kmflags) 6948 { 6949 sotpi_info_t *sti; 6950 6951 ASSERT(so->so_priv == NULL); 6952 6953 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6954 return (B_FALSE); 6955 6956 if (i_sotpi_info_constructor(sti) != 0) { 6957 kmem_free(sti, sizeof (*sti)); 6958 return (B_FALSE); 6959 } 6960 6961 so->so_priv = (void *)sti; 6962 return (B_TRUE); 6963 } 6964 6965 /* 6966 * Initializes the TPI information. 6967 */ 6968 static void 6969 sotpi_info_init(struct sonode *so) 6970 { 6971 struct vnode *vp = SOTOV(so); 6972 sotpi_info_t *sti = SOTOTPI(so); 6973 time_t now; 6974 6975 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6976 vp->v_rdev = sti->sti_dev; 6977 6978 sti->sti_orig_sp = NULL; 6979 6980 sti->sti_pushcnt = 0; 6981 6982 now = gethrestime_sec(); 6983 sti->sti_atime = now; 6984 sti->sti_mtime = now; 6985 sti->sti_ctime = now; 6986 6987 sti->sti_eaddr_mp = NULL; 6988 sti->sti_delayed_error = 0; 6989 6990 sti->sti_provinfo = NULL; 6991 6992 sti->sti_oobcnt = 0; 6993 sti->sti_oobsigcnt = 0; 6994 6995 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6996 6997 sti->sti_laddr_sa = 0; 6998 sti->sti_faddr_sa = 0; 6999 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 7000 sti->sti_laddr_len = sti->sti_faddr_len = 0; 7001 7002 sti->sti_laddr_valid = 0; 7003 sti->sti_faddr_valid = 0; 7004 sti->sti_faddr_noxlate = 0; 7005 7006 sti->sti_direct = 0; 7007 7008 ASSERT(sti->sti_ack_mp == NULL); 7009 ASSERT(sti->sti_ux_bound_vp == NULL); 7010 ASSERT(sti->sti_unbind_mp == NULL); 7011 7012 ASSERT(sti->sti_conn_ind_head == NULL); 7013 ASSERT(sti->sti_conn_ind_tail == NULL); 7014 7015 /* Initialize the kernel SSL proxy fields */ 7016 sti->sti_kssl_type = KSSL_NO_PROXY; 7017 sti->sti_kssl_ent = NULL; 7018 sti->sti_kssl_ctx = NULL; 7019 } 7020 7021 /* 7022 * Given a sonode, grab the TPI info and free any data. 
7023 */ 7024 static void 7025 sotpi_info_fini(struct sonode *so) 7026 { 7027 sotpi_info_t *sti = SOTOTPI(so); 7028 mblk_t *mp; 7029 7030 ASSERT(sti->sti_discon_ind_mp == NULL); 7031 7032 if ((mp = sti->sti_conn_ind_head) != NULL) { 7033 mblk_t *mp1; 7034 7035 while (mp) { 7036 mp1 = mp->b_next; 7037 mp->b_next = NULL; 7038 freemsg(mp); 7039 mp = mp1; 7040 } 7041 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 7042 } 7043 7044 /* 7045 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 7046 * indirect them. It also uses so_count as a validity test. 7047 */ 7048 mutex_enter(&so->so_lock); 7049 7050 if (sti->sti_laddr_sa) { 7051 ASSERT((caddr_t)sti->sti_faddr_sa == 7052 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 7053 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 7054 sti->sti_laddr_valid = 0; 7055 sti->sti_faddr_valid = 0; 7056 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 7057 sti->sti_laddr_sa = NULL; 7058 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 7059 sti->sti_faddr_sa = NULL; 7060 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 7061 } 7062 7063 mutex_exit(&so->so_lock); 7064 7065 if ((mp = sti->sti_eaddr_mp) != NULL) { 7066 freemsg(mp); 7067 sti->sti_eaddr_mp = NULL; 7068 sti->sti_delayed_error = 0; 7069 } 7070 7071 if ((mp = sti->sti_ack_mp) != NULL) { 7072 freemsg(mp); 7073 sti->sti_ack_mp = NULL; 7074 } 7075 7076 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 7077 sti->sti_nl7c_rcv_mp = NULL; 7078 freemsg(mp); 7079 } 7080 sti->sti_nl7c_rcv_rval = 0; 7081 if (sti->sti_nl7c_uri != NULL) { 7082 nl7c_urifree(so); 7083 /* urifree() cleared nl7c_uri */ 7084 } 7085 if (sti->sti_nl7c_flags) { 7086 sti->sti_nl7c_flags = 0; 7087 } 7088 7089 ASSERT(sti->sti_ux_bound_vp == NULL); 7090 if ((mp = sti->sti_unbind_mp) != NULL) { 7091 freemsg(mp); 7092 sti->sti_unbind_mp = NULL; 7093 } 7094 } 7095 7096 /* 7097 * Destroys the TPI information attached to a sonode. 
7098 */ 7099 static void 7100 sotpi_info_destroy(struct sonode *so) 7101 { 7102 sotpi_info_t *sti = SOTOTPI(so); 7103 7104 i_sotpi_info_destructor(sti); 7105 kmem_free(sti, sizeof (*sti)); 7106 7107 so->so_priv = NULL; 7108 } 7109 7110 /* 7111 * Create the global sotpi socket module entry. It will never be freed. 7112 */ 7113 smod_info_t * 7114 sotpi_smod_create(void) 7115 { 7116 smod_info_t *smodp; 7117 7118 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 7119 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 7120 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 7121 /* 7122 * Initialize the smod_refcnt to 1 so it will never be freed. 7123 */ 7124 smodp->smod_refcnt = 1; 7125 smodp->smod_uc_version = SOCK_UC_VERSION; 7126 smodp->smod_dc_version = SOCK_DC_VERSION; 7127 smodp->smod_sock_create_func = &sotpi_create; 7128 smodp->smod_sock_destroy_func = &sotpi_destroy; 7129 return (smodp); 7130 } 7131