/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/kmem_impl.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/user.h>
#include <sys/termios.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/suntpi.h>
#include <sys/ddi.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/pathname.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <sys/un.h>
#include <sys/strsun.h>

#include <sys/tiuser.h>
#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */

#include <c2/audit.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/udp_impl.h>

#include <sys/zone.h>

#include <fs/sockfs/nl7c.h>
#include <fs/sockfs/nl7curi.h>

#include <inet/kssl/ksslapi.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/socktpi.h>
#include <fs/sockfs/socktpi_impl.h>

/*
 * Possible failures when memory can't be allocated. The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *	(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *	(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *	TCP can handle an O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by an
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected-to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing an O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_RES without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket, send them as a single message for AF_INET{,6}.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */

#ifdef SOCK_TEST
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 */
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;

static void sotpi_destroy(struct sonode *);
static struct sonode *sotpi_create(struct sockparams *, int, int, int, int,
    int, int *, cred_t *cr);

static boolean_t sotpi_info_create(struct sonode *, int);
static void sotpi_info_init(struct sonode *);
static void sotpi_info_fini(struct sonode *);
static void sotpi_info_destroy(struct sonode *);

/*
 * Do direct function calls to the transport layer below; this also
 * allows the transport to utilize the read-side synchronous stream
 * interface if necessary. This is a /etc/system tunable that must
 * not be modified on a running system. By default this is enabled
 * for performance reasons and may be disabled for debugging purposes.
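 *
 * As an illustrative (unverified) example, direct calls could be turned
 * off by adding a line such as the following to /etc/system and
 * rebooting:
 *
 *	set sockfs:socktpi_direct = 0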
192 */ 193 boolean_t socktpi_direct = B_TRUE; 194 195 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 196 197 extern void sigintr(k_sigset_t *, int); 198 extern void sigunintr(k_sigset_t *); 199 200 /* Sockets acting as an in-kernel SSL proxy */ 201 extern mblk_t *strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *, 202 strsigset_t *, strsigset_t *, strpollset_t *); 203 extern mblk_t *strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *, 204 strsigset_t *, strsigset_t *, strpollset_t *); 205 206 static int sotpi_unbind(struct sonode *, int); 207 208 /* TPI sockfs sonode operations */ 209 int sotpi_init(struct sonode *, struct sonode *, struct cred *, 210 int); 211 static int sotpi_accept(struct sonode *, int, struct cred *, 212 struct sonode **); 213 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 214 int, struct cred *); 215 static int sotpi_listen(struct sonode *, int, struct cred *); 216 static int sotpi_connect(struct sonode *, struct sockaddr *, 217 socklen_t, int, int, struct cred *); 218 extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 219 struct uio *, struct cred *); 220 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 221 struct uio *, struct cred *); 222 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 223 struct cred *, mblk_t **); 224 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 225 struct uio *, void *, t_uscalar_t, int); 226 static int sodgram_direct(struct sonode *, struct sockaddr *, 227 socklen_t, struct uio *, int); 228 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 229 socklen_t *, boolean_t, struct cred *); 230 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 231 socklen_t *, struct cred *); 232 static int sotpi_shutdown(struct sonode *, int, struct cred *); 233 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 234 socklen_t *, int, struct cred *); 235 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 236 socklen_t, struct cred *); 237 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 238 int32_t *); 239 static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int, 240 struct cred *, int32_t *); 241 static int sotpi_poll(struct sonode *, short, int, short *, 242 struct pollhead **); 243 static int sotpi_close(struct sonode *, int, struct cred *); 244 245 static int i_sotpi_info_constructor(sotpi_info_t *); 246 static void i_sotpi_info_destructor(sotpi_info_t *); 247 248 sonodeops_t sotpi_sonodeops = { 249 sotpi_init, /* sop_init */ 250 sotpi_accept, /* sop_accept */ 251 sotpi_bind, /* sop_bind */ 252 sotpi_listen, /* sop_listen */ 253 sotpi_connect, /* sop_connect */ 254 sotpi_recvmsg, /* sop_recvmsg */ 255 sotpi_sendmsg, /* sop_sendmsg */ 256 sotpi_sendmblk, /* sop_sendmblk */ 257 sotpi_getpeername, /* sop_getpeername */ 258 sotpi_getsockname, /* sop_getsockname */ 259 sotpi_shutdown, /* sop_shutdown */ 260 sotpi_getsockopt, /* sop_getsockopt */ 261 sotpi_setsockopt, /* sop_setsockopt */ 262 sotpi_ioctl, /* sop_ioctl */ 263 sotpi_poll, /* sop_poll */ 264 sotpi_close, /* sop_close */ 265 }; 266 267 /* 268 * Return a TPI socket vnode. 269 * 270 * Note that sockets assume that the driver will clone (either itself 271 * or by using the clone driver) i.e. a socket() call will always 272 * result in a new vnode being created. 273 */ 274 275 /* 276 * Common create code for socket and accept. 
If tso is set, the values
 * from that node are used instead of issuing a T_INFO_REQ.
 */

/* ARGSUSED */
static struct sonode *
sotpi_create(struct sockparams *sp, int family, int type, int protocol,
    int version, int sflags, int *errorp, cred_t *cr)
{
	struct sonode	*so;
	kmem_cache_t	*cp;
	int		sfamily = family;

	ASSERT(sp->sp_sdev_info.sd_vnode != NULL);

	if (family == AF_NCA) {
		/*
		 * The request is for an NCA socket, so for NL7C use the
		 * INET domain instead and mark NL7C_AF_NCA below.
		 */
		family = AF_INET;
		/*
		 * NL7C is not supported in the non-global zone;
		 * we enforce this restriction here.
		 */
		if (getzoneid() != GLOBAL_ZONEID) {
			*errorp = ENOTSUP;
			return (NULL);
		}
	}

	/*
	 * To be compatible with the old TPI socket implementation, ignore
	 * the sleep flag (sflags) passed in.
	 */
	cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache;
	so = kmem_cache_alloc(cp, KM_SLEEP);
	if (so == NULL) {
		*errorp = ENOMEM;
		return (NULL);
	}

	sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops);
	sotpi_info_init(so);

	if (sfamily == AF_NCA) {
		SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA;
	}

	if (version == SOV_DEFAULT)
		version = so_default_version;

	so->so_version = (short)version;
	*errorp = 0;

	return (so);
}

static void
sotpi_destroy(struct sonode *so)
{
	kmem_cache_t *cp;
	struct sockparams *origsp;

	/*
	 * If there is a new dealloc function (i.e. smod_destroy_func),
	 * then it should check the correctness of the ops.
	 */
	ASSERT(so->so_ops == &sotpi_sonodeops);

	origsp = SOTOTPI(so)->sti_orig_sp;

	sotpi_info_fini(so);

	if (so->so_state & SS_FALLBACK_COMP) {
		/*
		 * A fallback happened, which means that a sotpi_info_t struct
		 * was allocated (as opposed to being allocated from the TPI
		 * sonode cache). Therefore we explicitly free the struct
		 * here.
		 */
		sotpi_info_destroy(so);
		ASSERT(origsp != NULL);

		origsp->sp_smod_info->smod_sock_destroy_func(so);
		SOCKPARAMS_DEC_REF(origsp);
	} else {
		sonode_fini(so);
		cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache :
		    socktpi_cache;
		kmem_cache_free(cp, so);
	}
}

/* ARGSUSED1 */
int
sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags)
{
	major_t maj;
	dev_t newdev;
	struct vnode *vp;
	int error = 0;
	struct stdata *stp;

	sotpi_info_t *sti = SOTOTPI(so);

	dprint(1, ("sotpi_init()\n"));

	/*
	 * Overwrite the sleep flag passed in; that is ok since the
	 * TPI socket does not honor the sleep flag.
	 */
	flags |= FREAD|FWRITE;

	/*
	 * Record in so_flag that it is a clone.
	 */
	if (getmajor(sti->sti_dev) == clone_major)
		so->so_flag |= SOCLONE;

	if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) &&
	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
	    (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP ||
	    so->so_protocol == IPPROTO_IP)) {
		/* Tell tcp or udp that it's talking to sockets */
		flags |= SO_SOCKSTR;

		/*
		 * Here we indicate to socktpi_open() our attempt to
		 * make direct calls between sockfs and transport.
		 * The final decision is left to socktpi_open().
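		 * (The direct interface is turned off further below if the
		 * module directly beneath the stream head does not advertise
		 * _D_DIRECT, e.g. when some other module has been autopushed
		 * above the transport.)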
408 */ 409 sti->sti_direct = 1; 410 411 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 412 if (so->so_type == SOCK_STREAM && tso != NULL) { 413 if (SOTOTPI(tso)->sti_direct) { 414 /* 415 * Inherit sti_direct from listener and pass 416 * SO_ACCEPTOR open flag to tcp, indicating 417 * that this is an accept fast-path instance. 418 */ 419 flags |= SO_ACCEPTOR; 420 } else { 421 /* 422 * sti_direct is not set on listener, meaning 423 * that the listener has been converted from 424 * a socket to a stream. Ensure that the 425 * acceptor inherits these settings. 426 */ 427 sti->sti_direct = 0; 428 flags &= ~SO_SOCKSTR; 429 } 430 } 431 } 432 433 /* 434 * Tell local transport that it is talking to sockets. 435 */ 436 if (so->so_family == AF_UNIX) { 437 flags |= SO_SOCKSTR; 438 } 439 440 vp = SOTOV(so); 441 newdev = vp->v_rdev; 442 maj = getmajor(newdev); 443 ASSERT(STREAMSTAB(maj)); 444 445 error = stropen(vp, &newdev, flags, cr); 446 447 stp = vp->v_stream; 448 if (error == 0) { 449 if (so->so_flag & SOCLONE) 450 ASSERT(newdev != vp->v_rdev); 451 mutex_enter(&so->so_lock); 452 sti->sti_dev = newdev; 453 vp->v_rdev = newdev; 454 mutex_exit(&so->so_lock); 455 456 if (stp->sd_flag & STRISTTY) { 457 /* 458 * this is a post SVR4 tty driver - a socket can not 459 * be a controlling terminal. Fail the open. 460 */ 461 (void) sotpi_close(so, flags, cr); 462 return (ENOTTY); /* XXX */ 463 } 464 465 ASSERT(stp->sd_wrq != NULL); 466 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 467 468 /* 469 * If caller is interested in doing direct function call 470 * interface to/from transport module, probe the module 471 * directly beneath the streamhead to see if it qualifies. 472 * 473 * We turn off the direct interface when qualifications fail. 474 * In the acceptor case, we simply turn off the sti_direct 475 * flag on the socket. We do the fallback after the accept 476 * has completed, before the new socket is returned to the 477 * application. 478 */ 479 if (sti->sti_direct) { 480 queue_t *tq = stp->sd_wrq->q_next; 481 482 /* 483 * sti_direct is currently supported and tested 484 * only for tcp/udp; this is the main reason to 485 * have the following assertions. 486 */ 487 ASSERT(so->so_family == AF_INET || 488 so->so_family == AF_INET6); 489 ASSERT(so->so_protocol == IPPROTO_UDP || 490 so->so_protocol == IPPROTO_TCP || 491 so->so_protocol == IPPROTO_IP); 492 ASSERT(so->so_type == SOCK_DGRAM || 493 so->so_type == SOCK_STREAM); 494 495 /* 496 * Abort direct call interface if the module directly 497 * underneath the stream head is not defined with the 498 * _D_DIRECT flag. This could happen in the tcp or 499 * udp case, when some other module is autopushed 500 * above it, or for some reasons the expected module 501 * isn't purely D_MP (which is the main requirement). 502 */ 503 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 504 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 505 int rval; 506 507 /* Continue on without direct calls */ 508 sti->sti_direct = 0; 509 510 /* 511 * Cannot issue ioctl on fallback socket since 512 * there is no conn associated with the queue. 513 * The fallback downcall will notify the proto 514 * of the change. 515 */ 516 if (!(flags & SO_ACCEPTOR) && 517 !(flags & SO_FALLBACK)) { 518 if ((error = strioctl(vp, 519 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 520 cr, &rval)) != 0) { 521 (void) sotpi_close(so, flags, 522 cr); 523 return (error); 524 } 525 } 526 } 527 } 528 529 if (flags & SO_FALLBACK) { 530 /* 531 * The stream created does not have a conn. 
		 * Do stream setup after a conn has been assigned.
		 */
		return (error);
	}
	if (error = so_strinit(so, tso)) {
		(void) sotpi_close(so, flags, cr);
		return (error);
	}

	/* Wildcard */
	if (so->so_protocol != so->so_sockparams->sp_protocol) {
		int protocol = so->so_protocol;
		/*
		 * Issue SO_PROTOTYPE setsockopt.
		 */
		error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
		    &protocol, (t_uscalar_t)sizeof (protocol), cr);
		if (error != 0) {
			(void) sotpi_close(so, flags, cr);
			/*
			 * Setsockopt often fails with ENOPROTOOPT but
			 * socket() should fail with
			 * EPROTONOSUPPORT/EPROTOTYPE.
			 */
			return (EPROTONOSUPPORT);
		}
	}

	} else {
		/*
		 * While the same socket can not be reopened (unlike specfs),
		 * the stream head sets STREOPENFAIL when the autopush fails.
		 */
		if ((stp != NULL) &&
		    (stp->sd_flag & STREOPENFAIL)) {
			/*
			 * Open failed part way through.
			 */
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~STREOPENFAIL;
			mutex_exit(&stp->sd_lock);
			(void) sotpi_close(so, flags, cr);
			return (error);
			/*NOTREACHED*/
		}
		ASSERT(stp == NULL);
	}
	TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN,
	    "sockfs open:maj %d vp %p so %p error %d",
	    maj, vp, so, error);
	return (error);
}

/*
 * Bind the socket to an unspecified address in sockfs only.
 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
 * required in all cases.
 */
static void
so_automatic_bind(struct sonode *so)
{
	sotpi_info_t *sti = SOTOTPI(so);
	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(!(so->so_state & SS_ISBOUND));
	ASSERT(sti->sti_unbind_mp);

	ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
	bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
	sti->sti_laddr_sa->sa_family = so->so_family;
	so->so_state |= SS_ISBOUND;
}


/*
 * bind the socket.
 *
 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2
 * are passed in we allow rebinding. Note that for backwards compatibility
 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind.
 * Thus the rebinding code is currently not executed.
 *
 * The constraints for rebinding are:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 * This rebinding code was added based on some language in the XNET book
 * about not returning EINVAL if the protocol allows rebinding. However,
 * this language is not present in the POSIX socket draft. Thus maybe the
 * rebinding logic should be deleted from the source.
 *
 * A null "name" can be used to unbind the socket if:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
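 * (As a concrete consequence, a connection-oriented socket for which
 * rebinding is not permitted fails such an attempt with EINVAL; see the
 * SS_CANTREBIND check in sotpi_bindlisten() below.)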
 */
/* ARGSUSED */
static int
sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int backlog, int flags, struct cred *cr)
{
	struct T_bind_req	bind_req;
	struct T_bind_ack	*bind_ack;
	int			error = 0;
	mblk_t			*mp;
	void			*addr;
	t_uscalar_t		addrlen;
	int			unbind_on_err = 1;
	boolean_t		clear_acceptconn_on_err = B_FALSE;
	boolean_t		restore_backlog_on_err = B_FALSE;
	int			save_so_backlog;
	t_scalar_t		PRIM_type = O_T_BIND_REQ;
	boolean_t		tcp_udp_xport;
	void			*nl7c = NULL;
	sotpi_info_t		*sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
	    (void *)so, (void *)name, namelen, backlog, flags,
	    pr_state(so->so_state, so->so_mode)));

	tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;

	if (!(flags & _SOBIND_LOCK_HELD)) {
		mutex_enter(&so->so_lock);
		so_lock_single(so);	/* Set SOLOCKED */
	} else {
		ASSERT(MUTEX_HELD(&so->so_lock));
		ASSERT(so->so_flag & SOLOCKED);
	}

	/*
	 * Make sure that there is a preallocated unbind_req message
	 * before binding. This message is allocated when the socket is
	 * created, but it might have been consumed.
	 */
	if (sti->sti_unbind_mp == NULL) {
		dprintso(so, 1, ("sobind: allocating unbind_req\n"));
		/* NOTE: holding so_lock while sleeping */
		sti->sti_unbind_mp =
		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP,
		    cr);
	}

	if (flags & _SOBIND_REBIND) {
		/*
		 * Called from solisten after doing an sotpi_unbind() or
		 * potentially without the unbind (latter for AF_INET{,6}).
		 */
		ASSERT(name == NULL && namelen == 0);

		if (so->so_family == AF_UNIX) {
			ASSERT(sti->sti_ux_bound_vp);
			addr = &sti->sti_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
			dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, "
			    "addr 0x%p, vp %p\n",
			    addrlen,
			    (void *)((struct so_ux_addr *)addr)->soua_vp,
			    (void *)sti->sti_ux_bound_vp));
		} else {
			addr = sti->sti_laddr_sa;
			addrlen = (t_uscalar_t)sti->sti_laddr_len;
		}
	} else if (flags & _SOBIND_UNSPEC) {
		ASSERT(name == NULL && namelen == 0);

		/*
		 * The caller checked SS_ISBOUND but not necessarily
		 * under so_lock.
		 */
		if (so->so_state & SS_ISBOUND) {
			/* No error */
			goto done;
		}

		/* Set an initial local address */
		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * Use an address with same size as struct sockaddr
			 * just like BSD.
			 */
			sti->sti_laddr_len =
			    (socklen_t)sizeof (struct sockaddr);
			ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
			bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
			sti->sti_laddr_sa->sa_family = so->so_family;

			/*
			 * Pass down an address with the implicit bind
			 * magic number and the rest all zeros.
			 * The transport will return a unique address.
			 */
			sti->sti_ux_laddr.soua_vp = NULL;
			sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
			addr = &sti->sti_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
			break;

		case AF_INET:
		case AF_INET6:
			/*
			 * An unspecified bind in TPI has a NULL address.
			 * Set the address in sockfs to have the sa_family.
			 */
			sti->sti_laddr_len = (so->so_family == AF_INET) ?
739 (socklen_t)sizeof (sin_t) : 740 (socklen_t)sizeof (sin6_t); 741 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 742 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 743 sti->sti_laddr_sa->sa_family = so->so_family; 744 addr = NULL; 745 addrlen = 0; 746 break; 747 748 default: 749 /* 750 * An unspecified bind in TPI has a NULL address. 751 * Set the address in sockfs to be zero length. 752 * 753 * Can not assume there is a sa_family for all 754 * protocol families. For example, AF_X25 does not 755 * have a family field. 756 */ 757 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 758 sti->sti_laddr_len = 0; /* XXX correct? */ 759 addr = NULL; 760 addrlen = 0; 761 break; 762 } 763 764 } else { 765 if (so->so_state & SS_ISBOUND) { 766 /* 767 * If it is ok to rebind the socket, first unbind 768 * with the transport. A rebind to the NULL address 769 * is interpreted as an unbind. 770 * Note that a bind to NULL in BSD does unbind the 771 * socket but it fails with EINVAL. 772 * Note that regular sockets set SOV_SOCKBSD i.e. 773 * _SOBIND_SOCKBSD gets set here hence no type of 774 * socket does currently allow rebinding. 775 * 776 * If the name is NULL just do an unbind. 777 */ 778 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 779 name != NULL) { 780 error = EINVAL; 781 unbind_on_err = 0; 782 eprintsoline(so, error); 783 goto done; 784 } 785 if ((so->so_mode & SM_CONNREQUIRED) && 786 (so->so_state & SS_CANTREBIND)) { 787 error = EINVAL; 788 unbind_on_err = 0; 789 eprintsoline(so, error); 790 goto done; 791 } 792 error = sotpi_unbind(so, 0); 793 if (error) { 794 eprintsoline(so, error); 795 goto done; 796 } 797 ASSERT(!(so->so_state & SS_ISBOUND)); 798 if (name == NULL) { 799 so->so_state &= 800 ~(SS_ISCONNECTED|SS_ISCONNECTING); 801 goto done; 802 } 803 } 804 805 /* X/Open requires this check */ 806 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 807 if (xnet_check_print) { 808 printf("sockfs: X/Open bind state check " 809 "caused EINVAL\n"); 810 } 811 error = EINVAL; 812 goto done; 813 } 814 815 switch (so->so_family) { 816 case AF_UNIX: 817 /* 818 * All AF_UNIX addresses are nul terminated 819 * when copied (copyin_name) in so the minimum 820 * length is 3 bytes. 821 */ 822 if (name == NULL || 823 (ssize_t)namelen <= sizeof (short) + 1) { 824 error = EISDIR; 825 eprintsoline(so, error); 826 goto done; 827 } 828 /* 829 * Verify so_family matches the bound family. 830 * BSD does not check this for AF_UNIX resulting 831 * in funny mknods. 832 */ 833 if (name->sa_family != so->so_family) { 834 error = EAFNOSUPPORT; 835 goto done; 836 } 837 break; 838 case AF_INET: 839 if (name == NULL) { 840 error = EINVAL; 841 eprintsoline(so, error); 842 goto done; 843 } 844 if ((size_t)namelen != sizeof (sin_t)) { 845 error = name->sa_family != so->so_family ? 846 EAFNOSUPPORT : EINVAL; 847 eprintsoline(so, error); 848 goto done; 849 } 850 if ((flags & _SOBIND_XPG4_2) && 851 (name->sa_family != so->so_family)) { 852 /* 853 * This check has to be made for X/Open 854 * sockets however application failures have 855 * been observed when it is applied to 856 * all sockets. 857 */ 858 error = EAFNOSUPPORT; 859 eprintsoline(so, error); 860 goto done; 861 } 862 /* 863 * Force a zero sa_family to match so_family. 864 * 865 * Some programs like inetd(1M) don't set the 866 * family field. Other programs leave 867 * sin_family set to garbage - SunOS 4.X does 868 * not check the family field on a bind. 869 * We use the family field that 870 * was passed in to the socket() call. 
871 */ 872 name->sa_family = so->so_family; 873 break; 874 875 case AF_INET6: { 876 #ifdef DEBUG 877 sin6_t *sin6 = (sin6_t *)name; 878 #endif /* DEBUG */ 879 880 if (name == NULL) { 881 error = EINVAL; 882 eprintsoline(so, error); 883 goto done; 884 } 885 if ((size_t)namelen != sizeof (sin6_t)) { 886 error = name->sa_family != so->so_family ? 887 EAFNOSUPPORT : EINVAL; 888 eprintsoline(so, error); 889 goto done; 890 } 891 if (name->sa_family != so->so_family) { 892 /* 893 * With IPv6 we require the family to match 894 * unlike in IPv4. 895 */ 896 error = EAFNOSUPPORT; 897 eprintsoline(so, error); 898 goto done; 899 } 900 #ifdef DEBUG 901 /* 902 * Verify that apps don't forget to clear 903 * sin6_scope_id etc 904 */ 905 if (sin6->sin6_scope_id != 0 && 906 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 907 zcmn_err(getzoneid(), CE_WARN, 908 "bind with uninitialized sin6_scope_id " 909 "(%d) on socket. Pid = %d\n", 910 (int)sin6->sin6_scope_id, 911 (int)curproc->p_pid); 912 } 913 if (sin6->__sin6_src_id != 0) { 914 zcmn_err(getzoneid(), CE_WARN, 915 "bind with uninitialized __sin6_src_id " 916 "(%d) on socket. Pid = %d\n", 917 (int)sin6->__sin6_src_id, 918 (int)curproc->p_pid); 919 } 920 #endif /* DEBUG */ 921 break; 922 } 923 default: 924 /* 925 * Don't do any length or sa_family check to allow 926 * non-sockaddr style addresses. 927 */ 928 if (name == NULL) { 929 error = EINVAL; 930 eprintsoline(so, error); 931 goto done; 932 } 933 break; 934 } 935 936 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 937 error = ENAMETOOLONG; 938 eprintsoline(so, error); 939 goto done; 940 } 941 /* 942 * Save local address. 943 */ 944 sti->sti_laddr_len = (socklen_t)namelen; 945 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 946 bcopy(name, sti->sti_laddr_sa, namelen); 947 948 addr = sti->sti_laddr_sa; 949 addrlen = (t_uscalar_t)sti->sti_laddr_len; 950 switch (so->so_family) { 951 case AF_INET6: 952 case AF_INET: 953 break; 954 case AF_UNIX: { 955 struct sockaddr_un *soun = 956 (struct sockaddr_un *)sti->sti_laddr_sa; 957 struct vnode *vp, *rvp; 958 struct vattr vattr; 959 960 ASSERT(sti->sti_ux_bound_vp == NULL); 961 /* 962 * Create vnode for the specified path name. 963 * Keep vnode held with a reference in sti_ux_bound_vp. 964 * Use the vnode pointer as the address used in the 965 * bind with the transport. 966 * 967 * Use the same mode as in BSD. In particular this does 968 * not observe the umask. 969 */ 970 /* MAXPATHLEN + soun_family + nul termination */ 971 if (sti->sti_laddr_len > 972 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 973 error = ENAMETOOLONG; 974 eprintsoline(so, error); 975 goto done; 976 } 977 vattr.va_type = VSOCK; 978 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 979 vattr.va_mask = AT_TYPE|AT_MODE; 980 /* NOTE: holding so_lock */ 981 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 982 EXCL, 0, &vp, CRMKNOD, 0, 0); 983 if (error) { 984 if (error == EEXIST) 985 error = EADDRINUSE; 986 eprintsoline(so, error); 987 goto done; 988 } 989 /* 990 * Establish pointer from the underlying filesystem 991 * vnode to the socket node. 992 * sti_ux_bound_vp and v_stream->sd_vnode form the 993 * cross-linkage between the underlying filesystem 994 * node and the socket node. 
995 */ 996 997 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 998 VN_HOLD(rvp); 999 VN_RELE(vp); 1000 vp = rvp; 1001 } 1002 1003 ASSERT(SOTOV(so)->v_stream); 1004 mutex_enter(&vp->v_lock); 1005 vp->v_stream = SOTOV(so)->v_stream; 1006 sti->sti_ux_bound_vp = vp; 1007 mutex_exit(&vp->v_lock); 1008 1009 /* 1010 * Use the vnode pointer value as a unique address 1011 * (together with the magic number to avoid conflicts 1012 * with implicit binds) in the transport provider. 1013 */ 1014 sti->sti_ux_laddr.soua_vp = 1015 (void *)sti->sti_ux_bound_vp; 1016 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1017 addr = &sti->sti_ux_laddr; 1018 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1019 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1020 addrlen, 1021 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1022 break; 1023 } 1024 } /* end switch (so->so_family) */ 1025 } 1026 1027 /* 1028 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1029 * the transport can start passing up T_CONN_IND messages 1030 * as soon as it receives the bind req and strsock_proto() 1031 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1032 */ 1033 if (flags & _SOBIND_LISTEN) { 1034 if ((so->so_state & SS_ACCEPTCONN) == 0) 1035 clear_acceptconn_on_err = B_TRUE; 1036 save_so_backlog = so->so_backlog; 1037 restore_backlog_on_err = B_TRUE; 1038 so->so_state |= SS_ACCEPTCONN; 1039 so->so_backlog = backlog; 1040 } 1041 1042 /* 1043 * If NL7C addr(s) have been configured check for addr/port match, 1044 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 1045 * 1046 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 1047 * family sockets only. If match mark as such. 1048 */ 1049 if (nl7c_enabled && ((addr != NULL && 1050 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1051 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 1052 sti->sti_nl7c_flags == NL7C_AF_NCA)) { 1053 /* 1054 * NL7C is not supported in non-global zones, 1055 * we enforce this restriction here. 1056 */ 1057 if (so->so_zoneid == GLOBAL_ZONEID) { 1058 /* An NL7C socket, mark it */ 1059 sti->sti_nl7c_flags |= NL7C_ENABLED; 1060 if (nl7c == NULL) { 1061 /* 1062 * Was an AF_NCA bind() so add it to the 1063 * addr list for reporting purposes. 1064 */ 1065 nl7c = nl7c_add_addr(addr, addrlen); 1066 } 1067 } else 1068 nl7c = NULL; 1069 } 1070 1071 /* 1072 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1073 * for other transports we will send in a O_T_BIND_REQ. 1074 */ 1075 if (tcp_udp_xport && 1076 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1077 PRIM_type = T_BIND_REQ; 1078 1079 bind_req.PRIM_type = PRIM_type; 1080 bind_req.ADDR_length = addrlen; 1081 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1082 bind_req.CONIND_number = backlog; 1083 /* NOTE: holding so_lock while sleeping */ 1084 mp = soallocproto2(&bind_req, sizeof (bind_req), 1085 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1086 sti->sti_laddr_valid = 0; 1087 1088 /* Done using sti_laddr_sa - can drop the lock */ 1089 mutex_exit(&so->so_lock); 1090 1091 /* 1092 * Intercept the bind_req message here to check if this <address/port> 1093 * was configured as an SSL proxy server, or if another endpoint was 1094 * already configured to act as a proxy for us. 1095 * 1096 * Note, only if NL7C not enabled for this socket. 
1097 */ 1098 if (nl7c == NULL && 1099 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1100 so->so_type == SOCK_STREAM) { 1101 1102 if (sti->sti_kssl_ent != NULL) { 1103 kssl_release_ent(sti->sti_kssl_ent, so, 1104 sti->sti_kssl_type); 1105 sti->sti_kssl_ent = NULL; 1106 } 1107 1108 sti->sti_kssl_type = kssl_check_proxy(mp, so, 1109 &sti->sti_kssl_ent); 1110 switch (sti->sti_kssl_type) { 1111 case KSSL_NO_PROXY: 1112 break; 1113 1114 case KSSL_HAS_PROXY: 1115 mutex_enter(&so->so_lock); 1116 goto skip_transport; 1117 1118 case KSSL_IS_PROXY: 1119 break; 1120 } 1121 } 1122 1123 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1124 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1125 if (error) { 1126 eprintsoline(so, error); 1127 mutex_enter(&so->so_lock); 1128 goto done; 1129 } 1130 1131 mutex_enter(&so->so_lock); 1132 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1133 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1134 if (error) { 1135 eprintsoline(so, error); 1136 goto done; 1137 } 1138 skip_transport: 1139 ASSERT(mp); 1140 /* 1141 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1142 * strsock_proto while the lock was dropped above, the bind 1143 * is allowed to complete. 1144 */ 1145 1146 /* Mark as bound. This will be undone if we detect errors below. */ 1147 if (flags & _SOBIND_NOXLATE) { 1148 ASSERT(so->so_family == AF_UNIX); 1149 sti->sti_faddr_noxlate = 1; 1150 } 1151 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1152 so->so_state |= SS_ISBOUND; 1153 ASSERT(sti->sti_unbind_mp); 1154 1155 /* note that we've already set SS_ACCEPTCONN above */ 1156 1157 /* 1158 * Recompute addrlen - an unspecied bind sent down an 1159 * address of length zero but we expect the appropriate length 1160 * in return. 1161 */ 1162 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 1163 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1164 1165 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1166 /* 1167 * The alignment restriction is really too strict but 1168 * we want enough alignment to inspect the fields of 1169 * a sockaddr_in. 1170 */ 1171 addr = sogetoff(mp, bind_ack->ADDR_offset, 1172 bind_ack->ADDR_length, 1173 __TPI_ALIGN_SIZE); 1174 if (addr == NULL) { 1175 freemsg(mp); 1176 error = EPROTO; 1177 eprintsoline(so, error); 1178 goto done; 1179 } 1180 if (!(flags & _SOBIND_UNSPEC)) { 1181 /* 1182 * Verify that the transport didn't return something we 1183 * did not want e.g. an address other than what we asked for. 1184 * 1185 * NOTE: These checks would go away if/when we switch to 1186 * using the new TPI (in which the transport would fail 1187 * the request instead of assigning a different address). 1188 * 1189 * NOTE2: For protocols that we don't know (i.e. any 1190 * other than AF_INET6, AF_INET and AF_UNIX), we 1191 * cannot know if the transport should be expected to 1192 * return the same address as that requested. 1193 * 1194 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1195 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 1196 * 1197 * For example, in the case of netatalk it may be 1198 * inappropriate for the transport to return the 1199 * requested address (as it may have allocated a local 1200 * port number in behaviour similar to that of an 1201 * AF_INET bind request with a port number of zero). 
1202 * 1203 * Given the definition of O_T_BIND_REQ, where the 1204 * transport may bind to an address other than the 1205 * requested address, it's not possible to determine 1206 * whether a returned address that differs from the 1207 * requested address is a reason to fail (because the 1208 * requested address was not available) or succeed 1209 * (because the transport allocated an appropriate 1210 * address and/or port). 1211 * 1212 * sockfs currently requires that the transport return 1213 * the requested address in the T_BIND_ACK, unless 1214 * there is code here to allow for any discrepancy. 1215 * Such code exists for AF_INET and AF_INET6. 1216 * 1217 * Netatalk chooses to return the requested address 1218 * rather than the (correct) allocated address. This 1219 * means that netatalk violates the TPI specification 1220 * (and would not function correctly if used from a 1221 * TLI application), but it does mean that it works 1222 * with sockfs. 1223 * 1224 * As noted above, using the newer XTI bind primitive 1225 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1226 * allow sockfs to be more sure about whether or not 1227 * the bind request had succeeded (as transports are 1228 * not permitted to bind to a different address than 1229 * that requested - they must return failure). 1230 * Unfortunately, support for T_BIND_REQ may not be 1231 * present in all transport implementations (netatalk, 1232 * for example, doesn't have it), making the 1233 * transition difficult. 1234 */ 1235 if (bind_ack->ADDR_length != addrlen) { 1236 /* Assumes that the requested address was in use */ 1237 freemsg(mp); 1238 error = EADDRINUSE; 1239 eprintsoline(so, error); 1240 goto done; 1241 } 1242 1243 switch (so->so_family) { 1244 case AF_INET6: 1245 case AF_INET: { 1246 sin_t *rname, *aname; 1247 1248 rname = (sin_t *)addr; 1249 aname = (sin_t *)sti->sti_laddr_sa; 1250 1251 /* 1252 * Take advantage of the alignment 1253 * of sin_port and sin6_port which fall 1254 * in the same place in their data structures. 1255 * Just use sin_port for either address family. 1256 * 1257 * This may become a problem if (heaven forbid) 1258 * there's a separate ipv6port_reserved... :-P 1259 * 1260 * Binding to port 0 has the semantics of letting 1261 * the transport bind to any port. 1262 * 1263 * If the transport is TCP or UDP since we had sent 1264 * a T_BIND_REQ we would not get a port other than 1265 * what we asked for. 1266 */ 1267 if (tcp_udp_xport) { 1268 /* 1269 * Pick up the new port number if we bound to 1270 * port 0. 1271 */ 1272 if (aname->sin_port == 0) 1273 aname->sin_port = rname->sin_port; 1274 sti->sti_laddr_valid = 1; 1275 break; 1276 } 1277 if (aname->sin_port != 0 && 1278 aname->sin_port != rname->sin_port) { 1279 freemsg(mp); 1280 error = EADDRINUSE; 1281 eprintsoline(so, error); 1282 goto done; 1283 } 1284 /* 1285 * Pick up the new port number if we bound to port 0. 1286 */ 1287 aname->sin_port = rname->sin_port; 1288 1289 /* 1290 * Unfortunately, addresses aren't _quite_ the same. 
1291 */ 1292 if (so->so_family == AF_INET) { 1293 if (aname->sin_addr.s_addr != 1294 rname->sin_addr.s_addr) { 1295 freemsg(mp); 1296 error = EADDRNOTAVAIL; 1297 eprintsoline(so, error); 1298 goto done; 1299 } 1300 } else { 1301 sin6_t *rname6 = (sin6_t *)rname; 1302 sin6_t *aname6 = (sin6_t *)aname; 1303 1304 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1305 &rname6->sin6_addr)) { 1306 freemsg(mp); 1307 error = EADDRNOTAVAIL; 1308 eprintsoline(so, error); 1309 goto done; 1310 } 1311 } 1312 break; 1313 } 1314 case AF_UNIX: 1315 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1316 freemsg(mp); 1317 error = EADDRINUSE; 1318 eprintsoline(so, error); 1319 eprintso(so, 1320 ("addrlen %d, addr 0x%x, vp %p\n", 1321 addrlen, *((int *)addr), 1322 (void *)sti->sti_ux_bound_vp)); 1323 goto done; 1324 } 1325 sti->sti_laddr_valid = 1; 1326 break; 1327 default: 1328 /* 1329 * NOTE: This assumes that addresses can be 1330 * byte-compared for equivalence. 1331 */ 1332 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1333 freemsg(mp); 1334 error = EADDRINUSE; 1335 eprintsoline(so, error); 1336 goto done; 1337 } 1338 /* 1339 * Don't mark sti_laddr_valid, as we cannot be 1340 * sure that the returned address is the real 1341 * bound address when talking to an unknown 1342 * transport. 1343 */ 1344 break; 1345 } 1346 } else { 1347 /* 1348 * Save for returned address for getsockname. 1349 * Needed for unspecific bind unless transport supports 1350 * the TI_GETMYNAME ioctl. 1351 * Do this for AF_INET{,6} even though they do, as 1352 * caching info here is much better performance than 1353 * a TPI/STREAMS trip to the transport for getsockname. 1354 * Any which can't for some reason _must_ _not_ set 1355 * sti_laddr_valid here for the caching version of 1356 * getsockname to not break; 1357 */ 1358 switch (so->so_family) { 1359 case AF_UNIX: 1360 /* 1361 * Record the address bound with the transport 1362 * for use by socketpair. 1363 */ 1364 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1365 sti->sti_laddr_valid = 1; 1366 break; 1367 case AF_INET: 1368 case AF_INET6: 1369 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1370 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1371 sti->sti_laddr_valid = 1; 1372 break; 1373 default: 1374 /* 1375 * Don't mark sti_laddr_valid, as we cannot be 1376 * sure that the returned address is the real 1377 * bound address when talking to an unknown 1378 * transport. 
1379 */ 1380 break; 1381 } 1382 } 1383 1384 if (nl7c != NULL) { 1385 /* Register listen()er sonode pointer with NL7C */ 1386 nl7c_listener_addr(nl7c, so); 1387 } 1388 1389 freemsg(mp); 1390 1391 done: 1392 if (error) { 1393 /* reset state & backlog to values held on entry */ 1394 if (clear_acceptconn_on_err == B_TRUE) 1395 so->so_state &= ~SS_ACCEPTCONN; 1396 if (restore_backlog_on_err == B_TRUE) 1397 so->so_backlog = save_so_backlog; 1398 1399 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1400 int err; 1401 1402 err = sotpi_unbind(so, 0); 1403 /* LINTED - statement has no consequent: if */ 1404 if (err) { 1405 eprintsoline(so, error); 1406 } else { 1407 ASSERT(!(so->so_state & SS_ISBOUND)); 1408 } 1409 } 1410 } 1411 if (!(flags & _SOBIND_LOCK_HELD)) { 1412 so_unlock_single(so, SOLOCKED); 1413 mutex_exit(&so->so_lock); 1414 } else { 1415 ASSERT(MUTEX_HELD(&so->so_lock)); 1416 ASSERT(so->so_flag & SOLOCKED); 1417 } 1418 return (error); 1419 } 1420 1421 /* bind the socket */ 1422 static int 1423 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1424 int flags, struct cred *cr) 1425 { 1426 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1427 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1428 1429 flags &= ~_SOBIND_SOCKETPAIR; 1430 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1431 } 1432 1433 /* 1434 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1435 * address, or when listen needs to unbind and bind. 1436 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1437 * so that a sobind can pick them up. 1438 */ 1439 static int 1440 sotpi_unbind(struct sonode *so, int flags) 1441 { 1442 struct T_unbind_req unbind_req; 1443 int error = 0; 1444 mblk_t *mp; 1445 sotpi_info_t *sti = SOTOTPI(so); 1446 1447 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1448 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1449 1450 ASSERT(MUTEX_HELD(&so->so_lock)); 1451 ASSERT(so->so_flag & SOLOCKED); 1452 1453 if (!(so->so_state & SS_ISBOUND)) { 1454 error = EINVAL; 1455 eprintsoline(so, error); 1456 goto done; 1457 } 1458 1459 mutex_exit(&so->so_lock); 1460 1461 /* 1462 * Flush the read and write side (except stream head read queue) 1463 * and send down T_UNBIND_REQ. 1464 */ 1465 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1466 1467 unbind_req.PRIM_type = T_UNBIND_REQ; 1468 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1469 0, _ALLOC_SLEEP, CRED()); 1470 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1471 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1472 mutex_enter(&so->so_lock); 1473 if (error) { 1474 eprintsoline(so, error); 1475 goto done; 1476 } 1477 1478 error = sowaitokack(so, T_UNBIND_REQ); 1479 if (error) { 1480 eprintsoline(so, error); 1481 goto done; 1482 } 1483 1484 /* 1485 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1486 * strsock_proto while the lock was dropped above, the unbind 1487 * is allowed to complete. 1488 */ 1489 if (!(flags & _SOUNBIND_REBIND)) { 1490 /* 1491 * Clear out bound address. 
1492 */ 1493 vnode_t *vp; 1494 1495 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1496 1497 /* Undo any SSL proxy setup */ 1498 if ((so->so_family == AF_INET || 1499 so->so_family == AF_INET6) && 1500 (so->so_type == SOCK_STREAM) && 1501 (sti->sti_kssl_ent != NULL)) { 1502 kssl_release_ent(sti->sti_kssl_ent, so, 1503 sti->sti_kssl_type); 1504 sti->sti_kssl_ent = NULL; 1505 sti->sti_kssl_type = KSSL_NO_PROXY; 1506 } 1507 sti->sti_ux_bound_vp = NULL; 1508 vn_rele_stream(vp); 1509 } 1510 /* Clear out address */ 1511 sti->sti_laddr_len = 0; 1512 } 1513 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1514 sti->sti_laddr_valid = 0; 1515 1516 done: 1517 1518 /* If the caller held the lock don't release it here */ 1519 ASSERT(MUTEX_HELD(&so->so_lock)); 1520 ASSERT(so->so_flag & SOLOCKED); 1521 1522 return (error); 1523 } 1524 1525 /* 1526 * listen on the socket. 1527 * For TPI conforming transports this has to first unbind with the transport 1528 * and then bind again using the new backlog. 1529 */ 1530 /* ARGSUSED */ 1531 int 1532 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1533 { 1534 int error = 0; 1535 sotpi_info_t *sti = SOTOTPI(so); 1536 1537 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1538 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1539 1540 if (sti->sti_serv_type == T_CLTS) 1541 return (EOPNOTSUPP); 1542 1543 /* 1544 * If the socket is ready to accept connections already, then 1545 * return without doing anything. This avoids a problem where 1546 * a second listen() call fails if a connection is pending and 1547 * leaves the socket unbound. Only when we are not unbinding 1548 * with the transport can we safely increase the backlog. 1549 */ 1550 if (so->so_state & SS_ACCEPTCONN && 1551 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1552 /*CONSTCOND*/ 1553 !solisten_tpi_tcp)) 1554 return (0); 1555 1556 if (so->so_state & SS_ISCONNECTED) 1557 return (EINVAL); 1558 1559 mutex_enter(&so->so_lock); 1560 so_lock_single(so); /* Set SOLOCKED */ 1561 1562 /* 1563 * If the listen doesn't change the backlog we do nothing. 1564 * This avoids an EPROTO error from the transport. 1565 */ 1566 if ((so->so_state & SS_ACCEPTCONN) && 1567 so->so_backlog == backlog) 1568 goto done; 1569 1570 if (!(so->so_state & SS_ISBOUND)) { 1571 /* 1572 * Must have been explicitly bound in the UNIX domain. 1573 */ 1574 if (so->so_family == AF_UNIX) { 1575 error = EINVAL; 1576 goto done; 1577 } 1578 error = sotpi_bindlisten(so, NULL, 0, backlog, 1579 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1580 } else if (backlog > 0) { 1581 /* 1582 * AF_INET{,6} hack to avoid losing the port. 1583 * Assumes that all AF_INET{,6} transports can handle a 1584 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1585 * has already bound thus it is possible to avoid the unbind. 1586 */ 1587 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1588 /*CONSTCOND*/ 1589 !solisten_tpi_tcp)) { 1590 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1591 if (error) 1592 goto done; 1593 } 1594 error = sotpi_bindlisten(so, NULL, 0, backlog, 1595 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1596 } else { 1597 so->so_state |= SS_ACCEPTCONN; 1598 so->so_backlog = backlog; 1599 } 1600 if (error) 1601 goto done; 1602 ASSERT(so->so_state & SS_ACCEPTCONN); 1603 done: 1604 so_unlock_single(so, SOLOCKED); 1605 mutex_exit(&so->so_lock); 1606 return (error); 1607 } 1608 1609 /* 1610 * Disconnect either a specified seqno or all (-1). 1611 * The former is used on listening sockets only. 
1612 * 1613 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1614 * the current use of sodisconnect(seqno == -1) is only for shutdown 1615 * so there is no point (and potentially incorrect) to unbind. 1616 */ 1617 static int 1618 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1619 { 1620 struct T_discon_req discon_req; 1621 int error = 0; 1622 mblk_t *mp; 1623 1624 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1625 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1626 1627 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1628 mutex_enter(&so->so_lock); 1629 so_lock_single(so); /* Set SOLOCKED */ 1630 } else { 1631 ASSERT(MUTEX_HELD(&so->so_lock)); 1632 ASSERT(so->so_flag & SOLOCKED); 1633 } 1634 1635 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1636 error = EINVAL; 1637 eprintsoline(so, error); 1638 goto done; 1639 } 1640 1641 mutex_exit(&so->so_lock); 1642 /* 1643 * Flush the write side (unless this is a listener) 1644 * and then send down a T_DISCON_REQ. 1645 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1646 * and other messages.) 1647 */ 1648 if (!(so->so_state & SS_ACCEPTCONN)) 1649 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1650 1651 discon_req.PRIM_type = T_DISCON_REQ; 1652 discon_req.SEQ_number = seqno; 1653 mp = soallocproto1(&discon_req, sizeof (discon_req), 1654 0, _ALLOC_SLEEP, CRED()); 1655 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1656 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1657 mutex_enter(&so->so_lock); 1658 if (error) { 1659 eprintsoline(so, error); 1660 goto done; 1661 } 1662 1663 error = sowaitokack(so, T_DISCON_REQ); 1664 if (error) { 1665 eprintsoline(so, error); 1666 goto done; 1667 } 1668 /* 1669 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1670 * strsock_proto while the lock was dropped above, the disconnect 1671 * is allowed to complete. However, it is not possible to 1672 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1673 */ 1674 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1675 SOTOTPI(so)->sti_laddr_valid = 0; 1676 SOTOTPI(so)->sti_faddr_valid = 0; 1677 done: 1678 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1679 so_unlock_single(so, SOLOCKED); 1680 mutex_exit(&so->so_lock); 1681 } else { 1682 /* If the caller held the lock don't release it here */ 1683 ASSERT(MUTEX_HELD(&so->so_lock)); 1684 ASSERT(so->so_flag & SOLOCKED); 1685 } 1686 return (error); 1687 } 1688 1689 /* ARGSUSED */ 1690 int 1691 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1692 struct sonode **nsop) 1693 { 1694 struct T_conn_ind *conn_ind; 1695 struct T_conn_res *conn_res; 1696 int error = 0; 1697 mblk_t *mp, *ctxmp, *ack_mp; 1698 struct sonode *nso; 1699 vnode_t *nvp; 1700 void *src; 1701 t_uscalar_t srclen; 1702 void *opt; 1703 t_uscalar_t optlen; 1704 t_scalar_t PRIM_type; 1705 t_scalar_t SEQ_number; 1706 size_t sinlen; 1707 sotpi_info_t *sti = SOTOTPI(so); 1708 sotpi_info_t *nsti; 1709 1710 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1711 (void *)so, fflag, (void *)nsop, 1712 pr_state(so->so_state, so->so_mode))); 1713 1714 /* 1715 * Defer single-threading the accepting socket until 1716 * the T_CONN_IND has been received and parsed and the 1717 * new sonode has been opened. 
1718 */ 1719 1720 /* Check that we are not already connected */ 1721 if ((so->so_state & SS_ACCEPTCONN) == 0) 1722 goto conn_bad; 1723 again: 1724 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1725 goto e_bad; 1726 1727 ASSERT(mp != NULL); 1728 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1729 ctxmp = mp->b_cont; 1730 1731 /* 1732 * Save SEQ_number for error paths. 1733 */ 1734 SEQ_number = conn_ind->SEQ_number; 1735 1736 srclen = conn_ind->SRC_length; 1737 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1738 if (src == NULL) { 1739 error = EPROTO; 1740 freemsg(mp); 1741 eprintsoline(so, error); 1742 goto disconnect_unlocked; 1743 } 1744 optlen = conn_ind->OPT_length; 1745 switch (so->so_family) { 1746 case AF_INET: 1747 case AF_INET6: 1748 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1749 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1750 &opt, conn_ind->OPT_length); 1751 } else { 1752 /* 1753 * The transport (in this case TCP) hasn't sent up 1754 * a pointer to an instance for the accept fast-path. 1755 * Disable fast-path completely because the call to 1756 * sotpi_create() below would otherwise create an 1757 * incomplete TCP instance, which would lead to 1758 * problems when sockfs sends a normal T_CONN_RES 1759 * message down the new stream. 1760 */ 1761 if (sti->sti_direct) { 1762 int rval; 1763 /* 1764 * For consistency we inform tcp to disable 1765 * direct interface on the listener, though 1766 * we can certainly live without doing this 1767 * because no data will ever travel upstream 1768 * on the listening socket. 1769 */ 1770 sti->sti_direct = 0; 1771 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1772 0, 0, K_TO_K, cr, &rval); 1773 } 1774 opt = NULL; 1775 optlen = 0; 1776 } 1777 break; 1778 case AF_UNIX: 1779 default: 1780 if (optlen != 0) { 1781 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1782 __TPI_ALIGN_SIZE); 1783 if (opt == NULL) { 1784 error = EPROTO; 1785 freemsg(mp); 1786 eprintsoline(so, error); 1787 goto disconnect_unlocked; 1788 } 1789 } 1790 if (so->so_family == AF_UNIX) { 1791 if (!sti->sti_faddr_noxlate) { 1792 src = NULL; 1793 srclen = 0; 1794 } 1795 /* Extract src address from options */ 1796 if (optlen != 0) 1797 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1798 } 1799 break; 1800 } 1801 1802 /* 1803 * Create the new socket. 1804 */ 1805 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1806 if (nso == NULL) { 1807 ASSERT(error != 0); 1808 /* 1809 * Accept can not fail with ENOBUFS. sotpi_create 1810 * sleeps waiting for memory until a signal is caught 1811 * so return EINTR. 1812 */ 1813 freemsg(mp); 1814 if (error == ENOBUFS) 1815 error = EINTR; 1816 goto e_disc_unl; 1817 } 1818 nvp = SOTOV(nso); 1819 nsti = SOTOTPI(nso); 1820 1821 /* 1822 * If the transport sent up an SSL connection context, then attach 1823 * it the new socket, and set the (sd_wputdatafunc)() and 1824 * (sd_rputdatafunc)() stream head hooks to intercept and process 1825 * SSL records. 1826 */ 1827 if (ctxmp != NULL) { 1828 /* 1829 * This kssl_ctx_t is already held for us by the transport. 1830 * So, we don't need to do a kssl_hold_ctx() here. 1831 */ 1832 nsti->sti_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1833 freemsg(ctxmp); 1834 mp->b_cont = NULL; 1835 strsetrwputdatahooks(nvp, strsock_kssl_input, 1836 strsock_kssl_output); 1837 } 1838 #ifdef DEBUG 1839 /* 1840 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1841 * it's inherited early to allow debugging of the accept code itself. 
1842 */ 1843 nso->so_options |= so->so_options & SO_DEBUG; 1844 #endif /* DEBUG */ 1845 1846 /* 1847 * Save the SRC address from the T_CONN_IND 1848 * for getpeername to work on AF_UNIX and on transports that do not 1849 * support TI_GETPEERNAME. 1850 * 1851 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1852 * copyin_name(). 1853 */ 1854 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1855 error = EINVAL; 1856 freemsg(mp); 1857 eprintsoline(so, error); 1858 goto disconnect_vp_unlocked; 1859 } 1860 nsti->sti_faddr_len = (socklen_t)srclen; 1861 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1862 bcopy(src, nsti->sti_faddr_sa, srclen); 1863 nsti->sti_faddr_valid = 1; 1864 1865 /* 1866 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1867 */ 1868 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1869 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1870 cred_t *cr; 1871 pid_t cpid; 1872 1873 cr = msg_getcred(mp, &cpid); 1874 if (cr != NULL) { 1875 crhold(cr); 1876 nso->so_peercred = cr; 1877 nso->so_cpid = cpid; 1878 } 1879 freemsg(mp); 1880 1881 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1882 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1883 if (mp == NULL) { 1884 /* 1885 * Accept can not fail with ENOBUFS. 1886 * A signal was caught so return EINTR. 1887 */ 1888 error = EINTR; 1889 eprintsoline(so, error); 1890 goto disconnect_vp_unlocked; 1891 } 1892 conn_res = (struct T_conn_res *)mp->b_rptr; 1893 } else { 1894 /* 1895 * For efficency reasons we use msg_extractcred; no crhold 1896 * needed since db_credp is cleared (i.e., we move the cred 1897 * from the message to so_peercred. 1898 */ 1899 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1900 1901 mp->b_rptr = DB_BASE(mp); 1902 conn_res = (struct T_conn_res *)mp->b_rptr; 1903 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1904 1905 mblk_setcred(mp, cr, curproc->p_pid); 1906 } 1907 1908 /* 1909 * New socket must be bound at least in sockfs and, except for AF_INET, 1910 * (or AF_INET6) it also has to be bound in the transport provider. 1911 * We set the local address in the sonode from the T_OK_ACK of the 1912 * T_CONN_RES. For this reason the address we bind to here isn't 1913 * important. 1914 */ 1915 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1916 /*CONSTCOND*/ 1917 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1918 /* 1919 * Optimization for AF_INET{,6} transports 1920 * that can handle a T_CONN_RES without being bound. 1921 */ 1922 mutex_enter(&nso->so_lock); 1923 so_automatic_bind(nso); 1924 mutex_exit(&nso->so_lock); 1925 } else { 1926 /* Perform NULL bind with the transport provider. */ 1927 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1928 cr)) != 0) { 1929 ASSERT(error != ENOBUFS); 1930 freemsg(mp); 1931 eprintsoline(nso, error); 1932 goto disconnect_vp_unlocked; 1933 } 1934 } 1935 1936 /* 1937 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1938 * so that any data arriving on the new socket will cause the 1939 * appropriate signals to be delivered for the new socket. 1940 * 1941 * No other thread (except strsock_proto and strsock_misc) 1942 * can access the new socket thus we relax the locking. 
1943 */ 1944 nso->so_pgrp = so->so_pgrp; 1945 nso->so_state |= so->so_state & SS_ASYNC; 1946 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1947 1948 if (nso->so_pgrp != 0) { 1949 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1950 eprintsoline(nso, error); 1951 error = 0; 1952 nso->so_pgrp = 0; 1953 } 1954 } 1955 1956 /* 1957 * Make note of the socket level options. TCP and IP level options 1958 * are already inherited. We could do all this after accept is 1959 * successful but doing it here simplifies code and no harm done 1960 * for error case. 1961 */ 1962 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1963 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1964 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1965 nso->so_sndbuf = so->so_sndbuf; 1966 nso->so_rcvbuf = so->so_rcvbuf; 1967 if (nso->so_options & SO_LINGER) 1968 nso->so_linger = so->so_linger; 1969 1970 /* 1971 * Note that the following sti_direct code path should be 1972 * removed once we are confident that the direct sockets 1973 * do not result in any degradation. 1974 */ 1975 if (sti->sti_direct) { 1976 1977 ASSERT(opt != NULL); 1978 1979 conn_res->OPT_length = optlen; 1980 conn_res->OPT_offset = MBLKL(mp); 1981 bcopy(&opt, mp->b_wptr, optlen); 1982 mp->b_wptr += optlen; 1983 conn_res->PRIM_type = T_CONN_RES; 1984 conn_res->ACCEPTOR_id = 0; 1985 PRIM_type = T_CONN_RES; 1986 1987 /* Send down the T_CONN_RES on acceptor STREAM */ 1988 error = kstrputmsg(SOTOV(nso), mp, NULL, 1989 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1990 if (error) { 1991 mutex_enter(&so->so_lock); 1992 so_lock_single(so); 1993 eprintsoline(so, error); 1994 goto disconnect_vp; 1995 } 1996 mutex_enter(&nso->so_lock); 1997 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1998 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1999 if (error) { 2000 mutex_exit(&nso->so_lock); 2001 mutex_enter(&so->so_lock); 2002 so_lock_single(so); 2003 eprintsoline(so, error); 2004 goto disconnect_vp; 2005 } 2006 if (nso->so_family == AF_INET) { 2007 sin_t *sin; 2008 2009 sin = (sin_t *)(ack_mp->b_rptr + 2010 sizeof (struct T_ok_ack)); 2011 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 2012 nsti->sti_laddr_len = sizeof (sin_t); 2013 } else { 2014 sin6_t *sin6; 2015 2016 sin6 = (sin6_t *)(ack_mp->b_rptr + 2017 sizeof (struct T_ok_ack)); 2018 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 2019 nsti->sti_laddr_len = sizeof (sin6_t); 2020 } 2021 freemsg(ack_mp); 2022 2023 nso->so_state |= SS_ISCONNECTED; 2024 nso->so_proto_handle = (sock_lower_handle_t)opt; 2025 nsti->sti_laddr_valid = 1; 2026 2027 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 2028 /* 2029 * A NL7C marked listen()er so the new socket 2030 * inherits the listen()er's NL7C state, except 2031 * for NL7C_POLLIN. 2032 * 2033 * Only call NL7C to process the new socket if 2034 * the listen socket allows blocking i/o. 2035 */ 2036 nsti->sti_nl7c_flags = 2037 sti->sti_nl7c_flags & (~NL7C_POLLIN); 2038 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 2039 /* 2040 * Nonblocking accept() just make it 2041 * persist to defer processing to the 2042 * read-side syscall (e.g. read). 2043 */ 2044 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 2045 } else if (nl7c_process(nso, B_FALSE)) { 2046 /* 2047 * NL7C has completed processing on the 2048 * socket, close the socket and back to 2049 * the top to await the next T_CONN_IND. 
2050 */ 2051 mutex_exit(&nso->so_lock); 2052 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 2053 cr, NULL); 2054 VN_RELE(nvp); 2055 goto again; 2056 } 2057 /* Pass the new socket out */ 2058 } 2059 2060 mutex_exit(&nso->so_lock); 2061 2062 /* 2063 * It's possible, through the use of autopush for example, 2064 * that the acceptor stream may not support sti_direct 2065 * semantics. If the new socket does not support sti_direct 2066 * we issue a _SIOCSOCKFALLBACK to inform the transport 2067 * as we would in the I_PUSH case. 2068 */ 2069 if (nsti->sti_direct == 0) { 2070 int rval; 2071 2072 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2073 0, 0, K_TO_K, cr, &rval)) != 0) { 2074 mutex_enter(&so->so_lock); 2075 so_lock_single(so); 2076 eprintsoline(so, error); 2077 goto disconnect_vp; 2078 } 2079 } 2080 2081 /* 2082 * Pass out new socket. 2083 */ 2084 if (nsop != NULL) 2085 *nsop = nso; 2086 2087 return (0); 2088 } 2089 2090 /* 2091 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2092 * which don't support the FireEngine accept fast-path. It is also 2093 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2094 * again. Neither sockfs nor TCP attempt to find out if some other 2095 * random module has been inserted in between (in which case we 2096 * should follow TLI accept behaviour). We blindly assume the worst 2097 * case and revert back to old behaviour i.e. TCP will not send us 2098 * any option (eager) and the accept should happen on the listener 2099 * queue. Any queued T_conn_ind have already got their options removed 2100 * by so_sock2_stream() when "sockmod" was I_POP'd. 2101 */ 2102 /* 2103 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2104 */ 2105 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2106 #ifdef _ILP32 2107 queue_t *q; 2108 2109 /* 2110 * Find read queue in driver 2111 * Can safely do this since we "own" nso/nvp. 2112 */ 2113 q = strvp2wq(nvp)->q_next; 2114 while (SAMESTR(q)) 2115 q = q->q_next; 2116 q = RD(q); 2117 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2118 #else 2119 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2120 #endif /* _ILP32 */ 2121 conn_res->PRIM_type = O_T_CONN_RES; 2122 PRIM_type = O_T_CONN_RES; 2123 } else { 2124 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2125 conn_res->PRIM_type = T_CONN_RES; 2126 PRIM_type = T_CONN_RES; 2127 } 2128 conn_res->SEQ_number = SEQ_number; 2129 conn_res->OPT_length = 0; 2130 conn_res->OPT_offset = 0; 2131 2132 mutex_enter(&so->so_lock); 2133 so_lock_single(so); /* Set SOLOCKED */ 2134 mutex_exit(&so->so_lock); 2135 2136 error = kstrputmsg(SOTOV(so), mp, NULL, 2137 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2138 mutex_enter(&so->so_lock); 2139 if (error) { 2140 eprintsoline(so, error); 2141 goto disconnect_vp; 2142 } 2143 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2144 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2145 if (error) { 2146 eprintsoline(so, error); 2147 goto disconnect_vp; 2148 } 2149 /* 2150 * If there is a sin/sin6 appended onto the T_OK_ACK use 2151 * that to set the local address. If this is not present 2152 * then we zero out the address and don't set the 2153 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2154 * the pathname from the listening socket. 2155 */ 2156 sinlen = (nso->so_family == AF_INET) ? 
	    sizeof (sin_t) : sizeof (sin6_t);
2157 	if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) &&
2158 	    MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) {
2159 		ack_mp->b_rptr += sizeof (struct T_ok_ack);
2160 		bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen);
2161 		nsti->sti_laddr_len = sinlen;
2162 		nsti->sti_laddr_valid = 1;
2163 	} else if (nso->so_family == AF_UNIX) {
2164 		ASSERT(so->so_family == AF_UNIX);
2165 		nsti->sti_laddr_len = sti->sti_laddr_len;
2166 		ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
2167 		bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa,
2168 		    nsti->sti_laddr_len);
2169 		nsti->sti_laddr_valid = 1;
2170 	} else {
2171 		nsti->sti_laddr_len = sti->sti_laddr_len;
2172 		ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
2173 		bzero(nsti->sti_laddr_sa, nsti->sti_addr_size);
2174 		nsti->sti_laddr_sa->sa_family = nso->so_family;
2175 	}
2176 	freemsg(ack_mp);
2177 
2178 	so_unlock_single(so, SOLOCKED);
2179 	mutex_exit(&so->so_lock);
2180 
2181 	nso->so_state |= SS_ISCONNECTED;
2182 
2183 	/*
2184 	 * Pass out new socket.
2185 	 */
2186 	if (nsop != NULL)
2187 		*nsop = nso;
2188 
2189 	return (0);
2190 
2191 
2192 eproto_disc_unl:
2193 	error = EPROTO;
2194 e_disc_unl:
2195 	eprintsoline(so, error);
2196 	goto disconnect_unlocked;
2197 
2198 pr_disc_vp_unl:
2199 	eprintsoline(so, error);
2200 disconnect_vp_unlocked:
2201 	(void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
2202 	VN_RELE(nvp);
2203 disconnect_unlocked:
2204 	(void) sodisconnect(so, SEQ_number, 0);
2205 	return (error);
2206 
2207 pr_disc_vp:
2208 	eprintsoline(so, error);
2209 disconnect_vp:
2210 	(void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD);
2211 	so_unlock_single(so, SOLOCKED);
2212 	mutex_exit(&so->so_lock);
2213 	(void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
2214 	VN_RELE(nvp);
2215 	return (error);
2216 
2217 conn_bad:	/* Note: SunOS 4/BSD unconditionally returns EINVAL here */
2218 	error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW)
2219 	    ? EOPNOTSUPP : EINVAL;
2220 e_bad:
2221 	eprintsoline(so, error);
2222 	return (error);
2223 }
2224 
2225 /*
2226  * Connect a socket.
2227  *
2228  * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
2229  * unconnect (by specifying a null address).
2230  */
2231 int
2232 sotpi_connect(struct sonode *so,
2233 	struct sockaddr *name,
2234 	socklen_t namelen,
2235 	int fflag,
2236 	int flags,
2237 	struct cred *cr)
2238 {
2239 	struct T_conn_req	conn_req;
2240 	int		error = 0;
2241 	mblk_t		*mp;
2242 	void		*src;
2243 	socklen_t	srclen;
2244 	void		*addr;
2245 	socklen_t	addrlen;
2246 	boolean_t	need_unlock;
2247 	sotpi_info_t	*sti = SOTOTPI(so);
2248 
2249 	dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
2250 	    (void *)so, (void *)name, namelen, fflag, flags,
2251 	    pr_state(so->so_state, so->so_mode)));
2252 
2253 	/*
2254 	 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
2255 	 * avoid sleeping for memory with SOLOCKED held.
2256 	 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen
2257 	 * + sizeof (struct T_opthdr).
2258 	 * (the AF_UNIX so_ux_addr_xlate() does not make the address
2259 	 * exceed sti_faddr_maxlen).
2260 	 */
2261 	mp = soallocproto(sizeof (struct T_conn_req) +
2262 	    2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR,
2263 	    cr);
2264 	if (mp == NULL) {
2265 		/*
2266 		 * Connect can not fail with ENOBUFS. A signal was
2267 		 * caught so return EINTR.
2268 */ 2269 error = EINTR; 2270 eprintsoline(so, error); 2271 return (error); 2272 } 2273 2274 mutex_enter(&so->so_lock); 2275 /* 2276 * Make sure there is a preallocated T_unbind_req message 2277 * before any binding. This message is allocated when the 2278 * socket is created. Since another thread can consume 2279 * so_unbind_mp by the time we return from so_lock_single(), 2280 * we should check the availability of so_unbind_mp after 2281 * we return from so_lock_single(). 2282 */ 2283 2284 so_lock_single(so); /* Set SOLOCKED */ 2285 need_unlock = B_TRUE; 2286 2287 if (sti->sti_unbind_mp == NULL) { 2288 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2289 /* NOTE: holding so_lock while sleeping */ 2290 sti->sti_unbind_mp = 2291 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2292 if (sti->sti_unbind_mp == NULL) { 2293 error = EINTR; 2294 goto done; 2295 } 2296 } 2297 2298 /* 2299 * Can't have done a listen before connecting. 2300 */ 2301 if (so->so_state & SS_ACCEPTCONN) { 2302 error = EOPNOTSUPP; 2303 goto done; 2304 } 2305 2306 /* 2307 * Must be bound with the transport 2308 */ 2309 if (!(so->so_state & SS_ISBOUND)) { 2310 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2311 /*CONSTCOND*/ 2312 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2313 /* 2314 * Optimization for AF_INET{,6} transports 2315 * that can handle a T_CONN_REQ without being bound. 2316 */ 2317 so_automatic_bind(so); 2318 } else { 2319 error = sotpi_bind(so, NULL, 0, 2320 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2321 if (error) 2322 goto done; 2323 } 2324 ASSERT(so->so_state & SS_ISBOUND); 2325 flags |= _SOCONNECT_DID_BIND; 2326 } 2327 2328 /* 2329 * Handle a connect to a name parameter of type AF_UNSPEC like a 2330 * connect to a null address. This is the portable method to 2331 * unconnect a socket. 2332 */ 2333 if ((namelen >= sizeof (sa_family_t)) && 2334 (name->sa_family == AF_UNSPEC)) { 2335 name = NULL; 2336 namelen = 0; 2337 } 2338 2339 /* 2340 * Check that we are not already connected. 2341 * A connection-oriented socket cannot be reconnected. 2342 * A connected connection-less socket can be 2343 * - connected to a different address by a subsequent connect 2344 * - "unconnected" by a connect to the NULL address 2345 */ 2346 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2347 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2348 if (so->so_mode & SM_CONNREQUIRED) { 2349 /* Connection-oriented socket */ 2350 error = so->so_state & SS_ISCONNECTED ? 2351 EISCONN : EALREADY; 2352 goto done; 2353 } 2354 /* Connection-less socket */ 2355 if (name == NULL) { 2356 /* 2357 * Remove the connected state and clear SO_DGRAM_ERRIND 2358 * since it was set when the socket was connected. 2359 * If this is UDP also send down a T_DISCON_REQ. 2360 */ 2361 int val; 2362 2363 if ((so->so_family == AF_INET || 2364 so->so_family == AF_INET6) && 2365 (so->so_type == SOCK_DGRAM || 2366 so->so_type == SOCK_RAW) && 2367 /*CONSTCOND*/ 2368 !soconnect_tpi_udp) { 2369 /* XXX What about implicitly unbinding here? 
*/ 2370 error = sodisconnect(so, -1, 2371 _SODISCONNECT_LOCK_HELD); 2372 } else { 2373 so->so_state &= 2374 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2375 sti->sti_faddr_valid = 0; 2376 sti->sti_faddr_len = 0; 2377 } 2378 2379 /* Remove SOLOCKED since setsockopt will grab it */ 2380 so_unlock_single(so, SOLOCKED); 2381 mutex_exit(&so->so_lock); 2382 2383 val = 0; 2384 (void) sotpi_setsockopt(so, SOL_SOCKET, 2385 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2386 cr); 2387 2388 mutex_enter(&so->so_lock); 2389 so_lock_single(so); /* Set SOLOCKED */ 2390 goto done; 2391 } 2392 } 2393 ASSERT(so->so_state & SS_ISBOUND); 2394 2395 if (name == NULL || namelen == 0) { 2396 error = EINVAL; 2397 goto done; 2398 } 2399 /* 2400 * Mark the socket if sti_faddr_sa represents the transport level 2401 * address. 2402 */ 2403 if (flags & _SOCONNECT_NOXLATE) { 2404 struct sockaddr_ux *soaddr_ux; 2405 2406 ASSERT(so->so_family == AF_UNIX); 2407 if (namelen != sizeof (struct sockaddr_ux)) { 2408 error = EINVAL; 2409 goto done; 2410 } 2411 soaddr_ux = (struct sockaddr_ux *)name; 2412 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2413 namelen = sizeof (soaddr_ux->sou_addr); 2414 sti->sti_faddr_noxlate = 1; 2415 } 2416 2417 /* 2418 * Length and family checks. 2419 */ 2420 error = so_addr_verify(so, name, namelen); 2421 if (error) 2422 goto bad; 2423 2424 /* 2425 * Save foreign address. Needed for AF_UNIX as well as 2426 * transport providers that do not support TI_GETPEERNAME. 2427 * Also used for cached foreign address for TCP and UDP. 2428 */ 2429 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2430 error = EINVAL; 2431 goto done; 2432 } 2433 sti->sti_faddr_len = (socklen_t)namelen; 2434 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2435 bcopy(name, sti->sti_faddr_sa, namelen); 2436 sti->sti_faddr_valid = 1; 2437 2438 if (so->so_family == AF_UNIX) { 2439 if (sti->sti_faddr_noxlate) { 2440 /* 2441 * Already have a transport internal address. Do not 2442 * pass any (transport internal) source address. 2443 */ 2444 addr = sti->sti_faddr_sa; 2445 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2446 src = NULL; 2447 srclen = 0; 2448 } else { 2449 /* 2450 * Pass the sockaddr_un source address as an option 2451 * and translate the remote address. 2452 * Holding so_lock thus sti_laddr_sa can not change. 2453 */ 2454 src = sti->sti_laddr_sa; 2455 srclen = (t_uscalar_t)sti->sti_laddr_len; 2456 dprintso(so, 1, 2457 ("sotpi_connect UNIX: srclen %d, src %p\n", 2458 srclen, src)); 2459 error = so_ux_addr_xlate(so, 2460 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2461 (flags & _SOCONNECT_XPG4_2), 2462 &addr, &addrlen); 2463 if (error) 2464 goto bad; 2465 } 2466 } else { 2467 addr = sti->sti_faddr_sa; 2468 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2469 src = NULL; 2470 srclen = 0; 2471 } 2472 /* 2473 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2474 * option which asks the transport provider to send T_UDERR_IND 2475 * messages. These T_UDERR_IND messages are used to return connected 2476 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2477 * 2478 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2479 * we send down a T_CONN_REQ. This is needed to let the 2480 * transport assign a local address that is consistent with 2481 * the remote address. Applications depend on a getsockname() 2482 * after a connect() to retrieve the "source" IP address for 2483 * the connected socket. Invalidate the cached local address 2484 * to force getsockname() to enquire of the transport. 
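	 *
	 * For illustration only (a hedged userland sketch, not part of this
	 * file), the sequence that depends on this is roughly:
	 *
	 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
	 *	struct sockaddr_in peer, local;
	 *	socklen_t len = sizeof (local);
	 *
	 *	... fill in peer ...
	 *	(void) connect(s, (struct sockaddr *)&peer, sizeof (peer));
	 *	(void) getsockname(s, (struct sockaddr *)&local, &len);
	 *
	 * after which local.sin_addr is expected to hold the source address
	 * the transport chose for the connected destination; that is why the
	 * cached local address is invalidated below.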
2485 */ 2486 if (!(so->so_mode & SM_CONNREQUIRED)) { 2487 /* 2488 * Datagram socket. 2489 */ 2490 int32_t val; 2491 2492 so_unlock_single(so, SOLOCKED); 2493 mutex_exit(&so->so_lock); 2494 2495 val = 1; 2496 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2497 &val, (t_uscalar_t)sizeof (val), cr); 2498 2499 mutex_enter(&so->so_lock); 2500 so_lock_single(so); /* Set SOLOCKED */ 2501 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2502 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2503 soconnect_tpi_udp) { 2504 soisconnected(so); 2505 goto done; 2506 } 2507 /* 2508 * Send down T_CONN_REQ etc. 2509 * Clear fflag to avoid returning EWOULDBLOCK. 2510 */ 2511 fflag = 0; 2512 ASSERT(so->so_family != AF_UNIX); 2513 sti->sti_laddr_valid = 0; 2514 } else if (sti->sti_laddr_len != 0) { 2515 /* 2516 * If the local address or port was "any" then it may be 2517 * changed by the transport as a result of the 2518 * connect. Invalidate the cached version if we have one. 2519 */ 2520 switch (so->so_family) { 2521 case AF_INET: 2522 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2523 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2524 INADDR_ANY || 2525 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2526 sti->sti_laddr_valid = 0; 2527 break; 2528 2529 case AF_INET6: 2530 ASSERT(sti->sti_laddr_len == 2531 (socklen_t)sizeof (sin6_t)); 2532 if (IN6_IS_ADDR_UNSPECIFIED( 2533 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2534 IN6_IS_ADDR_V4MAPPED_ANY( 2535 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2536 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2537 sti->sti_laddr_valid = 0; 2538 break; 2539 2540 default: 2541 break; 2542 } 2543 } 2544 2545 /* 2546 * Check for failure of an earlier call 2547 */ 2548 if (so->so_error != 0) 2549 goto so_bad; 2550 2551 /* 2552 * Send down T_CONN_REQ. Message was allocated above. 2553 */ 2554 conn_req.PRIM_type = T_CONN_REQ; 2555 conn_req.DEST_length = addrlen; 2556 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2557 if (srclen == 0) { 2558 conn_req.OPT_length = 0; 2559 conn_req.OPT_offset = 0; 2560 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2561 soappendmsg(mp, addr, addrlen); 2562 } else { 2563 /* 2564 * There is a AF_UNIX sockaddr_un to include as a source 2565 * address option. 2566 */ 2567 struct T_opthdr toh; 2568 2569 toh.level = SOL_SOCKET; 2570 toh.name = SO_SRCADDR; 2571 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2572 toh.status = 0; 2573 conn_req.OPT_length = 2574 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2575 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2576 _TPI_ALIGN_TOPT(addrlen)); 2577 2578 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2579 soappendmsg(mp, addr, addrlen); 2580 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2581 soappendmsg(mp, &toh, sizeof (toh)); 2582 soappendmsg(mp, src, srclen); 2583 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2584 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2585 } 2586 /* 2587 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2588 * in order to have the right state when the T_CONN_CON shows up. 
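	 *
	 * A hedged userland sketch (illustration only, not part of this
	 * file) of the non-blocking case this state transition supports:
	 *
	 *	(void) fcntl(s, F_SETFL, O_NONBLOCK);
	 *	if (connect(s, (struct sockaddr *)&peer, sizeof (peer)) == -1 &&
	 *	    errno == EINPROGRESS) {
	 *		struct pollfd pfd;
	 *		int err = 0;
	 *		socklen_t elen = sizeof (err);
	 *
	 *		pfd.fd = s;
	 *		pfd.events = POLLOUT;
	 *		(void) poll(&pfd, 1, -1);
	 *		(void) getsockopt(s, SOL_SOCKET, SO_ERROR, &err, &elen);
	 *	}
	 *
	 * EINPROGRESS is treated as a non-fatal error in the done: switch
	 * below, and the later T_CONN_CON moves the socket from
	 * SS_ISCONNECTING to SS_ISCONNECTED.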
2589 */ 2590 soisconnecting(so); 2591 mutex_exit(&so->so_lock); 2592 2593 if (AU_AUDITING()) 2594 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2595 2596 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2597 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2598 mp = NULL; 2599 mutex_enter(&so->so_lock); 2600 if (error != 0) 2601 goto bad; 2602 2603 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2604 goto bad; 2605 2606 /* Allow other threads to access the socket */ 2607 so_unlock_single(so, SOLOCKED); 2608 need_unlock = B_FALSE; 2609 2610 /* 2611 * Wait until we get a T_CONN_CON or an error 2612 */ 2613 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2614 so_lock_single(so); /* Set SOLOCKED */ 2615 need_unlock = B_TRUE; 2616 } 2617 2618 done: 2619 freemsg(mp); 2620 switch (error) { 2621 case EINPROGRESS: 2622 case EALREADY: 2623 case EISCONN: 2624 case EINTR: 2625 /* Non-fatal errors */ 2626 sti->sti_laddr_valid = 0; 2627 /* FALLTHRU */ 2628 case 0: 2629 break; 2630 default: 2631 ASSERT(need_unlock); 2632 /* 2633 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2634 * and invalidate local-address cache 2635 */ 2636 so->so_state &= ~SS_ISCONNECTING; 2637 sti->sti_laddr_valid = 0; 2638 /* A discon_ind might have already unbound us */ 2639 if ((flags & _SOCONNECT_DID_BIND) && 2640 (so->so_state & SS_ISBOUND)) { 2641 int err; 2642 2643 err = sotpi_unbind(so, 0); 2644 /* LINTED - statement has no conseq */ 2645 if (err) { 2646 eprintsoline(so, err); 2647 } 2648 } 2649 break; 2650 } 2651 if (need_unlock) 2652 so_unlock_single(so, SOLOCKED); 2653 mutex_exit(&so->so_lock); 2654 return (error); 2655 2656 so_bad: error = sogeterr(so, B_TRUE); 2657 bad: eprintsoline(so, error); 2658 goto done; 2659 } 2660 2661 /* ARGSUSED */ 2662 int 2663 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2664 { 2665 struct T_ordrel_req ordrel_req; 2666 mblk_t *mp; 2667 uint_t old_state, state_change; 2668 int error = 0; 2669 sotpi_info_t *sti = SOTOTPI(so); 2670 2671 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2672 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2673 2674 mutex_enter(&so->so_lock); 2675 so_lock_single(so); /* Set SOLOCKED */ 2676 2677 /* 2678 * SunOS 4.X has no check for datagram sockets. 2679 * 5.X checks that it is connected (ENOTCONN) 2680 * X/Open requires that we check the connected state. 2681 */ 2682 if (!(so->so_state & SS_ISCONNECTED)) { 2683 if (!xnet_skip_checks) { 2684 error = ENOTCONN; 2685 if (xnet_check_print) { 2686 printf("sockfs: X/Open shutdown check " 2687 "caused ENOTCONN\n"); 2688 } 2689 } 2690 goto done; 2691 } 2692 /* 2693 * Record the current state and then perform any state changes. 2694 * Then use the difference between the old and new states to 2695 * determine which messages need to be sent. 2696 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2697 * duplicate calls to shutdown(). 2698 */ 2699 old_state = so->so_state; 2700 2701 switch (how) { 2702 case 0: 2703 socantrcvmore(so); 2704 break; 2705 case 1: 2706 socantsendmore(so); 2707 break; 2708 case 2: 2709 socantsendmore(so); 2710 socantrcvmore(so); 2711 break; 2712 default: 2713 error = EINVAL; 2714 goto done; 2715 } 2716 2717 /* 2718 * Assumes that the SS_CANT* flags are never cleared in the above code. 
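	 *
	 * Worked example (illustration only): if an earlier shutdown(fd, 1)
	 * already set SS_CANTSENDMORE and this call is shutdown(fd, 2), then
	 *
	 *	old_state    & (SS_CANTRCVMORE|SS_CANTSENDMORE) == SS_CANTSENDMORE
	 *	so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE) == SS_CANTRCVMORE|SS_CANTSENDMORE
	 *	state_change                                    == SS_CANTRCVMORE
	 *
	 * so only the read-side work is performed and no duplicate
	 * T_ORDREL_REQ is generated. The subtraction below is well defined
	 * because the SS_CANT* flags are only ever added above, never cleared.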
2719 */ 2720 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2721 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2722 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2723 2724 switch (state_change) { 2725 case 0: 2726 dprintso(so, 1, 2727 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2728 so->so_state)); 2729 goto done; 2730 2731 case SS_CANTRCVMORE: 2732 mutex_exit(&so->so_lock); 2733 strseteof(SOTOV(so), 1); 2734 /* 2735 * strseteof takes care of read side wakeups, 2736 * pollwakeups, and signals. 2737 */ 2738 /* 2739 * Get the read lock before flushing data to avoid problems 2740 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2741 */ 2742 mutex_enter(&so->so_lock); 2743 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2744 mutex_exit(&so->so_lock); 2745 2746 /* Flush read side queue */ 2747 strflushrq(SOTOV(so), FLUSHALL); 2748 2749 mutex_enter(&so->so_lock); 2750 so_unlock_read(so); /* Clear SOREADLOCKED */ 2751 break; 2752 2753 case SS_CANTSENDMORE: 2754 mutex_exit(&so->so_lock); 2755 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2756 mutex_enter(&so->so_lock); 2757 break; 2758 2759 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2760 mutex_exit(&so->so_lock); 2761 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2762 strseteof(SOTOV(so), 1); 2763 /* 2764 * strseteof takes care of read side wakeups, 2765 * pollwakeups, and signals. 2766 */ 2767 /* 2768 * Get the read lock before flushing data to avoid problems 2769 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2770 */ 2771 mutex_enter(&so->so_lock); 2772 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2773 mutex_exit(&so->so_lock); 2774 2775 /* Flush read side queue */ 2776 strflushrq(SOTOV(so), FLUSHALL); 2777 2778 mutex_enter(&so->so_lock); 2779 so_unlock_read(so); /* Clear SOREADLOCKED */ 2780 break; 2781 } 2782 2783 ASSERT(MUTEX_HELD(&so->so_lock)); 2784 2785 /* 2786 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2787 * was set due to this call and the new state has both of them set: 2788 * Send the AF_UNIX close indication 2789 * For T_COTS send a discon_ind 2790 * 2791 * If cantsend was set due to this call: 2792 * For T_COTSORD send an ordrel_ind 2793 * 2794 * Note that for T_CLTS there is no message sent here. 2795 */ 2796 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2797 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2798 /* 2799 * For SunOS 4.X compatibility we tell the other end 2800 * that we are unable to receive at this point. 2801 */ 2802 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2803 so_unix_close(so); 2804 2805 if (sti->sti_serv_type == T_COTS) 2806 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2807 } 2808 if ((state_change & SS_CANTSENDMORE) && 2809 (sti->sti_serv_type == T_COTS_ORD)) { 2810 /* Send an orderly release */ 2811 ordrel_req.PRIM_type = T_ORDREL_REQ; 2812 2813 mutex_exit(&so->so_lock); 2814 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2815 0, _ALLOC_SLEEP, cr); 2816 /* 2817 * Send down the T_ORDREL_REQ even if there is flow control. 2818 * This prevents shutdown from blocking. 2819 * Note that there is no T_OK_ACK for ordrel_req. 
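	 *
	 * Illustration only (a hedged userland sketch, not part of this
	 * file): the orderly release sent here is what gives shutdown() its
	 * half-close semantics on a T_COTS_ORD transport such as TCP:
	 *
	 *	char buf[512];
	 *	ssize_t n;
	 *
	 *	(void) shutdown(s, SHUT_WR);
	 *	while ((n = read(s, buf, sizeof (buf))) > 0) {
	 *		... the peer may still send data after seeing EOF ...
	 *	}
	 *
	 * Since there is no T_OK_ACK for the T_ORDREL_REQ, shutdown() does
	 * not wait for the transport to acknowledge the release.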
2820 		 */
2821 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
2822 		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
2823 		mutex_enter(&so->so_lock);
2824 		if (error) {
2825 			eprintsoline(so, error);
2826 			goto done;
2827 		}
2828 	}
2829 
2830 done:
2831 	so_unlock_single(so, SOLOCKED);
2832 	mutex_exit(&so->so_lock);
2833 	return (error);
2834 }
2835 
2836 /*
2837  * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send
2838  * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer
2839  * that we have closed.
2840  * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length
2841  * T_UNITDATA_REQ containing the same option.
2842  *
2843  * For SOCK_DGRAM half-connections (somebody connected to this end
2844  * but this end is not connected) we don't know where to send any
2845  * SO_UNIX_CLOSE.
2846  *
2847  * We have to ignore stream head errors just in case there has been
2848  * a shutdown(output).
2849  * Ignore any flow control to try to get the message more quickly to the peer.
2850  * While locally ignoring flow control solves the problem when there
2851  * is only the loopback transport on the stream it would not provide
2852  * the correct AF_UNIX socket semantics when one or more modules have
2853  * been pushed.
2854  */
2855 void
2856 so_unix_close(struct sonode *so)
2857 {
2858 	int		error;
2859 	struct T_opthdr	toh;
2860 	mblk_t		*mp;
2861 	sotpi_info_t	*sti = SOTOTPI(so);
2862 
2863 	ASSERT(MUTEX_HELD(&so->so_lock));
2864 
2865 	ASSERT(so->so_family == AF_UNIX);
2866 
2867 	if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) !=
2868 	    (SS_ISCONNECTED|SS_ISBOUND))
2869 		return;
2870 
2871 	dprintso(so, 1, ("so_unix_close(%p) %s\n",
2872 	    (void *)so, pr_state(so->so_state, so->so_mode)));
2873 
2874 	toh.level = SOL_SOCKET;
2875 	toh.name = SO_UNIX_CLOSE;
2876 
2877 	/* zero length + header */
2878 	toh.len = (t_uscalar_t)sizeof (struct T_opthdr);
2879 	toh.status = 0;
2880 
2881 	if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) {
2882 		struct T_optdata_req tdr;
2883 
2884 		tdr.PRIM_type = T_OPTDATA_REQ;
2885 		tdr.DATA_flag = 0;
2886 
2887 		tdr.OPT_length = (t_scalar_t)sizeof (toh);
2888 		tdr.OPT_offset = (t_scalar_t)sizeof (tdr);
2889 
2890 		/* NOTE: holding so_lock while sleeping */
2891 		mp = soallocproto2(&tdr, sizeof (tdr),
2892 		    &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED());
2893 	} else {
2894 		struct T_unitdata_req	tudr;
2895 		void			*addr;
2896 		socklen_t		addrlen;
2897 		void			*src;
2898 		socklen_t		srclen;
2899 		struct T_opthdr		toh2;
2900 		t_scalar_t		size;
2901 
2902 		/* Connected DGRAM socket */
2903 
2904 		/*
2905 		 * For AF_UNIX the destination address is translated to
2906 		 * an internal name and the source address is passed as
2907 		 * an option.
2908 		 */
2909 		/*
2910 		 * Length and family checks.
2911 		 */
2912 		error = so_addr_verify(so, sti->sti_faddr_sa,
2913 		    (t_uscalar_t)sti->sti_faddr_len);
2914 		if (error) {
2915 			eprintsoline(so, error);
2916 			return;
2917 		}
2918 		if (sti->sti_faddr_noxlate) {
2919 			/*
2920 			 * Already have a transport internal address. Do not
2921 			 * pass any (transport internal) source address.
2922 			 */
2923 			addr = sti->sti_faddr_sa;
2924 			addrlen = (t_uscalar_t)sti->sti_faddr_len;
2925 			src = NULL;
2926 			srclen = 0;
2927 		} else {
2928 			/*
2929 			 * Pass the sockaddr_un source address as an option
2930 			 * and translate the remote address.
2931 			 * Holding so_lock thus sti_laddr_sa can not change.
2932 */ 2933 src = sti->sti_laddr_sa; 2934 srclen = (socklen_t)sti->sti_laddr_len; 2935 dprintso(so, 1, 2936 ("so_ux_close: srclen %d, src %p\n", 2937 srclen, src)); 2938 error = so_ux_addr_xlate(so, 2939 sti->sti_faddr_sa, 2940 (socklen_t)sti->sti_faddr_len, 0, 2941 &addr, &addrlen); 2942 if (error) { 2943 eprintsoline(so, error); 2944 return; 2945 } 2946 } 2947 tudr.PRIM_type = T_UNITDATA_REQ; 2948 tudr.DEST_length = addrlen; 2949 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2950 if (srclen == 0) { 2951 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2952 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2953 _TPI_ALIGN_TOPT(addrlen)); 2954 2955 size = tudr.OPT_offset + tudr.OPT_length; 2956 /* NOTE: holding so_lock while sleeping */ 2957 mp = soallocproto2(&tudr, sizeof (tudr), 2958 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2959 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2960 soappendmsg(mp, &toh, sizeof (toh)); 2961 } else { 2962 /* 2963 * There is a AF_UNIX sockaddr_un to include as a 2964 * source address option. 2965 */ 2966 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2967 _TPI_ALIGN_TOPT(srclen)); 2968 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2969 _TPI_ALIGN_TOPT(addrlen)); 2970 2971 toh2.level = SOL_SOCKET; 2972 toh2.name = SO_SRCADDR; 2973 toh2.len = (t_uscalar_t)(srclen + 2974 sizeof (struct T_opthdr)); 2975 toh2.status = 0; 2976 2977 size = tudr.OPT_offset + tudr.OPT_length; 2978 2979 /* NOTE: holding so_lock while sleeping */ 2980 mp = soallocproto2(&tudr, sizeof (tudr), 2981 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2982 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2983 soappendmsg(mp, &toh, sizeof (toh)); 2984 soappendmsg(mp, &toh2, sizeof (toh2)); 2985 soappendmsg(mp, src, srclen); 2986 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2987 } 2988 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2989 } 2990 mutex_exit(&so->so_lock); 2991 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2992 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2993 mutex_enter(&so->so_lock); 2994 } 2995 2996 /* 2997 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2998 * In addition, the caller typically verifies that there is some 2999 * potential state to clear by checking 3000 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 3001 * before calling this routine. 3002 * Note that such a check can be made without holding so_lock since 3003 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 3004 * decrements sti_oobsigcnt. 3005 * 3006 * When data is read *after* the point that all pending 3007 * oob data has been consumed the oob indication is cleared. 3008 * 3009 * This logic keeps select/poll returning POLLRDBAND and 3010 * SIOCATMARK returning true until we have read past 3011 * the mark. 3012 */ 3013 static void 3014 sorecv_update_oobstate(struct sonode *so) 3015 { 3016 sotpi_info_t *sti = SOTOTPI(so); 3017 3018 mutex_enter(&so->so_lock); 3019 ASSERT(so_verify_oobstate(so)); 3020 dprintso(so, 1, 3021 ("sorecv_update_oobstate: counts %d/%d state %s\n", 3022 sti->sti_oobsigcnt, 3023 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 3024 if (sti->sti_oobsigcnt == 0) { 3025 /* No more pending oob indications */ 3026 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 3027 freemsg(so->so_oobmsg); 3028 so->so_oobmsg = NULL; 3029 } 3030 ASSERT(so_verify_oobstate(so)); 3031 mutex_exit(&so->so_lock); 3032 } 3033 3034 /* 3035 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 
3036 */ 3037 static int 3038 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 3039 { 3040 sotpi_info_t *sti = SOTOTPI(so); 3041 int error = 0; 3042 mblk_t *tmp = NULL; 3043 mblk_t *pmp = NULL; 3044 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 3045 3046 ASSERT(nmp != NULL); 3047 3048 while (nmp != NULL && uiop->uio_resid > 0) { 3049 ssize_t n; 3050 3051 if (DB_TYPE(nmp) == M_DATA) { 3052 /* 3053 * We have some data, uiomove up to resid bytes. 3054 */ 3055 n = MIN(MBLKL(nmp), uiop->uio_resid); 3056 if (n > 0) 3057 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 3058 nmp->b_rptr += n; 3059 if (nmp->b_rptr == nmp->b_wptr) { 3060 pmp = nmp; 3061 nmp = nmp->b_cont; 3062 } 3063 if (error) 3064 break; 3065 } else { 3066 /* 3067 * We only handle data, save for caller to handle. 3068 */ 3069 if (pmp != NULL) { 3070 pmp->b_cont = nmp->b_cont; 3071 } 3072 nmp->b_cont = NULL; 3073 if (*rmp == NULL) { 3074 *rmp = nmp; 3075 } else { 3076 tmp->b_cont = nmp; 3077 } 3078 nmp = nmp->b_cont; 3079 tmp = nmp; 3080 } 3081 } 3082 if (pmp != NULL) { 3083 /* Free any mblk_t(s) which we have consumed */ 3084 pmp->b_cont = NULL; 3085 freemsg(sti->sti_nl7c_rcv_mp); 3086 } 3087 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3088 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3089 if (error == 0) { 3090 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3091 3092 error = p->r_v.r_v2; 3093 p->r_v.r_v2 = 0; 3094 } 3095 rp->r_vals = sti->sti_nl7c_rcv_rval; 3096 sti->sti_nl7c_rcv_rval = 0; 3097 } else { 3098 /* More mblk_t(s) to process so no rval to return */ 3099 rp->r_vals = 0; 3100 } 3101 return (error); 3102 } 3103 /* 3104 * Receive the next message on the queue. 3105 * If msg_controllen is non-zero when called the caller is interested in 3106 * any received control info (options). 3107 * If msg_namelen is non-zero when called the caller is interested in 3108 * any received source address. 3109 * The routine returns with msg_control and msg_name pointing to 3110 * kmem_alloc'ed memory which the caller has to free. 3111 */ 3112 /* ARGSUSED */ 3113 int 3114 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3115 struct cred *cr) 3116 { 3117 union T_primitives *tpr; 3118 mblk_t *mp; 3119 uchar_t pri; 3120 int pflag, opflag; 3121 void *control; 3122 t_uscalar_t controllen; 3123 t_uscalar_t namelen; 3124 int so_state = so->so_state; /* Snapshot */ 3125 ssize_t saved_resid; 3126 rval_t rval; 3127 int flags; 3128 clock_t timout; 3129 int error = 0; 3130 sotpi_info_t *sti = SOTOTPI(so); 3131 3132 flags = msg->msg_flags; 3133 msg->msg_flags = 0; 3134 3135 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3136 (void *)so, (void *)msg, flags, 3137 pr_state(so->so_state, so->so_mode), so->so_error)); 3138 3139 if (so->so_version == SOV_STREAM) { 3140 so_update_attrs(so, SOACC); 3141 /* The imaginary "sockmod" has been popped - act as a stream */ 3142 return (strread(SOTOV(so), uiop, cr)); 3143 } 3144 3145 /* 3146 * If we are not connected because we have never been connected 3147 * we return ENOTCONN. If we have been connected (but are no longer 3148 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3149 * the EOF. 3150 * 3151 * An alternative would be to post an ENOTCONN error in stream head 3152 * (read+write) and clear it when we're connected. However, that error 3153 * would cause incorrect poll/select behavior! 
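	 *
	 * Illustration only (a hedged userland sketch, not part of this
	 * file):
	 *
	 *	int s = socket(AF_INET, SOCK_STREAM, 0);
	 *	char c;
	 *	ssize_t n = read(s, &c, 1);
	 *
	 * The read() above fails with ENOTCONN because the socket was never
	 * connected. On a socket that was connected and has since been shut
	 * down or disconnected, SS_CANTRCVMORE is set instead, so the same
	 * read() drains any queued data and then returns 0 (end of file).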
3154 */ 3155 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3156 (so->so_mode & SM_CONNREQUIRED)) { 3157 return (ENOTCONN); 3158 } 3159 3160 /* 3161 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3162 * after checking that the read queue is empty) and returns zero. 3163 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3164 * is zero. 3165 */ 3166 3167 if (flags & MSG_OOB) { 3168 /* Check that the transport supports OOB */ 3169 if (!(so->so_mode & SM_EXDATA)) 3170 return (EOPNOTSUPP); 3171 so_update_attrs(so, SOACC); 3172 return (sorecvoob(so, msg, uiop, flags, 3173 (so->so_options & SO_OOBINLINE))); 3174 } 3175 3176 so_update_attrs(so, SOACC); 3177 3178 /* 3179 * Set msg_controllen and msg_namelen to zero here to make it 3180 * simpler in the cases that no control or name is returned. 3181 */ 3182 controllen = msg->msg_controllen; 3183 namelen = msg->msg_namelen; 3184 msg->msg_controllen = 0; 3185 msg->msg_namelen = 0; 3186 3187 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3188 namelen, controllen)); 3189 3190 mutex_enter(&so->so_lock); 3191 /* 3192 * If an NL7C enabled socket and not waiting for write data. 3193 */ 3194 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3195 NL7C_ENABLED) { 3196 if (sti->sti_nl7c_uri) { 3197 /* Close uri processing for a previous request */ 3198 nl7c_close(so); 3199 } 3200 if ((so_state & SS_CANTRCVMORE) && 3201 sti->sti_nl7c_rcv_mp == NULL) { 3202 /* Nothing to process, EOF */ 3203 mutex_exit(&so->so_lock); 3204 return (0); 3205 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3206 /* Persistent NL7C socket, try to process request */ 3207 boolean_t ret; 3208 3209 ret = nl7c_process(so, 3210 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3211 rval.r_vals = sti->sti_nl7c_rcv_rval; 3212 error = rval.r_v.r_v2; 3213 if (error) { 3214 /* Error of some sort, return it */ 3215 mutex_exit(&so->so_lock); 3216 return (error); 3217 } 3218 if (sti->sti_nl7c_flags && 3219 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3220 /* 3221 * Still an NL7C socket and no data 3222 * to pass up to the caller. 3223 */ 3224 mutex_exit(&so->so_lock); 3225 if (ret) { 3226 /* EOF */ 3227 return (0); 3228 } else { 3229 /* Need more data */ 3230 return (EAGAIN); 3231 } 3232 } 3233 } else { 3234 /* 3235 * Not persistent so no further NL7C processing. 3236 */ 3237 sti->sti_nl7c_flags = 0; 3238 } 3239 } 3240 /* 3241 * Only one reader is allowed at any given time. This is needed 3242 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3243 * 3244 * This is slightly different that BSD behavior in that it fails with 3245 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3246 * is single-threaded using sblock(), which is dropped while waiting 3247 * for data to appear. The difference shows up e.g. if one 3248 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3249 * does use nonblocking io and different threads are reading each 3250 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3251 * in this case as long as the read queue doesn't get empty. 3252 * In this implementation the thread using nonblocking io can 3253 * get an EWOULDBLOCK error due to the blocking thread executing 3254 * e.g. in the uiomove in kstrgetmsg. 3255 * This difference is not believed to be significant. 3256 */ 3257 /* Set SOREADLOCKED */ 3258 error = so_lock_read_intr(so, 3259 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? 
FNONBLOCK : 0)); 3260 mutex_exit(&so->so_lock); 3261 if (error) 3262 return (error); 3263 3264 /* 3265 * Tell kstrgetmsg to not inspect the stream head errors until all 3266 * queued data has been consumed. 3267 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3268 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3269 * 3270 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3271 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3272 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3273 */ 3274 pflag = MSG_ANY | MSG_DELAYERROR; 3275 if (flags & MSG_PEEK) { 3276 pflag |= MSG_IPEEK; 3277 flags &= ~MSG_WAITALL; 3278 } 3279 if (so->so_mode & SM_ATOMIC) 3280 pflag |= MSG_DISCARDTAIL; 3281 3282 if (flags & MSG_DONTWAIT) 3283 timout = 0; 3284 else 3285 timout = -1; 3286 opflag = pflag; 3287 retry: 3288 saved_resid = uiop->uio_resid; 3289 pri = 0; 3290 mp = NULL; 3291 if (sti->sti_nl7c_rcv_mp != NULL) { 3292 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3293 error = nl7c_sorecv(so, &mp, uiop, &rval); 3294 } else { 3295 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3296 timout, &rval); 3297 } 3298 if (error != 0) { 3299 /* kstrgetmsg returns ETIME when timeout expires */ 3300 if (error == ETIME) 3301 error = EWOULDBLOCK; 3302 goto out; 3303 } 3304 /* 3305 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3306 * For non-datagrams MOREDATA is used to set MSG_EOR. 3307 */ 3308 ASSERT(!(rval.r_val1 & MORECTL)); 3309 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3310 msg->msg_flags |= MSG_TRUNC; 3311 3312 if (mp == NULL) { 3313 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3314 /* 3315 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3316 * The draft Posix socket spec states that the mark should 3317 * not be cleared when peeking. We follow the latter. 3318 */ 3319 if ((so->so_state & 3320 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3321 (uiop->uio_resid != saved_resid) && 3322 !(flags & MSG_PEEK)) { 3323 sorecv_update_oobstate(so); 3324 } 3325 3326 mutex_enter(&so->so_lock); 3327 /* Set MSG_EOR based on MOREDATA */ 3328 if (!(rval.r_val1 & MOREDATA)) { 3329 if (so->so_state & SS_SAVEDEOR) { 3330 msg->msg_flags |= MSG_EOR; 3331 so->so_state &= ~SS_SAVEDEOR; 3332 } 3333 } 3334 /* 3335 * If some data was received (i.e. not EOF) and the 3336 * read/recv* has not been satisfied wait for some more. 3337 */ 3338 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3339 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3340 mutex_exit(&so->so_lock); 3341 pflag = opflag | MSG_NOMARK; 3342 goto retry; 3343 } 3344 goto out_locked; 3345 } 3346 3347 /* strsock_proto has already verified length and alignment */ 3348 tpr = (union T_primitives *)mp->b_rptr; 3349 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3350 3351 switch (tpr->type) { 3352 case T_DATA_IND: { 3353 if ((so->so_state & 3354 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3355 (uiop->uio_resid != saved_resid) && 3356 !(flags & MSG_PEEK)) { 3357 sorecv_update_oobstate(so); 3358 } 3359 3360 /* 3361 * Set msg_flags to MSG_EOR based on 3362 * MORE_flag and MOREDATA. 3363 */ 3364 mutex_enter(&so->so_lock); 3365 so->so_state &= ~SS_SAVEDEOR; 3366 if (!(tpr->data_ind.MORE_flag & 1)) { 3367 if (!(rval.r_val1 & MOREDATA)) 3368 msg->msg_flags |= MSG_EOR; 3369 else 3370 so->so_state |= SS_SAVEDEOR; 3371 } 3372 freemsg(mp); 3373 /* 3374 * If some data was received (i.e. 
not EOF) and the 3375 * read/recv* has not been satisfied wait for some more. 3376 */ 3377 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3378 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3379 mutex_exit(&so->so_lock); 3380 pflag = opflag | MSG_NOMARK; 3381 goto retry; 3382 } 3383 goto out_locked; 3384 } 3385 case T_UNITDATA_IND: { 3386 void *addr; 3387 t_uscalar_t addrlen; 3388 void *abuf; 3389 t_uscalar_t optlen; 3390 void *opt; 3391 3392 if ((so->so_state & 3393 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3394 (uiop->uio_resid != saved_resid) && 3395 !(flags & MSG_PEEK)) { 3396 sorecv_update_oobstate(so); 3397 } 3398 3399 if (namelen != 0) { 3400 /* Caller wants source address */ 3401 addrlen = tpr->unitdata_ind.SRC_length; 3402 addr = sogetoff(mp, 3403 tpr->unitdata_ind.SRC_offset, 3404 addrlen, 1); 3405 if (addr == NULL) { 3406 freemsg(mp); 3407 error = EPROTO; 3408 eprintsoline(so, error); 3409 goto out; 3410 } 3411 if (so->so_family == AF_UNIX) { 3412 /* 3413 * Can not use the transport level address. 3414 * If there is a SO_SRCADDR option carrying 3415 * the socket level address it will be 3416 * extracted below. 3417 */ 3418 addr = NULL; 3419 addrlen = 0; 3420 } 3421 } 3422 optlen = tpr->unitdata_ind.OPT_length; 3423 if (optlen != 0) { 3424 t_uscalar_t ncontrollen; 3425 3426 /* 3427 * Extract any source address option. 3428 * Determine how large cmsg buffer is needed. 3429 */ 3430 opt = sogetoff(mp, 3431 tpr->unitdata_ind.OPT_offset, 3432 optlen, __TPI_ALIGN_SIZE); 3433 3434 if (opt == NULL) { 3435 freemsg(mp); 3436 error = EPROTO; 3437 eprintsoline(so, error); 3438 goto out; 3439 } 3440 if (so->so_family == AF_UNIX) 3441 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3442 ncontrollen = so_cmsglen(mp, opt, optlen, 3443 !(flags & MSG_XPG4_2)); 3444 if (controllen != 0) 3445 controllen = ncontrollen; 3446 else if (ncontrollen != 0) 3447 msg->msg_flags |= MSG_CTRUNC; 3448 } else { 3449 controllen = 0; 3450 } 3451 3452 if (namelen != 0) { 3453 /* 3454 * Return address to caller. 3455 * Caller handles truncation if length 3456 * exceeds msg_namelen. 3457 * NOTE: AF_UNIX NUL termination is ensured by 3458 * the sender's copyin_name(). 3459 */ 3460 abuf = kmem_alloc(addrlen, KM_SLEEP); 3461 3462 bcopy(addr, abuf, addrlen); 3463 msg->msg_name = abuf; 3464 msg->msg_namelen = addrlen; 3465 } 3466 3467 if (controllen != 0) { 3468 /* 3469 * Return control msg to caller. 3470 * Caller handles truncation if length 3471 * exceeds msg_controllen. 3472 */ 3473 control = kmem_zalloc(controllen, KM_SLEEP); 3474 3475 error = so_opt2cmsg(mp, opt, optlen, 3476 !(flags & MSG_XPG4_2), 3477 control, controllen); 3478 if (error) { 3479 freemsg(mp); 3480 if (msg->msg_namelen != 0) 3481 kmem_free(msg->msg_name, 3482 msg->msg_namelen); 3483 kmem_free(control, controllen); 3484 eprintsoline(so, error); 3485 goto out; 3486 } 3487 msg->msg_control = control; 3488 msg->msg_controllen = controllen; 3489 } 3490 3491 freemsg(mp); 3492 goto out; 3493 } 3494 case T_OPTDATA_IND: { 3495 struct T_optdata_req *tdr; 3496 void *opt; 3497 t_uscalar_t optlen; 3498 3499 if ((so->so_state & 3500 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3501 (uiop->uio_resid != saved_resid) && 3502 !(flags & MSG_PEEK)) { 3503 sorecv_update_oobstate(so); 3504 } 3505 3506 tdr = (struct T_optdata_req *)mp->b_rptr; 3507 optlen = tdr->OPT_length; 3508 if (optlen != 0) { 3509 t_uscalar_t ncontrollen; 3510 /* 3511 * Determine how large cmsg buffer is needed. 
3512 */ 3513 opt = sogetoff(mp, 3514 tpr->optdata_ind.OPT_offset, 3515 optlen, __TPI_ALIGN_SIZE); 3516 3517 if (opt == NULL) { 3518 freemsg(mp); 3519 error = EPROTO; 3520 eprintsoline(so, error); 3521 goto out; 3522 } 3523 3524 ncontrollen = so_cmsglen(mp, opt, optlen, 3525 !(flags & MSG_XPG4_2)); 3526 if (controllen != 0) 3527 controllen = ncontrollen; 3528 else if (ncontrollen != 0) 3529 msg->msg_flags |= MSG_CTRUNC; 3530 } else { 3531 controllen = 0; 3532 } 3533 3534 if (controllen != 0) { 3535 /* 3536 * Return control msg to caller. 3537 * Caller handles truncation if length 3538 * exceeds msg_controllen. 3539 */ 3540 control = kmem_zalloc(controllen, KM_SLEEP); 3541 3542 error = so_opt2cmsg(mp, opt, optlen, 3543 !(flags & MSG_XPG4_2), 3544 control, controllen); 3545 if (error) { 3546 freemsg(mp); 3547 kmem_free(control, controllen); 3548 eprintsoline(so, error); 3549 goto out; 3550 } 3551 msg->msg_control = control; 3552 msg->msg_controllen = controllen; 3553 } 3554 3555 /* 3556 * Set msg_flags to MSG_EOR based on 3557 * DATA_flag and MOREDATA. 3558 */ 3559 mutex_enter(&so->so_lock); 3560 so->so_state &= ~SS_SAVEDEOR; 3561 if (!(tpr->data_ind.MORE_flag & 1)) { 3562 if (!(rval.r_val1 & MOREDATA)) 3563 msg->msg_flags |= MSG_EOR; 3564 else 3565 so->so_state |= SS_SAVEDEOR; 3566 } 3567 freemsg(mp); 3568 /* 3569 * If some data was received (i.e. not EOF) and the 3570 * read/recv* has not been satisfied wait for some more. 3571 * Not possible to wait if control info was received. 3572 */ 3573 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3574 controllen == 0 && 3575 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3576 mutex_exit(&so->so_lock); 3577 pflag = opflag | MSG_NOMARK; 3578 goto retry; 3579 } 3580 goto out_locked; 3581 } 3582 case T_EXDATA_IND: { 3583 dprintso(so, 1, 3584 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3585 "state %s\n", 3586 sti->sti_oobsigcnt, sti->sti_oobcnt, 3587 saved_resid - uiop->uio_resid, 3588 pr_state(so->so_state, so->so_mode))); 3589 /* 3590 * kstrgetmsg handles MSGMARK so there is nothing to 3591 * inspect in the T_EXDATA_IND. 3592 * strsock_proto makes the stream head queue the T_EXDATA_IND 3593 * as a separate message with no M_DATA component. Furthermore, 3594 * the stream head does not consolidate M_DATA messages onto 3595 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3596 * remains a message by itself. This is needed since MSGMARK 3597 * marks both the whole message as well as the last byte 3598 * of the message. 3599 */ 3600 freemsg(mp); 3601 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3602 if (flags & MSG_PEEK) { 3603 /* 3604 * Even though we are peeking we consume the 3605 * T_EXDATA_IND thereby moving the mark information 3606 * to SS_RCVATMARK. Then the oob code below will 3607 * retry the peeking kstrgetmsg. 3608 * Note that the stream head read queue is 3609 * never flushed without holding SOREADLOCKED 3610 * thus the T_EXDATA_IND can not disappear 3611 * underneath us. 
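			 *
			 * For context, the userland pattern this mark
			 * bookkeeping serves is roughly the following (a
			 * hedged sketch, not part of this file, assuming
			 * SO_OOBINLINE is not set):
			 *
			 *	int atmark = 0;
			 *	char buf[256], oob;
			 *
			 *	for (;;) {
			 *		(void) ioctl(s, SIOCATMARK, &atmark);
			 *		if (atmark)
			 *			break;
			 *		if (read(s, buf, sizeof (buf)) <= 0)
			 *			break;
			 *	}
			 *	(void) recv(s, &oob, 1, MSG_OOB);
			 *
			 * SIOCATMARK keeps returning true until data beyond
			 * the mark has been read, which is the state that
			 * SS_RCVATMARK tracks here.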
3612 */ 3613 dprintso(so, 1, 3614 ("sotpi_recvmsg: consume EXDATA_IND " 3615 "counts %d/%d state %s\n", 3616 sti->sti_oobsigcnt, 3617 sti->sti_oobcnt, 3618 pr_state(so->so_state, so->so_mode))); 3619 3620 pflag = MSG_ANY | MSG_DELAYERROR; 3621 if (so->so_mode & SM_ATOMIC) 3622 pflag |= MSG_DISCARDTAIL; 3623 3624 pri = 0; 3625 mp = NULL; 3626 3627 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3628 &pri, &pflag, (clock_t)-1, &rval); 3629 ASSERT(uiop->uio_resid == saved_resid); 3630 3631 if (error) { 3632 #ifdef SOCK_DEBUG 3633 if (error != EWOULDBLOCK && error != EINTR) { 3634 eprintsoline(so, error); 3635 } 3636 #endif /* SOCK_DEBUG */ 3637 goto out; 3638 } 3639 ASSERT(mp); 3640 tpr = (union T_primitives *)mp->b_rptr; 3641 ASSERT(tpr->type == T_EXDATA_IND); 3642 freemsg(mp); 3643 } /* end "if (flags & MSG_PEEK)" */ 3644 3645 /* 3646 * Decrement the number of queued and pending oob. 3647 * 3648 * SS_RCVATMARK is cleared when we read past a mark. 3649 * SS_HAVEOOBDATA is cleared when we've read past the 3650 * last mark. 3651 * SS_OOBPEND is cleared if we've read past the last 3652 * mark and no (new) SIGURG has been posted. 3653 */ 3654 mutex_enter(&so->so_lock); 3655 ASSERT(so_verify_oobstate(so)); 3656 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3657 ASSERT(sti->sti_oobsigcnt > 0); 3658 sti->sti_oobsigcnt--; 3659 ASSERT(sti->sti_oobcnt > 0); 3660 sti->sti_oobcnt--; 3661 /* 3662 * Since the T_EXDATA_IND has been removed from the stream 3663 * head, but we have not read data past the mark, 3664 * sockfs needs to track that the socket is still at the mark. 3665 * 3666 * Since no data was received call kstrgetmsg again to wait 3667 * for data. 3668 */ 3669 so->so_state |= SS_RCVATMARK; 3670 mutex_exit(&so->so_lock); 3671 dprintso(so, 1, 3672 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3673 sti->sti_oobsigcnt, sti->sti_oobcnt, 3674 pr_state(so->so_state, so->so_mode))); 3675 pflag = opflag; 3676 goto retry; 3677 } 3678 default: 3679 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3680 (void *)so, tpr->type, (void *)mp); 3681 ASSERT(0); 3682 freemsg(mp); 3683 error = EPROTO; 3684 eprintsoline(so, error); 3685 goto out; 3686 } 3687 /* NOTREACHED */ 3688 out: 3689 mutex_enter(&so->so_lock); 3690 out_locked: 3691 so_unlock_read(so); /* Clear SOREADLOCKED */ 3692 mutex_exit(&so->so_lock); 3693 return (error); 3694 } 3695 3696 /* 3697 * Sending data with options on a datagram socket. 3698 * Assumes caller has verified that SS_ISBOUND etc. are set. 3699 */ 3700 static int 3701 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3702 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3703 { 3704 struct T_unitdata_req tudr; 3705 mblk_t *mp; 3706 int error; 3707 void *addr; 3708 socklen_t addrlen; 3709 void *src; 3710 socklen_t srclen; 3711 ssize_t len; 3712 int size; 3713 struct T_opthdr toh; 3714 struct fdbuf *fdbuf; 3715 t_uscalar_t optlen; 3716 void *fds; 3717 int fdlen; 3718 sotpi_info_t *sti = SOTOTPI(so); 3719 3720 ASSERT(name && namelen); 3721 ASSERT(control && controllen); 3722 3723 len = uiop->uio_resid; 3724 if (len > (ssize_t)sti->sti_tidu_size) { 3725 return (EMSGSIZE); 3726 } 3727 3728 /* 3729 * For AF_UNIX the destination address is translated to an internal 3730 * name and the source address is passed as an option. 3731 * Also, file descriptors are passed as file pointers in an 3732 * option. 3733 */ 3734 3735 /* 3736 * Length and family checks. 
3737 */ 3738 error = so_addr_verify(so, name, namelen); 3739 if (error) { 3740 eprintsoline(so, error); 3741 return (error); 3742 } 3743 if (so->so_family == AF_UNIX) { 3744 if (sti->sti_faddr_noxlate) { 3745 /* 3746 * Already have a transport internal address. Do not 3747 * pass any (transport internal) source address. 3748 */ 3749 addr = name; 3750 addrlen = namelen; 3751 src = NULL; 3752 srclen = 0; 3753 } else { 3754 /* 3755 * Pass the sockaddr_un source address as an option 3756 * and translate the remote address. 3757 * 3758 * Note that this code does not prevent sti_laddr_sa 3759 * from changing while it is being used. Thus 3760 * if an unbind+bind occurs concurrently with this 3761 * send the peer might see a partially new and a 3762 * partially old "from" address. 3763 */ 3764 src = sti->sti_laddr_sa; 3765 srclen = (t_uscalar_t)sti->sti_laddr_len; 3766 dprintso(so, 1, 3767 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3768 srclen, src)); 3769 error = so_ux_addr_xlate(so, name, namelen, 3770 (flags & MSG_XPG4_2), 3771 &addr, &addrlen); 3772 if (error) { 3773 eprintsoline(so, error); 3774 return (error); 3775 } 3776 } 3777 } else { 3778 addr = name; 3779 addrlen = namelen; 3780 src = NULL; 3781 srclen = 0; 3782 } 3783 optlen = so_optlen(control, controllen, 3784 !(flags & MSG_XPG4_2)); 3785 tudr.PRIM_type = T_UNITDATA_REQ; 3786 tudr.DEST_length = addrlen; 3787 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3788 if (srclen != 0) 3789 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3790 _TPI_ALIGN_TOPT(srclen)); 3791 else 3792 tudr.OPT_length = optlen; 3793 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3794 _TPI_ALIGN_TOPT(addrlen)); 3795 3796 size = tudr.OPT_offset + tudr.OPT_length; 3797 3798 /* 3799 * File descriptors only when SM_FDPASSING set. 3800 */ 3801 error = so_getfdopt(control, controllen, 3802 !(flags & MSG_XPG4_2), &fds, &fdlen); 3803 if (error) 3804 return (error); 3805 if (fdlen != -1) { 3806 if (!(so->so_mode & SM_FDPASSING)) 3807 return (EOPNOTSUPP); 3808 3809 error = fdbuf_create(fds, fdlen, &fdbuf); 3810 if (error) 3811 return (error); 3812 mp = fdbuf_allocmsg(size, fdbuf); 3813 } else { 3814 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3815 if (mp == NULL) { 3816 /* 3817 * Caught a signal waiting for memory. 3818 * Let send* return EINTR. 3819 */ 3820 return (EINTR); 3821 } 3822 } 3823 soappendmsg(mp, &tudr, sizeof (tudr)); 3824 soappendmsg(mp, addr, addrlen); 3825 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3826 3827 if (fdlen != -1) { 3828 ASSERT(fdbuf != NULL); 3829 toh.level = SOL_SOCKET; 3830 toh.name = SO_FILEP; 3831 toh.len = fdbuf->fd_size + 3832 (t_uscalar_t)sizeof (struct T_opthdr); 3833 toh.status = 0; 3834 soappendmsg(mp, &toh, sizeof (toh)); 3835 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3836 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3837 } 3838 if (srclen != 0) { 3839 /* 3840 * There is a AF_UNIX sockaddr_un to include as a source 3841 * address option. 
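	 *
	 * For reference, the T_UNITDATA_REQ being assembled in this function
	 * ends up laid out roughly as follows (illustrative sketch only;
	 * padding comes from _TPI_ALIGN_TOPT()):
	 *
	 *	struct T_unitdata_req		DEST_offset/OPT_offset headers
	 *	destination address		padded to option alignment
	 *	struct T_opthdr SO_FILEP	only if descriptors are passed
	 *	struct fdbuf			ditto
	 *	struct T_opthdr SO_SRCADDR	only if srclen != 0
	 *	source sockaddr_un		padded to option alignment
	 *	converted control options	appended by so_cmsg2opt()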
3842 */ 3843 toh.level = SOL_SOCKET; 3844 toh.name = SO_SRCADDR; 3845 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3846 toh.status = 0; 3847 soappendmsg(mp, &toh, sizeof (toh)); 3848 soappendmsg(mp, src, srclen); 3849 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3850 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3851 } 3852 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3853 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3854 /* At most 3 bytes left in the message */ 3855 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3856 ASSERT(MBLKL(mp) <= (ssize_t)size); 3857 3858 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3859 if (AU_AUDITING()) 3860 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3861 3862 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3863 #ifdef SOCK_DEBUG 3864 if (error) { 3865 eprintsoline(so, error); 3866 } 3867 #endif /* SOCK_DEBUG */ 3868 return (error); 3869 } 3870 3871 /* 3872 * Sending data with options on a connected stream socket. 3873 * Assumes caller has verified that SS_ISCONNECTED is set. 3874 */ 3875 static int 3876 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3877 t_uscalar_t controllen, int flags) 3878 { 3879 struct T_optdata_req tdr; 3880 mblk_t *mp; 3881 int error; 3882 ssize_t iosize; 3883 int size; 3884 struct fdbuf *fdbuf; 3885 t_uscalar_t optlen; 3886 void *fds; 3887 int fdlen; 3888 struct T_opthdr toh; 3889 sotpi_info_t *sti = SOTOTPI(so); 3890 3891 dprintso(so, 1, 3892 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3893 3894 /* 3895 * Has to be bound and connected. However, since no locks are 3896 * held the state could have changed after sotpi_sendmsg checked it 3897 * thus it is not possible to ASSERT on the state. 3898 */ 3899 3900 /* Options on connection-oriented only when SM_OPTDATA set. */ 3901 if (!(so->so_mode & SM_OPTDATA)) 3902 return (EOPNOTSUPP); 3903 3904 do { 3905 /* 3906 * Set the MORE flag if uio_resid does not fit in this 3907 * message or if the caller passed in "more". 3908 * Error for transports with zero tidu_size. 3909 */ 3910 tdr.PRIM_type = T_OPTDATA_REQ; 3911 iosize = sti->sti_tidu_size; 3912 if (iosize <= 0) 3913 return (EMSGSIZE); 3914 if (uiop->uio_resid > iosize) { 3915 tdr.DATA_flag = 1; 3916 } else { 3917 if (more) 3918 tdr.DATA_flag = 1; 3919 else 3920 tdr.DATA_flag = 0; 3921 iosize = uiop->uio_resid; 3922 } 3923 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3924 tdr.DATA_flag, iosize)); 3925 3926 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3927 tdr.OPT_length = optlen; 3928 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3929 3930 size = (int)sizeof (tdr) + optlen; 3931 /* 3932 * File descriptors only when SM_FDPASSING set. 3933 */ 3934 error = so_getfdopt(control, controllen, 3935 !(flags & MSG_XPG4_2), &fds, &fdlen); 3936 if (error) 3937 return (error); 3938 if (fdlen != -1) { 3939 if (!(so->so_mode & SM_FDPASSING)) 3940 return (EOPNOTSUPP); 3941 3942 error = fdbuf_create(fds, fdlen, &fdbuf); 3943 if (error) 3944 return (error); 3945 mp = fdbuf_allocmsg(size, fdbuf); 3946 } else { 3947 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3948 if (mp == NULL) { 3949 /* 3950 * Caught a signal waiting for memory. 3951 * Let send* return EINTR. 
3952 */ 3953 return (EINTR); 3954 } 3955 } 3956 soappendmsg(mp, &tdr, sizeof (tdr)); 3957 3958 if (fdlen != -1) { 3959 ASSERT(fdbuf != NULL); 3960 toh.level = SOL_SOCKET; 3961 toh.name = SO_FILEP; 3962 toh.len = fdbuf->fd_size + 3963 (t_uscalar_t)sizeof (struct T_opthdr); 3964 toh.status = 0; 3965 soappendmsg(mp, &toh, sizeof (toh)); 3966 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3967 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3968 } 3969 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3970 /* At most 3 bytes left in the message */ 3971 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3972 ASSERT(MBLKL(mp) <= (ssize_t)size); 3973 3974 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3975 3976 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3977 0, MSG_BAND, 0); 3978 if (error) { 3979 eprintsoline(so, error); 3980 return (error); 3981 } 3982 control = NULL; 3983 if (uiop->uio_resid > 0) { 3984 /* 3985 * Recheck for fatal errors. Fail write even though 3986 * some data have been written. This is consistent 3987 * with strwrite semantics and BSD sockets semantics. 3988 */ 3989 if (so->so_state & SS_CANTSENDMORE) { 3990 eprintsoline(so, error); 3991 return (EPIPE); 3992 } 3993 if (so->so_error != 0) { 3994 mutex_enter(&so->so_lock); 3995 error = sogeterr(so, B_TRUE); 3996 mutex_exit(&so->so_lock); 3997 if (error != 0) { 3998 eprintsoline(so, error); 3999 return (error); 4000 } 4001 } 4002 } 4003 } while (uiop->uio_resid > 0); 4004 return (0); 4005 } 4006 4007 /* 4008 * Sending data on a datagram socket. 4009 * Assumes caller has verified that SS_ISBOUND etc. are set. 4010 * 4011 * For AF_UNIX the destination address is translated to an internal 4012 * name and the source address is passed as an option. 4013 */ 4014 int 4015 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 4016 struct uio *uiop, int flags) 4017 { 4018 struct T_unitdata_req tudr; 4019 mblk_t *mp; 4020 int error; 4021 void *addr; 4022 socklen_t addrlen; 4023 void *src; 4024 socklen_t srclen; 4025 ssize_t len; 4026 sotpi_info_t *sti = SOTOTPI(so); 4027 4028 ASSERT(name != NULL && namelen != 0); 4029 4030 len = uiop->uio_resid; 4031 if (len > sti->sti_tidu_size) { 4032 error = EMSGSIZE; 4033 goto done; 4034 } 4035 4036 /* Length and family checks */ 4037 error = so_addr_verify(so, name, namelen); 4038 if (error != 0) 4039 goto done; 4040 4041 if (sti->sti_direct) 4042 return (sodgram_direct(so, name, namelen, uiop, flags)); 4043 4044 if (so->so_family == AF_UNIX) { 4045 if (sti->sti_faddr_noxlate) { 4046 /* 4047 * Already have a transport internal address. Do not 4048 * pass any (transport internal) source address. 4049 */ 4050 addr = name; 4051 addrlen = namelen; 4052 src = NULL; 4053 srclen = 0; 4054 } else { 4055 /* 4056 * Pass the sockaddr_un source address as an option 4057 * and translate the remote address. 4058 * 4059 * Note that this code does not prevent sti_laddr_sa 4060 * from changing while it is being used. Thus 4061 * if an unbind+bind occurs concurrently with this 4062 * send the peer might see a partially new and a 4063 * partially old "from" address. 
4064 */ 4065 src = sti->sti_laddr_sa; 4066 srclen = (socklen_t)sti->sti_laddr_len; 4067 dprintso(so, 1, 4068 ("sosend_dgram UNIX: srclen %d, src %p\n", 4069 srclen, src)); 4070 error = so_ux_addr_xlate(so, name, namelen, 4071 (flags & MSG_XPG4_2), 4072 &addr, &addrlen); 4073 if (error) { 4074 eprintsoline(so, error); 4075 goto done; 4076 } 4077 } 4078 } else { 4079 addr = name; 4080 addrlen = namelen; 4081 src = NULL; 4082 srclen = 0; 4083 } 4084 tudr.PRIM_type = T_UNITDATA_REQ; 4085 tudr.DEST_length = addrlen; 4086 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4087 if (srclen == 0) { 4088 tudr.OPT_length = 0; 4089 tudr.OPT_offset = 0; 4090 4091 mp = soallocproto2(&tudr, sizeof (tudr), 4092 addr, addrlen, 0, _ALLOC_INTR, CRED()); 4093 if (mp == NULL) { 4094 /* 4095 * Caught a signal waiting for memory. 4096 * Let send* return EINTR. 4097 */ 4098 error = EINTR; 4099 goto done; 4100 } 4101 } else { 4102 /* 4103 * There is a AF_UNIX sockaddr_un to include as a source 4104 * address option. 4105 */ 4106 struct T_opthdr toh; 4107 ssize_t size; 4108 4109 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4110 _TPI_ALIGN_TOPT(srclen)); 4111 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4112 _TPI_ALIGN_TOPT(addrlen)); 4113 4114 toh.level = SOL_SOCKET; 4115 toh.name = SO_SRCADDR; 4116 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4117 toh.status = 0; 4118 4119 size = tudr.OPT_offset + tudr.OPT_length; 4120 mp = soallocproto2(&tudr, sizeof (tudr), 4121 addr, addrlen, size, _ALLOC_INTR, CRED()); 4122 if (mp == NULL) { 4123 /* 4124 * Caught a signal waiting for memory. 4125 * Let send* return EINTR. 4126 */ 4127 error = EINTR; 4128 goto done; 4129 } 4130 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4131 soappendmsg(mp, &toh, sizeof (toh)); 4132 soappendmsg(mp, src, srclen); 4133 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4134 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4135 } 4136 4137 if (AU_AUDITING()) 4138 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4139 4140 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4141 done: 4142 #ifdef SOCK_DEBUG 4143 if (error) { 4144 eprintsoline(so, error); 4145 } 4146 #endif /* SOCK_DEBUG */ 4147 return (error); 4148 } 4149 4150 /* 4151 * Sending data on a connected stream socket. 4152 * Assumes caller has verified that SS_ISCONNECTED is set. 4153 */ 4154 int 4155 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4156 int sflag) 4157 { 4158 struct T_data_req tdr; 4159 mblk_t *mp; 4160 int error; 4161 ssize_t iosize; 4162 sotpi_info_t *sti = SOTOTPI(so); 4163 4164 dprintso(so, 1, 4165 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4166 (void *)so, uiop->uio_resid, prim, sflag)); 4167 4168 /* 4169 * Has to be bound and connected. However, since no locks are 4170 * held the state could have changed after sotpi_sendmsg checked it 4171 * thus it is not possible to ASSERT on the state. 4172 */ 4173 4174 do { 4175 /* 4176 * Set the MORE flag if uio_resid does not fit in this 4177 * message or if the caller passed in "more". 4178 * Error for transports with zero tidu_size. 
4179 */ 4180 tdr.PRIM_type = prim; 4181 iosize = sti->sti_tidu_size; 4182 if (iosize <= 0) 4183 return (EMSGSIZE); 4184 if (uiop->uio_resid > iosize) { 4185 tdr.MORE_flag = 1; 4186 } else { 4187 if (more) 4188 tdr.MORE_flag = 1; 4189 else 4190 tdr.MORE_flag = 0; 4191 iosize = uiop->uio_resid; 4192 } 4193 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4194 prim, tdr.MORE_flag, iosize)); 4195 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4196 if (mp == NULL) { 4197 /* 4198 * Caught a signal waiting for memory. 4199 * Let send* return EINTR. 4200 */ 4201 return (EINTR); 4202 } 4203 4204 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4205 0, sflag | MSG_BAND, 0); 4206 if (error) { 4207 eprintsoline(so, error); 4208 return (error); 4209 } 4210 if (uiop->uio_resid > 0) { 4211 /* 4212 * Recheck for fatal errors. Fail write even though 4213 * some data have been written. This is consistent 4214 * with strwrite semantics and BSD sockets semantics. 4215 */ 4216 if (so->so_state & SS_CANTSENDMORE) { 4217 eprintsoline(so, error); 4218 return (EPIPE); 4219 } 4220 if (so->so_error != 0) { 4221 mutex_enter(&so->so_lock); 4222 error = sogeterr(so, B_TRUE); 4223 mutex_exit(&so->so_lock); 4224 if (error != 0) { 4225 eprintsoline(so, error); 4226 return (error); 4227 } 4228 } 4229 } 4230 } while (uiop->uio_resid > 0); 4231 return (0); 4232 } 4233 4234 /* 4235 * Check the state for errors and call the appropriate send function. 4236 * 4237 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4238 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4239 * after sending the message. 4240 */ 4241 static int 4242 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4243 struct cred *cr) 4244 { 4245 int so_state; 4246 int so_mode; 4247 int error; 4248 struct sockaddr *name; 4249 t_uscalar_t namelen; 4250 int dontroute; 4251 int flags; 4252 sotpi_info_t *sti = SOTOTPI(so); 4253 4254 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4255 (void *)so, (void *)msg, msg->msg_flags, 4256 pr_state(so->so_state, so->so_mode), so->so_error)); 4257 4258 if (so->so_version == SOV_STREAM) { 4259 /* The imaginary "sockmod" has been popped - act as a stream */ 4260 so_update_attrs(so, SOMOD); 4261 return (strwrite(SOTOV(so), uiop, cr)); 4262 } 4263 4264 mutex_enter(&so->so_lock); 4265 so_state = so->so_state; 4266 4267 if (so_state & SS_CANTSENDMORE) { 4268 mutex_exit(&so->so_lock); 4269 return (EPIPE); 4270 } 4271 4272 if (so->so_error != 0) { 4273 error = sogeterr(so, B_TRUE); 4274 if (error != 0) { 4275 mutex_exit(&so->so_lock); 4276 return (error); 4277 } 4278 } 4279 4280 name = (struct sockaddr *)msg->msg_name; 4281 namelen = msg->msg_namelen; 4282 4283 so_mode = so->so_mode; 4284 4285 if (name == NULL) { 4286 if (!(so_state & SS_ISCONNECTED)) { 4287 mutex_exit(&so->so_lock); 4288 if (so_mode & SM_CONNREQUIRED) 4289 return (ENOTCONN); 4290 else 4291 return (EDESTADDRREQ); 4292 } 4293 if (so_mode & SM_CONNREQUIRED) { 4294 name = NULL; 4295 namelen = 0; 4296 } else { 4297 /* 4298 * Note that this code does not prevent sti_faddr_sa 4299 * from changing while it is being used. Thus 4300 * if an "unconnect"+connect occurs concurrently with 4301 * this send the datagram might be delivered to a 4302 * garbaled address. 
4303 */ 4304 ASSERT(sti->sti_faddr_sa); 4305 name = sti->sti_faddr_sa; 4306 namelen = (t_uscalar_t)sti->sti_faddr_len; 4307 } 4308 } else { 4309 if (!(so_state & SS_ISCONNECTED) && 4310 (so_mode & SM_CONNREQUIRED)) { 4311 /* Required but not connected */ 4312 mutex_exit(&so->so_lock); 4313 return (ENOTCONN); 4314 } 4315 /* 4316 * Ignore the address on connection-oriented sockets. 4317 * Just like BSD this code does not generate an error for 4318 * TCP (a CONNREQUIRED socket) when sending to an address 4319 * passed in with sendto/sendmsg. Instead the data is 4320 * delivered on the connection as if no address had been 4321 * supplied. 4322 */ 4323 if ((so_state & SS_ISCONNECTED) && 4324 !(so_mode & SM_CONNREQUIRED)) { 4325 mutex_exit(&so->so_lock); 4326 return (EISCONN); 4327 } 4328 if (!(so_state & SS_ISBOUND)) { 4329 so_lock_single(so); /* Set SOLOCKED */ 4330 error = sotpi_bind(so, NULL, 0, 4331 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4332 so_unlock_single(so, SOLOCKED); 4333 if (error) { 4334 mutex_exit(&so->so_lock); 4335 eprintsoline(so, error); 4336 return (error); 4337 } 4338 } 4339 /* 4340 * Handle delayed datagram errors. These are only queued 4341 * when the application sets SO_DGRAM_ERRIND. 4342 * Return the error if we are sending to the address 4343 * that was returned in the last T_UDERROR_IND. 4344 * If sending to some other address discard the delayed 4345 * error indication. 4346 */ 4347 if (sti->sti_delayed_error) { 4348 struct T_uderror_ind *tudi; 4349 void *addr; 4350 t_uscalar_t addrlen; 4351 boolean_t match = B_FALSE; 4352 4353 ASSERT(sti->sti_eaddr_mp); 4354 error = sti->sti_delayed_error; 4355 sti->sti_delayed_error = 0; 4356 tudi = 4357 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4358 addrlen = tudi->DEST_length; 4359 addr = sogetoff(sti->sti_eaddr_mp, 4360 tudi->DEST_offset, addrlen, 1); 4361 ASSERT(addr); /* Checked by strsock_proto */ 4362 switch (so->so_family) { 4363 case AF_INET: { 4364 /* Compare just IP address and port */ 4365 sin_t *sin1 = (sin_t *)name; 4366 sin_t *sin2 = (sin_t *)addr; 4367 4368 if (addrlen == sizeof (sin_t) && 4369 namelen == addrlen && 4370 sin1->sin_port == sin2->sin_port && 4371 sin1->sin_addr.s_addr == 4372 sin2->sin_addr.s_addr) 4373 match = B_TRUE; 4374 break; 4375 } 4376 case AF_INET6: { 4377 /* Compare just IP address and port. 
Not flow */ 4378 sin6_t *sin1 = (sin6_t *)name; 4379 sin6_t *sin2 = (sin6_t *)addr; 4380 4381 if (addrlen == sizeof (sin6_t) && 4382 namelen == addrlen && 4383 sin1->sin6_port == sin2->sin6_port && 4384 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4385 &sin2->sin6_addr)) 4386 match = B_TRUE; 4387 break; 4388 } 4389 case AF_UNIX: 4390 default: 4391 if (namelen == addrlen && 4392 bcmp(name, addr, namelen) == 0) 4393 match = B_TRUE; 4394 } 4395 if (match) { 4396 freemsg(sti->sti_eaddr_mp); 4397 sti->sti_eaddr_mp = NULL; 4398 mutex_exit(&so->so_lock); 4399 #ifdef DEBUG 4400 dprintso(so, 0, 4401 ("sockfs delayed error %d for %s\n", 4402 error, 4403 pr_addr(so->so_family, name, namelen))); 4404 #endif /* DEBUG */ 4405 return (error); 4406 } 4407 freemsg(sti->sti_eaddr_mp); 4408 sti->sti_eaddr_mp = NULL; 4409 } 4410 } 4411 mutex_exit(&so->so_lock); 4412 4413 flags = msg->msg_flags; 4414 dontroute = 0; 4415 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4416 uint32_t val; 4417 4418 val = 1; 4419 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4420 &val, (t_uscalar_t)sizeof (val), cr); 4421 if (error) 4422 return (error); 4423 dontroute = 1; 4424 } 4425 4426 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4427 error = EOPNOTSUPP; 4428 goto done; 4429 } 4430 if (msg->msg_controllen != 0) { 4431 if (!(so_mode & SM_CONNREQUIRED)) { 4432 so_update_attrs(so, SOMOD); 4433 error = sosend_dgramcmsg(so, name, namelen, uiop, 4434 msg->msg_control, msg->msg_controllen, flags); 4435 } else { 4436 if (flags & MSG_OOB) { 4437 /* Can't generate T_EXDATA_REQ with options */ 4438 error = EOPNOTSUPP; 4439 goto done; 4440 } 4441 so_update_attrs(so, SOMOD); 4442 error = sosend_svccmsg(so, uiop, 4443 !(flags & MSG_EOR), 4444 msg->msg_control, msg->msg_controllen, 4445 flags); 4446 } 4447 goto done; 4448 } 4449 4450 so_update_attrs(so, SOMOD); 4451 if (!(so_mode & SM_CONNREQUIRED)) { 4452 /* 4453 * If there is no SO_DONTROUTE to turn off return immediately 4454 * from send_dgram. This can allow tail-call optimizations. 4455 */ 4456 if (!dontroute) { 4457 return (sosend_dgram(so, name, namelen, uiop, flags)); 4458 } 4459 error = sosend_dgram(so, name, namelen, uiop, flags); 4460 } else { 4461 t_scalar_t prim; 4462 int sflag; 4463 4464 /* Ignore msg_name in the connected state */ 4465 if (flags & MSG_OOB) { 4466 prim = T_EXDATA_REQ; 4467 /* 4468 * Send down T_EXDATA_REQ even if there is flow 4469 * control for data. 4470 */ 4471 sflag = MSG_IGNFLOW; 4472 } else { 4473 if (so_mode & SM_BYTESTREAM) { 4474 /* Byte stream transport - use write */ 4475 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4476 4477 /* Send M_DATA messages */ 4478 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4479 (error = nl7c_data(so, uiop)) >= 0) { 4480 /* NL7C consumed the data */ 4481 return (error); 4482 } 4483 /* 4484 * If there is no SO_DONTROUTE to turn off, 4485 * sti_direct is on, and there is no flow 4486 * control, we can take the fast path. 4487 */ 4488 if (!dontroute && sti->sti_direct != 0 && 4489 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4490 return (sostream_direct(so, uiop, 4491 NULL, cr)); 4492 } 4493 error = strwrite(SOTOV(so), uiop, cr); 4494 goto done; 4495 } 4496 prim = T_DATA_REQ; 4497 sflag = 0; 4498 } 4499 /* 4500 * If there is no SO_DONTROUTE to turn off return immediately 4501 * from sosend_svc. This can allow tail-call optimizations. 
4502 */ 4503 if (!dontroute) 4504 return (sosend_svc(so, uiop, prim, 4505 !(flags & MSG_EOR), sflag)); 4506 error = sosend_svc(so, uiop, prim, 4507 !(flags & MSG_EOR), sflag); 4508 } 4509 ASSERT(dontroute); 4510 done: 4511 if (dontroute) { 4512 uint32_t val; 4513 4514 val = 0; 4515 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4516 &val, (t_uscalar_t)sizeof (val), cr); 4517 } 4518 return (error); 4519 } 4520 4521 /* 4522 * kstrwritemp() has very similar semantics as that of strwrite(). 4523 * The main difference is it obtains mblks from the caller and also 4524 * does not do any copy as done in strwrite() from user buffers to 4525 * kernel buffers. 4526 * 4527 * Currently, this routine is used by sendfile to send data allocated 4528 * within the kernel without any copying. This interface does not use the 4529 * synchronous stream interface as synch. stream interface implies 4530 * copying. 4531 */ 4532 int 4533 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4534 { 4535 struct stdata *stp; 4536 struct queue *wqp; 4537 mblk_t *newmp; 4538 char waitflag; 4539 int tempmode; 4540 int error = 0; 4541 int done = 0; 4542 struct sonode *so; 4543 boolean_t direct; 4544 4545 ASSERT(vp->v_stream); 4546 stp = vp->v_stream; 4547 4548 so = VTOSO(vp); 4549 direct = _SOTOTPI(so)->sti_direct; 4550 4551 /* 4552 * This is the sockfs direct fast path. canputnext() need 4553 * not be accurate so we don't grab the sd_lock here. If 4554 * we get flow-controlled, we grab sd_lock just before the 4555 * do..while loop below to emulate what strwrite() does. 4556 */ 4557 wqp = stp->sd_wrq; 4558 if (canputnext(wqp) && direct && 4559 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4560 return (sostream_direct(so, NULL, mp, CRED())); 4561 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4562 /* Fast check of flags before acquiring the lock */ 4563 mutex_enter(&stp->sd_lock); 4564 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4565 mutex_exit(&stp->sd_lock); 4566 if (error != 0) { 4567 if (!(stp->sd_flag & STPLEX) && 4568 (stp->sd_wput_opt & SW_SIGPIPE)) { 4569 error = EPIPE; 4570 } 4571 return (error); 4572 } 4573 } 4574 4575 waitflag = WRITEWAIT; 4576 if (stp->sd_flag & OLDNDELAY) 4577 tempmode = fmode & ~FNDELAY; 4578 else 4579 tempmode = fmode; 4580 4581 mutex_enter(&stp->sd_lock); 4582 do { 4583 if (canputnext(wqp)) { 4584 mutex_exit(&stp->sd_lock); 4585 if (stp->sd_wputdatafunc != NULL) { 4586 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4587 NULL, NULL, NULL); 4588 if (newmp == NULL) { 4589 /* The caller will free mp */ 4590 return (ECOMM); 4591 } 4592 mp = newmp; 4593 } 4594 putnext(wqp, mp); 4595 return (0); 4596 } 4597 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4598 &done); 4599 } while (error == 0 && !done); 4600 4601 mutex_exit(&stp->sd_lock); 4602 /* 4603 * EAGAIN tells the application to try again. ENOMEM 4604 * is returned only if the memory allocation size 4605 * exceeds the physical limits of the system. ENOMEM 4606 * can't be true here. 
4607 */ 4608 if (error == ENOMEM) 4609 error = EAGAIN; 4610 return (error); 4611 } 4612 4613 /* ARGSUSED */ 4614 static int 4615 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4616 struct cred *cr, mblk_t **mpp) 4617 { 4618 int error; 4619 4620 if (so->so_family != AF_INET && so->so_family != AF_INET6) 4621 return (EAFNOSUPPORT); 4622 4623 if (so->so_state & SS_CANTSENDMORE) 4624 return (EPIPE); 4625 4626 if (so->so_type != SOCK_STREAM) 4627 return (EOPNOTSUPP); 4628 4629 if ((so->so_state & SS_ISCONNECTED) == 0) 4630 return (ENOTCONN); 4631 4632 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4633 if (error == 0) 4634 *mpp = NULL; 4635 return (error); 4636 } 4637 4638 /* 4639 * Sending data on a datagram socket. 4640 * Assumes caller has verified that SS_ISBOUND etc. are set. 4641 */ 4642 /* ARGSUSED */ 4643 static int 4644 sodgram_direct(struct sonode *so, struct sockaddr *name, 4645 socklen_t namelen, struct uio *uiop, int flags) 4646 { 4647 struct T_unitdata_req tudr; 4648 mblk_t *mp = NULL; 4649 int error = 0; 4650 void *addr; 4651 socklen_t addrlen; 4652 ssize_t len; 4653 struct stdata *stp = SOTOV(so)->v_stream; 4654 int so_state; 4655 queue_t *udp_wq; 4656 boolean_t connected; 4657 mblk_t *mpdata = NULL; 4658 sotpi_info_t *sti = SOTOTPI(so); 4659 uint32_t auditing = AU_AUDITING(); 4660 4661 ASSERT(name != NULL && namelen != 0); 4662 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4663 ASSERT(!(so->so_mode & SM_EXDATA)); 4664 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4665 ASSERT(SOTOV(so)->v_type == VSOCK); 4666 4667 /* Caller checked for proper length */ 4668 len = uiop->uio_resid; 4669 ASSERT(len <= sti->sti_tidu_size); 4670 4671 /* Length and family checks have been done by caller */ 4672 ASSERT(name->sa_family == so->so_family); 4673 ASSERT(so->so_family == AF_INET || 4674 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4675 ASSERT(so->so_family == AF_INET6 || 4676 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4677 4678 addr = name; 4679 addrlen = namelen; 4680 4681 if (stp->sd_sidp != NULL && 4682 (error = straccess(stp, JCWRITE)) != 0) 4683 goto done; 4684 4685 so_state = so->so_state; 4686 4687 connected = so_state & SS_ISCONNECTED; 4688 if (!connected) { 4689 tudr.PRIM_type = T_UNITDATA_REQ; 4690 tudr.DEST_length = addrlen; 4691 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4692 tudr.OPT_length = 0; 4693 tudr.OPT_offset = 0; 4694 4695 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4696 _ALLOC_INTR, CRED()); 4697 if (mp == NULL) { 4698 /* 4699 * Caught a signal waiting for memory. 4700 * Let send* return EINTR. 4701 */ 4702 error = EINTR; 4703 goto done; 4704 } 4705 } 4706 4707 /* 4708 * For UDP we don't break up the copyin into smaller pieces 4709 * as in the TCP case. That means if ENOMEM is returned by 4710 * mcopyinuio() then the uio vector has not been modified at 4711 * all and we fallback to either strwrite() or kstrputmsg() 4712 * below. Note also that we never generate priority messages 4713 * from here. 
4714 */ 4715 udp_wq = stp->sd_wrq->q_next; 4716 if (canput(udp_wq) && 4717 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4718 ASSERT(DB_TYPE(mpdata) == M_DATA); 4719 ASSERT(uiop->uio_resid == 0); 4720 if (!connected) 4721 linkb(mp, mpdata); 4722 else 4723 mp = mpdata; 4724 if (auditing) 4725 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4726 4727 udp_wput(udp_wq, mp); 4728 return (0); 4729 } 4730 4731 ASSERT(mpdata == NULL); 4732 if (error != 0 && error != ENOMEM) { 4733 freemsg(mp); 4734 return (error); 4735 } 4736 4737 /* 4738 * For connected, let strwrite() handle the blocking case. 4739 * Otherwise we fall thru and use kstrputmsg(). 4740 */ 4741 if (connected) 4742 return (strwrite(SOTOV(so), uiop, CRED())); 4743 4744 if (auditing) 4745 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4746 4747 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4748 done: 4749 #ifdef SOCK_DEBUG 4750 if (error != 0) { 4751 eprintsoline(so, error); 4752 } 4753 #endif /* SOCK_DEBUG */ 4754 return (error); 4755 } 4756 4757 int 4758 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4759 { 4760 struct stdata *stp = SOTOV(so)->v_stream; 4761 ssize_t iosize, rmax, maxblk; 4762 queue_t *tcp_wq = stp->sd_wrq->q_next; 4763 mblk_t *newmp; 4764 int error = 0, wflag = 0; 4765 4766 ASSERT(so->so_mode & SM_BYTESTREAM); 4767 ASSERT(SOTOV(so)->v_type == VSOCK); 4768 4769 if (stp->sd_sidp != NULL && 4770 (error = straccess(stp, JCWRITE)) != 0) 4771 return (error); 4772 4773 if (uiop == NULL) { 4774 /* 4775 * kstrwritemp() should have checked sd_flag and 4776 * flow-control before coming here. If we end up 4777 * here it means that we can simply pass down the 4778 * data to tcp. 4779 */ 4780 ASSERT(mp != NULL); 4781 if (stp->sd_wputdatafunc != NULL) { 4782 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4783 NULL, NULL, NULL); 4784 if (newmp == NULL) { 4785 /* The caller will free mp */ 4786 return (ECOMM); 4787 } 4788 mp = newmp; 4789 } 4790 tcp_wput(tcp_wq, mp); 4791 return (0); 4792 } 4793 4794 /* Fallback to strwrite() to do proper error handling */ 4795 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4796 return (strwrite(SOTOV(so), uiop, cr)); 4797 4798 rmax = stp->sd_qn_maxpsz; 4799 ASSERT(rmax >= 0 || rmax == INFPSZ); 4800 if (rmax == 0 || uiop->uio_resid <= 0) 4801 return (0); 4802 4803 if (rmax == INFPSZ) 4804 rmax = uiop->uio_resid; 4805 4806 maxblk = stp->sd_maxblk; 4807 4808 for (;;) { 4809 iosize = MIN(uiop->uio_resid, rmax); 4810 4811 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4812 if (mp == NULL) { 4813 /* 4814 * Fallback to strwrite() for ENOMEM; if this 4815 * is our first time in this routine and the uio 4816 * vector has not been modified, we will end up 4817 * calling strwrite() without any flag set. 4818 */ 4819 if (error == ENOMEM) 4820 goto slow_send; 4821 else 4822 return (error); 4823 } 4824 ASSERT(uiop->uio_resid >= 0); 4825 /* 4826 * If mp is non-NULL and ENOMEM is set, it means that 4827 * mcopyinuio() was able to break down some of the user 4828 * data into one or more mblks. Send the partial data 4829 * to tcp and let the rest be handled in strwrite(). 
4830 */ 4831 ASSERT(error == 0 || error == ENOMEM); 4832 if (stp->sd_wputdatafunc != NULL) { 4833 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4834 NULL, NULL, NULL); 4835 if (newmp == NULL) { 4836 /* The caller will free mp */ 4837 return (ECOMM); 4838 } 4839 mp = newmp; 4840 } 4841 tcp_wput(tcp_wq, mp); 4842 4843 wflag |= NOINTR; 4844 4845 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4846 ASSERT(error == 0); 4847 break; 4848 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4849 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4850 slow_send: 4851 /* 4852 * We were able to send down partial data using 4853 * the direct call interface, but are now relying 4854 * on strwrite() to handle the non-fastpath cases. 4855 * If the socket is blocking we will sleep in 4856 * strwaitq() until write is permitted, otherwise, 4857 * we will need to return the amount of bytes 4858 * written so far back to the app. This is the 4859 * reason why we pass NOINTR flag to strwrite() 4860 * for non-blocking socket, because we don't want 4861 * to return EAGAIN when portion of the user data 4862 * has actually been sent down. 4863 */ 4864 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4865 } 4866 } 4867 return (0); 4868 } 4869 4870 /* 4871 * Update sti_faddr by asking the transport (unless AF_UNIX). 4872 */ 4873 /* ARGSUSED */ 4874 int 4875 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4876 boolean_t accept, struct cred *cr) 4877 { 4878 struct strbuf strbuf; 4879 int error = 0, res; 4880 void *addr; 4881 t_uscalar_t addrlen; 4882 k_sigset_t smask; 4883 sotpi_info_t *sti = SOTOTPI(so); 4884 4885 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4886 (void *)so, pr_state(so->so_state, so->so_mode))); 4887 4888 ASSERT(*namelen > 0); 4889 mutex_enter(&so->so_lock); 4890 so_lock_single(so); /* Set SOLOCKED */ 4891 4892 if (accept) { 4893 bcopy(sti->sti_faddr_sa, name, 4894 MIN(*namelen, sti->sti_faddr_len)); 4895 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4896 goto done; 4897 } 4898 4899 if (!(so->so_state & SS_ISCONNECTED)) { 4900 error = ENOTCONN; 4901 goto done; 4902 } 4903 /* Added this check for X/Open */ 4904 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4905 error = EINVAL; 4906 if (xnet_check_print) { 4907 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4908 } 4909 goto done; 4910 } 4911 4912 if (sti->sti_faddr_valid) { 4913 bcopy(sti->sti_faddr_sa, name, 4914 MIN(*namelen, sti->sti_faddr_len)); 4915 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4916 goto done; 4917 } 4918 4919 #ifdef DEBUG 4920 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4921 pr_addr(so->so_family, sti->sti_faddr_sa, 4922 (t_uscalar_t)sti->sti_faddr_len))); 4923 #endif /* DEBUG */ 4924 4925 if (so->so_family == AF_UNIX) { 4926 /* Transport has different name space - return local info */ 4927 if (sti->sti_faddr_noxlate) 4928 *namelen = 0; 4929 error = 0; 4930 goto done; 4931 } 4932 4933 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4934 4935 ASSERT(sti->sti_faddr_sa); 4936 /* Allocate local buffer to use with ioctl */ 4937 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4938 mutex_exit(&so->so_lock); 4939 addr = kmem_alloc(addrlen, KM_SLEEP); 4940 4941 /* 4942 * Issue TI_GETPEERNAME with signals masked. 4943 * Put the result in sti_faddr_sa so that getpeername works after 4944 * a shutdown(output). 4945 * If the ioctl fails (e.g. 
due to an ECONNRESET) the error is reposted
4946 * back to the socket.
4947 */
4948 strbuf.buf = addr;
4949 strbuf.maxlen = addrlen;
4950 strbuf.len = 0;
4951
4952 sigintr(&smask, 0);
4953 res = 0;
4954 ASSERT(cr);
4955 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf,
4956 0, K_TO_K, cr, &res);
4957 sigunintr(&smask);
4958
4959 mutex_enter(&so->so_lock);
4960 /*
4961 * If there is an error, record the error in so_error but don't fail
4962 * the getpeername. Instead fall back on the recorded
4963 * sti->sti_faddr_sa.
4964 */
4965 if (error) {
4966 /*
4967 * Various stream head errors can be returned to the ioctl.
4968 * However, it is impossible to determine which of
4969 * these are really socket level errors that were incorrectly
4970 * consumed by the ioctl. Thus this code silently ignores the
4971 * error - the code explicitly does not reinstate the error
4972 * using soseterror().
4973 * Experiments have shown that at least this set of
4974 * errors is reported and should not be reinstated on the
4975 * socket:
4976 * EINVAL E.g. if an I_LINK was in effect when
4977 * getpeername was called.
4978 * EPIPE The ioctl error semantics prefer the write
4979 * side error over the read side error.
4980 * ENOTCONN The transport just got disconnected but
4981 * sockfs had not yet seen the T_DISCON_IND
4982 * when issuing the ioctl.
4983 */
4984 error = 0;
4985 } else if (res == 0 && strbuf.len > 0 &&
4986 (so->so_state & SS_ISCONNECTED)) {
4987 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen);
4988 sti->sti_faddr_len = (socklen_t)strbuf.len;
4989 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len);
4990 sti->sti_faddr_valid = 1;
4991
4992 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len));
4993 *namelen = sti->sti_faddr_len;
4994 }
4995 kmem_free(addr, addrlen);
4996 #ifdef DEBUG
4997 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n",
4998 pr_addr(so->so_family, sti->sti_faddr_sa,
4999 (t_uscalar_t)sti->sti_faddr_len)));
5000 #endif /* DEBUG */
5001 done:
5002 so_unlock_single(so, SOLOCKED);
5003 mutex_exit(&so->so_lock);
5004 return (error);
5005 }
5006
5007 /*
5008 * Update sti_laddr by asking the transport (unless AF_UNIX).
5009 */
5010 int
5011 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen,
5012 struct cred *cr)
5013 {
5014 struct strbuf strbuf;
5015 int error = 0, res;
5016 void *addr;
5017 t_uscalar_t addrlen;
5018 k_sigset_t smask;
5019 sotpi_info_t *sti = SOTOTPI(so);
5020
5021 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n",
5022 (void *)so, pr_state(so->so_state, so->so_mode)));
5023
5024 ASSERT(*namelen > 0);
5025 mutex_enter(&so->so_lock);
5026 so_lock_single(so); /* Set SOLOCKED */
5027
5028 #ifdef DEBUG
5029
5030 dprintso(so, 1, ("sotpi_getsockname (local): %s\n",
5031 pr_addr(so->so_family, sti->sti_laddr_sa,
5032 (t_uscalar_t)sti->sti_laddr_len)));
5033 #endif /* DEBUG */
5034 if (sti->sti_laddr_valid) {
5035 bcopy(sti->sti_laddr_sa, name,
5036 MIN(*namelen, sti->sti_laddr_len));
5037 *namelen = sti->sti_laddr_len;
5038 goto done;
5039 }
5040
5041 if (so->so_family == AF_UNIX) {
5042 /* Transport has different name space - return local info */
5043 error = 0;
5044 *namelen = 0;
5045 goto done;
5046 }
5047 if (!(so->so_state & SS_ISBOUND)) {
5048 /* If not bound, then nothing to return. */
5049 error = 0;
5050 goto done;
5051 }
5052
5053 /* Allocate local buffer to use with ioctl */
5054 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen;
5055 mutex_exit(&so->so_lock);
5056 addr = kmem_alloc(addrlen, KM_SLEEP);
5057
5058 /*
5059 * Issue TI_GETMYNAME with signals masked.
5060 * Put the result in sti_laddr_sa so that getsockname works after
5061 * a shutdown(output).
5062 * If the ioctl fails (e.g. due to an ECONNRESET) the error is reposted
5063 * back to the socket.
5064 */
5065 strbuf.buf = addr;
5066 strbuf.maxlen = addrlen;
5067 strbuf.len = 0;
5068
5069 sigintr(&smask, 0);
5070 res = 0;
5071 ASSERT(cr);
5072 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf,
5073 0, K_TO_K, cr, &res);
5074 sigunintr(&smask);
5075
5076 mutex_enter(&so->so_lock);
5077 /*
5078 * If there is an error, record the error in so_error but don't fail
5079 * the getsockname. Instead fall back on the recorded
5080 * sti->sti_laddr_sa.
5081 */
5082 if (error) {
5083 /*
5084 * Various stream head errors can be returned to the ioctl.
5085 * However, it is impossible to determine which of
5086 * these are really socket level errors that were incorrectly
5087 * consumed by the ioctl. Thus this code silently ignores the
5088 * error - the code explicitly does not reinstate the error
5089 * using soseterror().
5090 * Experiments have shown that at least this set of
5091 * errors is reported and should not be reinstated on the
5092 * socket:
5093 * EINVAL E.g. if an I_LINK was in effect when
5094 * getsockname was called.
5095 * EPIPE The ioctl error semantics prefer the write
5096 * side error over the read side error.
5097 */
5098 error = 0;
5099 } else if (res == 0 && strbuf.len > 0 &&
5100 (so->so_state & SS_ISBOUND)) {
5101 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen);
5102 sti->sti_laddr_len = (socklen_t)strbuf.len;
5103 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
5104 sti->sti_laddr_valid = 1;
5105
5106 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen));
5107 *namelen = sti->sti_laddr_len;
5108 }
5109 kmem_free(addr, addrlen);
5110 #ifdef DEBUG
5111 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n",
5112 pr_addr(so->so_family, sti->sti_laddr_sa,
5113 (t_uscalar_t)sti->sti_laddr_len)));
5114 #endif /* DEBUG */
5115 done:
5116 so_unlock_single(so, SOLOCKED);
5117 mutex_exit(&so->so_lock);
5118 return (error);
5119 }
5120
5121 /*
5122 * Get socket options. For SOL_SOCKET options some are handled
5123 * by sockfs itself while others use the value recorded in the sonode as a
5124 * fallback should the T_SVR4_OPTMGMT_REQ fail.
5125 *
5126 * On return, at most *optlenp bytes are copied to optval.
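 *
 * For example, a getsockopt(SO_SNDBUF) still succeeds when the
 * transport rejects the T_SVR4_OPTMGMT_REQ: the value recorded in
 * so_sndbuf (or, when that is zero, the high-water mark of the write
 * queue below the stream head) is returned as the fallback.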
5127 */ 5128 /* ARGSUSED */ 5129 int 5130 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5131 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5132 { 5133 struct T_optmgmt_req optmgmt_req; 5134 struct T_optmgmt_ack *optmgmt_ack; 5135 struct opthdr oh; 5136 struct opthdr *opt_res; 5137 mblk_t *mp = NULL; 5138 int error = 0; 5139 void *option = NULL; /* Set if fallback value */ 5140 t_uscalar_t maxlen = *optlenp; 5141 t_uscalar_t len; 5142 uint32_t value; 5143 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5144 struct timeval32 tmo_val32; 5145 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5146 5147 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5148 (void *)so, level, option_name, optval, (void *)optlenp, 5149 pr_state(so->so_state, so->so_mode))); 5150 5151 mutex_enter(&so->so_lock); 5152 so_lock_single(so); /* Set SOLOCKED */ 5153 5154 /* 5155 * Check for SOL_SOCKET options. 5156 * Certain SOL_SOCKET options are returned directly whereas 5157 * others only provide a default (fallback) value should 5158 * the T_SVR4_OPTMGMT_REQ fail. 5159 */ 5160 if (level == SOL_SOCKET) { 5161 /* Check parameters */ 5162 switch (option_name) { 5163 case SO_TYPE: 5164 case SO_ERROR: 5165 case SO_DEBUG: 5166 case SO_ACCEPTCONN: 5167 case SO_REUSEADDR: 5168 case SO_KEEPALIVE: 5169 case SO_DONTROUTE: 5170 case SO_BROADCAST: 5171 case SO_USELOOPBACK: 5172 case SO_OOBINLINE: 5173 case SO_SNDBUF: 5174 case SO_RCVBUF: 5175 #ifdef notyet 5176 case SO_SNDLOWAT: 5177 case SO_RCVLOWAT: 5178 #endif /* notyet */ 5179 case SO_DOMAIN: 5180 case SO_DGRAM_ERRIND: 5181 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5182 error = EINVAL; 5183 eprintsoline(so, error); 5184 goto done2; 5185 } 5186 break; 5187 case SO_RCVTIMEO: 5188 case SO_SNDTIMEO: 5189 if (get_udatamodel() == DATAMODEL_NONE || 5190 get_udatamodel() == DATAMODEL_NATIVE) { 5191 if (maxlen < sizeof (struct timeval)) { 5192 error = EINVAL; 5193 eprintsoline(so, error); 5194 goto done2; 5195 } 5196 } else { 5197 if (maxlen < sizeof (struct timeval32)) { 5198 error = EINVAL; 5199 eprintsoline(so, error); 5200 goto done2; 5201 } 5202 5203 } 5204 break; 5205 case SO_LINGER: 5206 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5207 error = EINVAL; 5208 eprintsoline(so, error); 5209 goto done2; 5210 } 5211 break; 5212 case SO_SND_BUFINFO: 5213 if (maxlen < (t_uscalar_t) 5214 sizeof (struct so_snd_bufinfo)) { 5215 error = EINVAL; 5216 eprintsoline(so, error); 5217 goto done2; 5218 } 5219 break; 5220 } 5221 5222 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5223 5224 switch (option_name) { 5225 case SO_TYPE: 5226 value = so->so_type; 5227 option = &value; 5228 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5229 5230 case SO_ERROR: 5231 value = sogeterr(so, B_TRUE); 5232 option = &value; 5233 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5234 5235 case SO_ACCEPTCONN: 5236 if (so->so_state & SS_ACCEPTCONN) 5237 value = SO_ACCEPTCONN; 5238 else 5239 value = 0; 5240 #ifdef DEBUG 5241 if (value) { 5242 dprintso(so, 1, 5243 ("sotpi_getsockopt: 0x%x is set\n", 5244 option_name)); 5245 } else { 5246 dprintso(so, 1, 5247 ("sotpi_getsockopt: 0x%x not set\n", 5248 option_name)); 5249 } 5250 #endif /* DEBUG */ 5251 option = &value; 5252 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5253 5254 case SO_DEBUG: 5255 case SO_REUSEADDR: 5256 case SO_KEEPALIVE: 5257 case SO_DONTROUTE: 5258 case SO_BROADCAST: 5259 case SO_USELOOPBACK: 5260 case SO_OOBINLINE: 5261 case 
SO_DGRAM_ERRIND: 5262 value = (so->so_options & option_name); 5263 #ifdef DEBUG 5264 if (value) { 5265 dprintso(so, 1, 5266 ("sotpi_getsockopt: 0x%x is set\n", 5267 option_name)); 5268 } else { 5269 dprintso(so, 1, 5270 ("sotpi_getsockopt: 0x%x not set\n", 5271 option_name)); 5272 } 5273 #endif /* DEBUG */ 5274 option = &value; 5275 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5276 5277 /* 5278 * The following options are only returned by sockfs when the 5279 * T_SVR4_OPTMGMT_REQ fails. 5280 */ 5281 case SO_LINGER: 5282 option = &so->so_linger; 5283 len = (t_uscalar_t)sizeof (struct linger); 5284 break; 5285 case SO_SNDBUF: { 5286 ssize_t lvalue; 5287 5288 /* 5289 * If the option has not been set then get a default 5290 * value from the read queue. This value is 5291 * returned if the transport fails 5292 * the T_SVR4_OPTMGMT_REQ. 5293 */ 5294 lvalue = so->so_sndbuf; 5295 if (lvalue == 0) { 5296 mutex_exit(&so->so_lock); 5297 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5298 QHIWAT, 0, &lvalue); 5299 mutex_enter(&so->so_lock); 5300 dprintso(so, 1, 5301 ("got SO_SNDBUF %ld from q\n", lvalue)); 5302 } 5303 value = (int)lvalue; 5304 option = &value; 5305 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5306 break; 5307 } 5308 case SO_RCVBUF: { 5309 ssize_t lvalue; 5310 5311 /* 5312 * If the option has not been set then get a default 5313 * value from the read queue. This value is 5314 * returned if the transport fails 5315 * the T_SVR4_OPTMGMT_REQ. 5316 * 5317 * XXX If SO_RCVBUF has been set and this is an 5318 * XPG 4.2 application then do not ask the transport 5319 * since the transport might adjust the value and not 5320 * return exactly what was set by the application. 5321 * For non-XPG 4.2 application we return the value 5322 * that the transport is actually using. 5323 */ 5324 lvalue = so->so_rcvbuf; 5325 if (lvalue == 0) { 5326 mutex_exit(&so->so_lock); 5327 (void) strqget(RD(strvp2wq(SOTOV(so))), 5328 QHIWAT, 0, &lvalue); 5329 mutex_enter(&so->so_lock); 5330 dprintso(so, 1, 5331 ("got SO_RCVBUF %ld from q\n", lvalue)); 5332 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5333 value = (int)lvalue; 5334 option = &value; 5335 goto copyout; /* skip asking transport */ 5336 } 5337 value = (int)lvalue; 5338 option = &value; 5339 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5340 break; 5341 } 5342 case SO_DOMAIN: 5343 value = so->so_family; 5344 option = &value; 5345 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5346 5347 #ifdef notyet 5348 /* 5349 * We do not implement the semantics of these options 5350 * thus we shouldn't implement the options either. 
5351 */ 5352 case SO_SNDLOWAT: 5353 value = so->so_sndlowat; 5354 option = &value; 5355 break; 5356 case SO_RCVLOWAT: 5357 value = so->so_rcvlowat; 5358 option = &value; 5359 break; 5360 #endif /* notyet */ 5361 case SO_SNDTIMEO: 5362 case SO_RCVTIMEO: { 5363 clock_t val; 5364 5365 if (option_name == SO_RCVTIMEO) 5366 val = drv_hztousec(so->so_rcvtimeo); 5367 else 5368 val = drv_hztousec(so->so_sndtimeo); 5369 tmo_val.tv_sec = val / (1000 * 1000); 5370 tmo_val.tv_usec = val % (1000 * 1000); 5371 if (get_udatamodel() == DATAMODEL_NONE || 5372 get_udatamodel() == DATAMODEL_NATIVE) { 5373 option = &tmo_val; 5374 len = sizeof (struct timeval); 5375 } else { 5376 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5377 option = &tmo_val32; 5378 len = sizeof (struct timeval32); 5379 } 5380 break; 5381 } 5382 case SO_SND_BUFINFO: { 5383 snd_bufinfo.sbi_wroff = 5384 (so->so_proto_props).sopp_wroff; 5385 snd_bufinfo.sbi_maxblk = 5386 (so->so_proto_props).sopp_maxblk; 5387 snd_bufinfo.sbi_maxpsz = 5388 (so->so_proto_props).sopp_maxpsz; 5389 snd_bufinfo.sbi_tail = 5390 (so->so_proto_props).sopp_tail; 5391 option = &snd_bufinfo; 5392 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5393 break; 5394 } 5395 } 5396 } 5397 5398 mutex_exit(&so->so_lock); 5399 5400 /* Send request */ 5401 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5402 optmgmt_req.MGMT_flags = T_CHECK; 5403 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5404 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5405 5406 oh.level = level; 5407 oh.name = option_name; 5408 oh.len = maxlen; 5409 5410 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5411 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5412 /* Let option management work in the presence of data flow control */ 5413 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5414 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5415 mp = NULL; 5416 mutex_enter(&so->so_lock); 5417 if (error) { 5418 eprintsoline(so, error); 5419 goto done2; 5420 } 5421 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5422 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5423 if (error) { 5424 if (option != NULL) { 5425 /* We have a fallback value */ 5426 error = 0; 5427 goto copyout; 5428 } 5429 eprintsoline(so, error); 5430 goto done2; 5431 } 5432 ASSERT(mp); 5433 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5434 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5435 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5436 if (opt_res == NULL) { 5437 if (option != NULL) { 5438 /* We have a fallback value */ 5439 error = 0; 5440 goto copyout; 5441 } 5442 error = EPROTO; 5443 eprintsoline(so, error); 5444 goto done; 5445 } 5446 option = &opt_res[1]; 5447 5448 /* check to ensure that the option is within bounds */ 5449 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5450 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5451 if (option != NULL) { 5452 /* We have a fallback value */ 5453 error = 0; 5454 goto copyout; 5455 } 5456 error = EPROTO; 5457 eprintsoline(so, error); 5458 goto done; 5459 } 5460 5461 len = opt_res->len; 5462 5463 copyout: { 5464 t_uscalar_t size = MIN(len, maxlen); 5465 bcopy(option, optval, size); 5466 bcopy(&size, optlenp, sizeof (size)); 5467 } 5468 done: 5469 freemsg(mp); 5470 done2: 5471 so_unlock_single(so, SOLOCKED); 5472 mutex_exit(&so->so_lock); 5473 5474 return (error); 5475 } 5476 5477 /* 5478 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 
5479 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5480 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5481 * setsockopt has to work even if the transport does not support the option. 5482 */ 5483 /* ARGSUSED */ 5484 int 5485 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5486 const void *optval, t_uscalar_t optlen, struct cred *cr) 5487 { 5488 struct T_optmgmt_req optmgmt_req; 5489 struct opthdr oh; 5490 mblk_t *mp; 5491 int error = 0; 5492 boolean_t handled = B_FALSE; 5493 5494 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5495 (void *)so, level, option_name, optval, optlen, 5496 pr_state(so->so_state, so->so_mode))); 5497 5498 /* X/Open requires this check */ 5499 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5500 if (xnet_check_print) 5501 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5502 return (EINVAL); 5503 } 5504 5505 mutex_enter(&so->so_lock); 5506 so_lock_single(so); /* Set SOLOCKED */ 5507 mutex_exit(&so->so_lock); 5508 5509 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5510 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5511 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5512 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5513 5514 oh.level = level; 5515 oh.name = option_name; 5516 oh.len = optlen; 5517 5518 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5519 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5520 /* Let option management work in the presence of data flow control */ 5521 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5522 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5523 mp = NULL; 5524 mutex_enter(&so->so_lock); 5525 if (error) { 5526 eprintsoline(so, error); 5527 goto done2; 5528 } 5529 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5530 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5531 if (error) { 5532 eprintsoline(so, error); 5533 goto done; 5534 } 5535 ASSERT(mp); 5536 /* No need to verify T_optmgmt_ack */ 5537 freemsg(mp); 5538 done: 5539 /* 5540 * Check for SOL_SOCKET options and record their values. 5541 * If we know about a SOL_SOCKET parameter and the transport 5542 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5543 * EPROTO) we let the setsockopt succeed. 
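 * For instance, setting SO_SNDBUF on a transport that does not
 * understand the option still records the value in so_sndbuf and
 * returns success to the application.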
5544 */ 5545 if (level == SOL_SOCKET) { 5546 /* Check parameters */ 5547 switch (option_name) { 5548 case SO_DEBUG: 5549 case SO_REUSEADDR: 5550 case SO_KEEPALIVE: 5551 case SO_DONTROUTE: 5552 case SO_BROADCAST: 5553 case SO_USELOOPBACK: 5554 case SO_OOBINLINE: 5555 case SO_SNDBUF: 5556 case SO_RCVBUF: 5557 #ifdef notyet 5558 case SO_SNDLOWAT: 5559 case SO_RCVLOWAT: 5560 #endif /* notyet */ 5561 case SO_DGRAM_ERRIND: 5562 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5563 error = EINVAL; 5564 eprintsoline(so, error); 5565 goto done2; 5566 } 5567 ASSERT(optval); 5568 handled = B_TRUE; 5569 break; 5570 case SO_SNDTIMEO: 5571 case SO_RCVTIMEO: 5572 if (get_udatamodel() == DATAMODEL_NONE || 5573 get_udatamodel() == DATAMODEL_NATIVE) { 5574 if (optlen != sizeof (struct timeval)) { 5575 error = EINVAL; 5576 eprintsoline(so, error); 5577 goto done2; 5578 } 5579 } else { 5580 if (optlen != sizeof (struct timeval32)) { 5581 error = EINVAL; 5582 eprintsoline(so, error); 5583 goto done2; 5584 } 5585 } 5586 ASSERT(optval); 5587 handled = B_TRUE; 5588 break; 5589 case SO_LINGER: 5590 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5591 error = EINVAL; 5592 eprintsoline(so, error); 5593 goto done2; 5594 } 5595 ASSERT(optval); 5596 handled = B_TRUE; 5597 break; 5598 } 5599 5600 #define intvalue (*(int32_t *)optval) 5601 5602 switch (option_name) { 5603 case SO_TYPE: 5604 case SO_ERROR: 5605 case SO_ACCEPTCONN: 5606 /* Can't be set */ 5607 error = ENOPROTOOPT; 5608 goto done2; 5609 case SO_LINGER: { 5610 struct linger *l = (struct linger *)optval; 5611 5612 so->so_linger.l_linger = l->l_linger; 5613 if (l->l_onoff) { 5614 so->so_linger.l_onoff = SO_LINGER; 5615 so->so_options |= SO_LINGER; 5616 } else { 5617 so->so_linger.l_onoff = 0; 5618 so->so_options &= ~SO_LINGER; 5619 } 5620 break; 5621 } 5622 5623 case SO_DEBUG: 5624 #ifdef SOCK_TEST 5625 if (intvalue & 2) 5626 sock_test_timelimit = 10 * hz; 5627 else 5628 sock_test_timelimit = 0; 5629 5630 if (intvalue & 4) 5631 do_useracc = 0; 5632 else 5633 do_useracc = 1; 5634 #endif /* SOCK_TEST */ 5635 /* FALLTHRU */ 5636 case SO_REUSEADDR: 5637 case SO_KEEPALIVE: 5638 case SO_DONTROUTE: 5639 case SO_BROADCAST: 5640 case SO_USELOOPBACK: 5641 case SO_OOBINLINE: 5642 case SO_DGRAM_ERRIND: 5643 if (intvalue != 0) { 5644 dprintso(so, 1, 5645 ("socket_setsockopt: setting 0x%x\n", 5646 option_name)); 5647 so->so_options |= option_name; 5648 } else { 5649 dprintso(so, 1, 5650 ("socket_setsockopt: clearing 0x%x\n", 5651 option_name)); 5652 so->so_options &= ~option_name; 5653 } 5654 break; 5655 /* 5656 * The following options are only returned by us when the 5657 * transport layer fails. 5658 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5659 * since the transport might adjust the value and not 5660 * return exactly what was set by the application. 5661 */ 5662 case SO_SNDBUF: 5663 so->so_sndbuf = intvalue; 5664 break; 5665 case SO_RCVBUF: 5666 so->so_rcvbuf = intvalue; 5667 break; 5668 case SO_RCVPSH: 5669 so->so_rcv_timer_interval = intvalue; 5670 break; 5671 #ifdef notyet 5672 /* 5673 * We do not implement the semantics of these options 5674 * thus we shouldn't implement the options either. 
5675 */ 5676 case SO_SNDLOWAT: 5677 so->so_sndlowat = intvalue; 5678 break; 5679 case SO_RCVLOWAT: 5680 so->so_rcvlowat = intvalue; 5681 break; 5682 #endif /* notyet */ 5683 case SO_SNDTIMEO: 5684 case SO_RCVTIMEO: { 5685 struct timeval tl; 5686 clock_t val; 5687 5688 if (get_udatamodel() == DATAMODEL_NONE || 5689 get_udatamodel() == DATAMODEL_NATIVE) 5690 bcopy(&tl, (struct timeval *)optval, 5691 sizeof (struct timeval)); 5692 else 5693 TIMEVAL32_TO_TIMEVAL(&tl, 5694 (struct timeval32 *)optval); 5695 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5696 if (option_name == SO_RCVTIMEO) 5697 so->so_rcvtimeo = drv_usectohz(val); 5698 else 5699 so->so_sndtimeo = drv_usectohz(val); 5700 break; 5701 } 5702 } 5703 #undef intvalue 5704 5705 if (error) { 5706 if ((error == ENOPROTOOPT || error == EPROTO || 5707 error == EINVAL) && handled) { 5708 dprintso(so, 1, 5709 ("setsockopt: ignoring error %d for 0x%x\n", 5710 error, option_name)); 5711 error = 0; 5712 } 5713 } 5714 } 5715 done2: 5716 so_unlock_single(so, SOLOCKED); 5717 mutex_exit(&so->so_lock); 5718 return (error); 5719 } 5720 5721 /* 5722 * sotpi_close() is called when the last open reference goes away. 5723 */ 5724 /* ARGSUSED */ 5725 int 5726 sotpi_close(struct sonode *so, int flag, struct cred *cr) 5727 { 5728 struct vnode *vp = SOTOV(so); 5729 dev_t dev; 5730 int error = 0; 5731 sotpi_info_t *sti = SOTOTPI(so); 5732 5733 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 5734 (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 5735 5736 dev = sti->sti_dev; 5737 5738 ASSERT(STREAMSTAB(getmajor(dev))); 5739 5740 mutex_enter(&so->so_lock); 5741 so_lock_single(so); /* Set SOLOCKED */ 5742 5743 ASSERT(so_verify_oobstate(so)); 5744 5745 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 5746 sti->sti_nl7c_flags = 0; 5747 nl7c_close(so); 5748 } 5749 5750 if (vp->v_stream != NULL) { 5751 vnode_t *ux_vp; 5752 5753 if (so->so_family == AF_UNIX) { 5754 /* Could avoid this when CANTSENDMORE for !dgram */ 5755 so_unix_close(so); 5756 } 5757 5758 mutex_exit(&so->so_lock); 5759 /* 5760 * Disassemble the linkage from the AF_UNIX underlying file 5761 * system vnode to this socket (by atomically clearing 5762 * v_stream in vn_rele_stream) before strclose clears sd_vnode 5763 * and frees the stream head. 5764 */ 5765 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 5766 ASSERT(ux_vp->v_stream); 5767 sti->sti_ux_bound_vp = NULL; 5768 vn_rele_stream(ux_vp); 5769 } 5770 if (so->so_family == AF_INET || so->so_family == AF_INET6) { 5771 strsetrwputdatahooks(SOTOV(so), NULL, NULL); 5772 if (sti->sti_kssl_ent != NULL) { 5773 kssl_release_ent(sti->sti_kssl_ent, so, 5774 sti->sti_kssl_type); 5775 sti->sti_kssl_ent = NULL; 5776 } 5777 if (sti->sti_kssl_ctx != NULL) { 5778 kssl_release_ctx(sti->sti_kssl_ctx); 5779 sti->sti_kssl_ctx = NULL; 5780 } 5781 sti->sti_kssl_type = KSSL_NO_PROXY; 5782 } 5783 error = strclose(vp, flag, cr); 5784 vp->v_stream = NULL; 5785 mutex_enter(&so->so_lock); 5786 } 5787 5788 /* 5789 * Flush the T_DISCON_IND on sti_discon_ind_mp. 5790 */ 5791 so_flush_discon_ind(so); 5792 5793 so_unlock_single(so, SOLOCKED); 5794 mutex_exit(&so->so_lock); 5795 5796 /* 5797 * Needed for STREAMs. 5798 * Decrement the device driver's reference count for streams 5799 * opened via the clone dip. The driver was held in clone_open(). 5800 * The absence of clone_close() forces this asymmetry. 
5801 */ 5802 if (so->so_flag & SOCLONE) 5803 ddi_rele_driver(getmajor(dev)); 5804 5805 return (error); 5806 } 5807 5808 static int 5809 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 5810 struct cred *cr, int32_t *rvalp) 5811 { 5812 struct vnode *vp = SOTOV(so); 5813 sotpi_info_t *sti = SOTOTPI(so); 5814 int error = 0; 5815 5816 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 5817 cmd, arg, pr_state(so->so_state, so->so_mode))); 5818 5819 switch (cmd) { 5820 case SIOCSQPTR: 5821 /* 5822 * SIOCSQPTR is valid only when helper stream is created 5823 * by the protocol. 5824 */ 5825 case _I_INSERT: 5826 case _I_REMOVE: 5827 /* 5828 * Since there's no compelling reason to support these ioctls 5829 * on sockets, and doing so would increase the complexity 5830 * markedly, prevent it. 5831 */ 5832 return (EOPNOTSUPP); 5833 5834 case I_FIND: 5835 case I_LIST: 5836 case I_LOOK: 5837 case I_POP: 5838 case I_PUSH: 5839 /* 5840 * To prevent races and inconsistencies between the actual 5841 * state of the stream and the state according to the sonode, 5842 * we serialize all operations which modify or operate on the 5843 * list of modules on the socket's stream. 5844 */ 5845 mutex_enter(&sti->sti_plumb_lock); 5846 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 5847 mutex_exit(&sti->sti_plumb_lock); 5848 return (error); 5849 5850 default: 5851 if (so->so_version != SOV_STREAM) 5852 break; 5853 5854 /* 5855 * The imaginary "sockmod" has been popped; act as a stream. 5856 */ 5857 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5858 } 5859 5860 ASSERT(so->so_version != SOV_STREAM); 5861 5862 /* 5863 * Process socket-specific ioctls. 5864 */ 5865 switch (cmd) { 5866 case FIONBIO: { 5867 int32_t value; 5868 5869 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5870 (mode & (int)FKIOCTL))) 5871 return (EFAULT); 5872 5873 mutex_enter(&so->so_lock); 5874 if (value) { 5875 so->so_state |= SS_NDELAY; 5876 } else { 5877 so->so_state &= ~SS_NDELAY; 5878 } 5879 mutex_exit(&so->so_lock); 5880 return (0); 5881 } 5882 5883 case FIOASYNC: { 5884 int32_t value; 5885 5886 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5887 (mode & (int)FKIOCTL))) 5888 return (EFAULT); 5889 5890 mutex_enter(&so->so_lock); 5891 /* 5892 * SS_ASYNC flag not already set correctly? 5893 * (!value != !(so->so_state & SS_ASYNC)) 5894 * but some engineers find that too hard to read. 5895 */ 5896 if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 5897 value != 0 && (so->so_state & SS_ASYNC) == 0) 5898 error = so_flip_async(so, vp, mode, cr); 5899 mutex_exit(&so->so_lock); 5900 return (error); 5901 } 5902 5903 case SIOCSPGRP: 5904 case FIOSETOWN: { 5905 pid_t pgrp; 5906 5907 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 5908 (mode & (int)FKIOCTL))) 5909 return (EFAULT); 5910 5911 mutex_enter(&so->so_lock); 5912 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 5913 /* Any change? */ 5914 if (pgrp != so->so_pgrp) 5915 error = so_set_siggrp(so, vp, pgrp, mode, cr); 5916 mutex_exit(&so->so_lock); 5917 return (error); 5918 } 5919 case SIOCGPGRP: 5920 case FIOGETOWN: 5921 if (so_copyout(&so->so_pgrp, (void *)arg, 5922 sizeof (pid_t), (mode & (int)FKIOCTL))) 5923 return (EFAULT); 5924 return (0); 5925 5926 case SIOCATMARK: { 5927 int retval; 5928 uint_t so_state; 5929 5930 /* 5931 * strwaitmark has a finite timeout after which it 5932 * returns -1 if the mark state is undetermined. 
5933 * In order to avoid any race between the mark state 5934 * in sockfs and the mark state in the stream head this 5935 * routine loops until the mark state can be determined 5936 * (or the urgent data indication has been removed by some 5937 * other thread). 5938 */ 5939 do { 5940 mutex_enter(&so->so_lock); 5941 so_state = so->so_state; 5942 mutex_exit(&so->so_lock); 5943 if (so_state & SS_RCVATMARK) { 5944 retval = 1; 5945 } else if (!(so_state & SS_OOBPEND)) { 5946 /* 5947 * No SIGURG has been generated -- there is no 5948 * pending or present urgent data. Thus can't 5949 * possibly be at the mark. 5950 */ 5951 retval = 0; 5952 } else { 5953 /* 5954 * Have the stream head wait until there is 5955 * either some messages on the read queue, or 5956 * STRATMARK or STRNOTATMARK gets set. The 5957 * STRNOTATMARK flag is used so that the 5958 * transport can send up a MSGNOTMARKNEXT 5959 * M_DATA to indicate that it is not 5960 * at the mark and additional data is not about 5961 * to be send upstream. 5962 * 5963 * If the mark state is undetermined this will 5964 * return -1 and we will loop rechecking the 5965 * socket state. 5966 */ 5967 retval = strwaitmark(vp); 5968 } 5969 } while (retval == -1); 5970 5971 if (so_copyout(&retval, (void *)arg, sizeof (int), 5972 (mode & (int)FKIOCTL))) 5973 return (EFAULT); 5974 return (0); 5975 } 5976 5977 case I_FDINSERT: 5978 case I_SENDFD: 5979 case I_RECVFD: 5980 case I_ATMARK: 5981 case _SIOCSOCKFALLBACK: 5982 /* 5983 * These ioctls do not apply to sockets. I_FDINSERT can be 5984 * used to send M_PROTO messages without modifying the socket 5985 * state. I_SENDFD/RECVFD should not be used for socket file 5986 * descriptor passing since they assume a twisted stream. 5987 * SIOCATMARK must be used instead of I_ATMARK. 5988 * 5989 * _SIOCSOCKFALLBACK from an application should never be 5990 * processed. It is only generated by socktpi_open() or 5991 * in response to I_POP or I_PUSH. 5992 */ 5993 #ifdef DEBUG 5994 zcmn_err(getzoneid(), CE_WARN, 5995 "Unsupported STREAMS ioctl 0x%x on socket. " 5996 "Pid = %d\n", cmd, curproc->p_pid); 5997 #endif /* DEBUG */ 5998 return (EOPNOTSUPP); 5999 6000 case _I_GETPEERCRED: 6001 if ((mode & FKIOCTL) == 0) 6002 return (EINVAL); 6003 6004 mutex_enter(&so->so_lock); 6005 if ((so->so_mode & SM_CONNREQUIRED) == 0) { 6006 error = ENOTSUP; 6007 } else if ((so->so_state & SS_ISCONNECTED) == 0) { 6008 error = ENOTCONN; 6009 } else if (so->so_peercred != NULL) { 6010 k_peercred_t *kp = (k_peercred_t *)arg; 6011 kp->pc_cr = so->so_peercred; 6012 kp->pc_cpid = so->so_cpid; 6013 crhold(so->so_peercred); 6014 } else { 6015 error = EINVAL; 6016 } 6017 mutex_exit(&so->so_lock); 6018 return (error); 6019 6020 default: 6021 /* 6022 * Do the higher-order bits of the ioctl cmd indicate 6023 * that it is an I_* streams ioctl? 6024 */ 6025 if ((cmd & 0xffffff00U) == STR && 6026 so->so_version == SOV_SOCKBSD) { 6027 #ifdef DEBUG 6028 zcmn_err(getzoneid(), CE_WARN, 6029 "Unsupported STREAMS ioctl 0x%x on socket. " 6030 "Pid = %d\n", cmd, curproc->p_pid); 6031 #endif /* DEBUG */ 6032 return (EOPNOTSUPP); 6033 } 6034 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6035 } 6036 } 6037 6038 /* 6039 * Handle plumbing-related ioctls. 
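 * These maintain the illusion that an imaginary "sockmod" module sits
 * on top of the stream, so that I_PUSH, I_POP, I_LIST, I_LOOK and
 * I_FIND behave as they would on a plain STREAMS device.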
6040 */ 6041 static int 6042 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 6043 struct cred *cr, int32_t *rvalp) 6044 { 6045 static const char sockmod_name[] = "sockmod"; 6046 struct sonode *so = VTOSO(vp); 6047 char mname[FMNAMESZ + 1]; 6048 int error; 6049 sotpi_info_t *sti = SOTOTPI(so); 6050 6051 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 6052 6053 if (so->so_version == SOV_SOCKBSD) 6054 return (EOPNOTSUPP); 6055 6056 if (so->so_version == SOV_STREAM) { 6057 /* 6058 * The imaginary "sockmod" has been popped - act as a stream. 6059 * If this is a push of sockmod then change back to a socket. 6060 */ 6061 if (cmd == I_PUSH) { 6062 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6063 (void *)arg, mname, sizeof (mname), NULL); 6064 6065 if (error == 0 && strcmp(mname, sockmod_name) == 0) { 6066 dprintso(so, 0, ("socktpi_ioctl: going to " 6067 "socket version\n")); 6068 so_stream2sock(so); 6069 return (0); 6070 } 6071 } 6072 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6073 } 6074 6075 switch (cmd) { 6076 case I_PUSH: 6077 if (sti->sti_direct) { 6078 mutex_enter(&so->so_lock); 6079 so_lock_single(so); 6080 mutex_exit(&so->so_lock); 6081 6082 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 6083 cr, rvalp); 6084 6085 mutex_enter(&so->so_lock); 6086 if (error == 0) 6087 sti->sti_direct = 0; 6088 so_unlock_single(so, SOLOCKED); 6089 mutex_exit(&so->so_lock); 6090 6091 if (error != 0) 6092 return (error); 6093 } 6094 6095 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6096 if (error == 0) 6097 sti->sti_pushcnt++; 6098 return (error); 6099 6100 case I_POP: 6101 if (sti->sti_pushcnt == 0) { 6102 /* Emulate sockmod being popped */ 6103 dprintso(so, 0, 6104 ("socktpi_ioctl: going to STREAMS version\n")); 6105 return (so_sock2stream(so)); 6106 } 6107 6108 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6109 if (error == 0) 6110 sti->sti_pushcnt--; 6111 return (error); 6112 6113 case I_LIST: { 6114 struct str_mlist *kmlistp, *umlistp; 6115 struct str_list kstrlist; 6116 ssize_t kstrlistsize; 6117 int i, nmods; 6118 6119 STRUCT_DECL(str_list, ustrlist); 6120 STRUCT_INIT(ustrlist, mode); 6121 6122 if (arg == NULL) { 6123 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6124 if (error == 0) 6125 (*rvalp)++; /* Add one for sockmod */ 6126 return (error); 6127 } 6128 6129 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 6130 STRUCT_SIZE(ustrlist), mode & FKIOCTL); 6131 if (error != 0) 6132 return (error); 6133 6134 nmods = STRUCT_FGET(ustrlist, sl_nmods); 6135 if (nmods <= 0) 6136 return (EINVAL); 6137 /* 6138 * Ceiling nmods at nstrpush to prevent someone from 6139 * maliciously consuming lots of kernel memory. 6140 */ 6141 nmods = MIN(nmods, nstrpush); 6142 6143 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 6144 kstrlist.sl_nmods = nmods; 6145 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 6146 6147 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 6148 cr, rvalp); 6149 if (error != 0) 6150 goto done; 6151 6152 /* 6153 * Considering the module list as a 0-based array of sl_nmods 6154 * modules, sockmod should conceptually exist at slot 6155 * sti_pushcnt. Insert sockmod at this location by sliding all 6156 * of the module names after so_pushcnt over by one. We know 6157 * that there will be room to do this since we allocated 6158 * sl_modlist with an additional slot. 
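		 *
		 * For example (hypothetical module names): if the user has
		 * pushed one module, say "bufmod", onto a TCP socket
		 * (sti_pushcnt == 1), the transport's own list is
		 * { "bufmod", "tcp" }.  Sliding the entries at and after
		 * slot 1 toward the end by one and writing "sockmod" into
		 * slot 1 yields { "bufmod", "sockmod", "tcp" }, matching the
		 * view applications saw when sockmod was a real module on
		 * the stream.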
6159 		 */
6160 		for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--)
6161 			kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1];
6162 
6163 		(void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name);
6164 		kstrlist.sl_nmods++;
6165 
6166 		/*
6167 		 * Copy all of the entries out to ustrlist.
6168 		 */
6169 		kmlistp = kstrlist.sl_modlist;
6170 		umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
6171 		for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
6172 			error = so_copyout(kmlistp++, umlistp++,
6173 			    sizeof (struct str_mlist), mode & FKIOCTL);
6174 			if (error != 0)
6175 				goto done;
6176 		}
6177 
6178 		error = so_copyout(&i, (void *)arg, sizeof (int32_t),
6179 		    mode & FKIOCTL);
6180 		if (error == 0)
6181 			*rvalp = 0;
6182 done:
6183 		kmem_free(kstrlist.sl_modlist, kstrlistsize);
6184 		return (error);
6185 	}
6186 	case I_LOOK:
6187 		if (sti->sti_pushcnt == 0) {
6188 			return (so_copyout(sockmod_name, (void *)arg,
6189 			    sizeof (sockmod_name), mode & FKIOCTL));
6190 		}
6191 		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
6192 
6193 	case I_FIND:
6194 		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
6195 		if (error && error != EINVAL)
6196 			return (error);
6197 
6198 		/* If not found and the string was "sockmod", return 1. */
6199 		if (*rvalp == 0 || error == EINVAL) {
6200 			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
6201 			    (void *)arg, mname, sizeof (mname), NULL);
6202 			if (error == ENAMETOOLONG)
6203 				error = EINVAL;
6204 
6205 			if (error == 0 && strcmp(mname, sockmod_name) == 0)
6206 				*rvalp = 1;
6207 		}
6208 		return (error);
6209 
6210 	default:
6211 		panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
6212 		break;
6213 	}
6214 
6215 	return (0);
6216 }
6217 
6218 /*
6219  * Wrapper around the STREAMS poll routine that implements socket poll
6220  * semantics.
6221  * Sockfs never calls pollwakeup itself - the stream head takes care
6222  * of all pollwakeups. Since sockfs never holds so_lock when calling the
6223  * stream head there can never be a deadlock due to holding so_lock across
6224  * pollwakeup and acquiring so_lock in this routine.
6225  *
6226  * However, since the performance of VOP_POLL is critical we avoid
6227  * acquiring so_lock here. This is based on two assumptions:
6228  * - The poll implementation holds locks to serialize the VOP_POLL call
6229  *   and a pollwakeup for the same pollhead. This ensures that should
6230  *   e.g. so_state change during a socktpi_poll call the pollwakeup
6231  *   (which strsock_* and strrput conspire to issue) is issued after
6232  *   the state change. Thus the pollwakeup will block until VOP_POLL has
6233  *   returned and then wake up poll and have it call VOP_POLL again.
6234  * - The reading of so_state without holding so_lock does not result in
6235  *   stale data that is older than the latest state change that has dropped
6236  *   so_lock. This is ensured by the mutex_exit issuing the appropriate
6237  *   memory barrier to force the data into the coherency domain.
6238  */
6239 static int
6240 sotpi_poll(
6241 	struct sonode *so,
6242 	short events,
6243 	int anyyet,
6244 	short *reventsp,
6245 	struct pollhead **phpp)
6246 {
6247 	short origevents = events;
6248 	struct vnode *vp = SOTOV(so);
6249 	int error;
6250 	int so_state = so->so_state;	/* snapshot */
6251 	sotpi_info_t *sti = SOTOTPI(so);
6252 
6253 	dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n",
6254 	    (void *)vp, pr_state(so_state, so->so_mode), so->so_error));
6255 
6256 	ASSERT(vp->v_type == VSOCK);
6257 	ASSERT(vp->v_stream != NULL);
6258 
6259 	if (so->so_version == SOV_STREAM) {
6260 		/* The imaginary "sockmod" has been popped - act as a stream */
6261 		return (strpoll(vp->v_stream, events, anyyet,
6262 		    reventsp, phpp));
6263 	}
6264 
6265 	if (!(so_state & SS_ISCONNECTED) &&
6266 	    (so->so_mode & SM_CONNREQUIRED)) {
6267 		/* Not connected yet - turn off write side events */
6268 		events &= ~(POLLOUT|POLLWRBAND);
6269 	}
6270 	/*
6271 	 * Check for errors without calling strpoll if the caller wants them.
6272 	 * In sockets the errors are represented as input/output events
6273 	 * and there is no need to ask the stream head for this information.
6274 	 */
6275 	if (so->so_error != 0 &&
6276 	    ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
6277 		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
6278 		return (0);
6279 	}
6280 	/*
6281 	 * Ignore M_PROTO-only messages such as the T_EXDATA_IND messages.
6282 	 * Messages with only an M_PROTO/M_PCPROTO part and no M_DATA
6283 	 * will not trigger a POLLIN event when POLLRDDATA is set.
6284 	 * The handling of urgent data (causing POLLRDBAND) is done by
6285 	 * inspecting SS_OOBPEND below.
6286 	 */
6287 	events |= POLLRDDATA;
6288 
6289 	/*
6290 	 * After shutdown(output) a stream head write error is set.
6291 	 * However, we should not return output events.
6292 	 */
6293 	events |= POLLNOERR;
6294 	error = strpoll(vp->v_stream, events, anyyet,
6295 	    reventsp, phpp);
6296 	if (error)
6297 		return (error);
6298 
6299 	ASSERT(!(*reventsp & POLLERR));
6300 
6301 	/*
6302 	 * Notes on T_CONN_IND handling for sockets.
6303 	 *
6304 	 * If strpoll() returned without events, SR_POLLIN is guaranteed
6305 	 * to be set, ensuring any subsequent strrput() runs pollwakeup().
6306 	 *
6307 	 * Since the so_lock is not held, soqueueconnind() may have run
6308 	 * and a T_CONN_IND may be waiting. We now check for any queued
6309 	 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events
6310 	 * to ensure poll returns.
6311 	 *
6312 	 * However:
6313 	 * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
6314 	 * when strrput() does run for an arriving M_PROTO with T_CONN_IND
6315 	 * the following actions will occur; taken together they ensure the
6316 	 * syscall will return.
6317 	 *
6318 	 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND, but if
6319 	 *    the accept() was run on a non-blocking socket sowaitconnind()
6320 	 *    may already have returned EWOULDBLOCK and so is not waiting to
6321 	 *    process the message. Additionally socktpi_poll() has probably
6322 	 *    proceeded past the sti_conn_ind_head check below.
6323 	 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
6324 	 *    this thread, however that could occur before poll_common()
6325 	 *    has entered cv_wait.
6326 	 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
6327 	 *
6328 	 * Before proceeding to cv_wait() in poll_common() for an event,
6329 	 * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
6330 	 * and if set, re-calls strpoll() to ensure the late arriving
6331 	 * T_CONN_IND is recognized, and pollsys() returns.
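	 *
	 * For reference, the userland pattern all of this protects is just
	 * a poller waiting for an incoming connection and then accepting
	 * it (a minimal sketch; listen_fd is assumed to be a non-blocking,
	 * listening socket and error handling is omitted):
	 *
	 *	#include <poll.h>
	 *	#include <sys/socket.h>
	 *
	 *	struct pollfd pfd;
	 *
	 *	pfd.fd = listen_fd;
	 *	pfd.events = POLLIN;
	 *	(void) poll(&pfd, 1, -1);
	 *	if (pfd.revents & POLLIN)
	 *		(void) accept(listen_fd, NULL, NULL);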
6332 */ 6333 6334 if (sti->sti_conn_ind_head != NULL) 6335 *reventsp |= (POLLIN|POLLRDNORM) & events; 6336 6337 if (so->so_state & SS_OOBPEND) 6338 *reventsp |= POLLRDBAND & events; 6339 6340 if (sti->sti_nl7c_rcv_mp != NULL) { 6341 *reventsp |= (POLLIN|POLLRDNORM) & events; 6342 } 6343 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 6344 ((POLLIN|POLLRDNORM) & *reventsp)) { 6345 sti->sti_nl7c_flags |= NL7C_POLLIN; 6346 } 6347 6348 return (0); 6349 } 6350 6351 /*ARGSUSED*/ 6352 static int 6353 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6354 { 6355 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6356 int error = 0; 6357 6358 error = sonode_constructor(buf, cdrarg, kmflags); 6359 if (error != 0) 6360 return (error); 6361 6362 error = i_sotpi_info_constructor(&st->st_info); 6363 if (error != 0) 6364 sonode_destructor(buf, cdrarg); 6365 6366 st->st_sonode.so_priv = &st->st_info; 6367 6368 return (error); 6369 } 6370 6371 /*ARGSUSED1*/ 6372 static void 6373 socktpi_destructor(void *buf, void *cdrarg) 6374 { 6375 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6376 6377 ASSERT(st->st_sonode.so_priv == &st->st_info); 6378 st->st_sonode.so_priv = NULL; 6379 6380 i_sotpi_info_destructor(&st->st_info); 6381 sonode_destructor(buf, cdrarg); 6382 } 6383 6384 static int 6385 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 6386 { 6387 int retval; 6388 6389 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6390 struct sonode *so = (struct sonode *)buf; 6391 sotpi_info_t *sti = SOTOTPI(so); 6392 6393 mutex_enter(&socklist.sl_lock); 6394 6395 sti->sti_next_so = socklist.sl_list; 6396 sti->sti_prev_so = NULL; 6397 if (sti->sti_next_so != NULL) 6398 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6399 socklist.sl_list = so; 6400 6401 mutex_exit(&socklist.sl_lock); 6402 6403 } 6404 return (retval); 6405 } 6406 6407 static void 6408 socktpi_unix_destructor(void *buf, void *cdrarg) 6409 { 6410 struct sonode *so = (struct sonode *)buf; 6411 sotpi_info_t *sti = SOTOTPI(so); 6412 6413 mutex_enter(&socklist.sl_lock); 6414 6415 if (sti->sti_next_so != NULL) 6416 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6417 if (sti->sti_prev_so != NULL) 6418 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6419 else 6420 socklist.sl_list = sti->sti_next_so; 6421 6422 mutex_exit(&socklist.sl_lock); 6423 6424 socktpi_destructor(buf, cdrarg); 6425 } 6426 6427 int 6428 socktpi_init(void) 6429 { 6430 /* 6431 * Create sonode caches. We create a special one for AF_UNIX so 6432 * that we can track them for netstat(1m). 6433 */ 6434 socktpi_cache = kmem_cache_create("socktpi_cache", 6435 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6436 socktpi_destructor, NULL, NULL, NULL, 0); 6437 6438 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6439 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6440 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6441 6442 return (0); 6443 } 6444 6445 /* 6446 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6447 * 6448 * Caller must still update state and mode using sotpi_update_state(). 
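 *
 * Illustrative call order only (the actual caller is the generic
 * fallback code, which performs additional work between these steps;
 * the variable names here are hypothetical):
 *
 *	error = sotpi_convert_sonode(so, newsp, &direct, &q, cr);
 *	if (error == 0) {
 *		(protocol quiesces and reports addresses/capabilities)
 *		sotpi_update_state(so, tcap, laddr, laddrlen,
 *		    faddr, faddrlen, opts);
 *	}
 *	(should the fallback fail later, sotpi_revert_sonode(so, cr))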
6449  */
6450 int
6451 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp,
6452     boolean_t *direct, queue_t **qp, struct cred *cr)
6453 {
6454 	sotpi_info_t *sti;
6455 	struct sockparams *origsp = so->so_sockparams;
6456 	sock_lower_handle_t handle = so->so_proto_handle;
6457 	struct stdata *stp;
6458 	struct vnode *vp;
6459 	queue_t *q;
6460 	int error = 0;
6461 
6462 	ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
6463 	    SS_FALLBACK_PENDING);
6464 	ASSERT(SOCK_IS_NONSTR(so));
6465 
6466 	*qp = NULL;
6467 	*direct = B_FALSE;
6468 	so->so_sockparams = newsp;
6469 	/*
6470 	 * Allocate and initialize fields required by TPI.
6471 	 */
6472 	(void) sotpi_info_create(so, KM_SLEEP);
6473 	sotpi_info_init(so);
6474 
6475 	if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) {
6476 		sotpi_info_fini(so);
6477 		sotpi_info_destroy(so);
6478 		return (error);
6479 	}
6480 	ASSERT(handle == so->so_proto_handle);
6481 	sti = SOTOTPI(so);
6482 	if (sti->sti_direct != 0)
6483 		*direct = B_TRUE;
6484 
6485 	/*
6486 	 * Keep the original sp around so we can properly dispose of the
6487 	 * sonode when the socket is being closed.
6488 	 */
6489 	sti->sti_orig_sp = origsp;
6490 
6491 	so_basic_strinit(so);	/* skips the T_CAPABILITY_REQ */
6492 	so_alloc_addr(so, so->so_max_addr_len);
6493 
6494 	/*
6495 	 * If the application has done a SIOCSPGRP, make sure the
6496 	 * stream head is aware. This needs to take place before
6497 	 * the protocol starts sending up messages; otherwise we
6498 	 * might fail to generate SIGPOLL.
6499 	 *
6500 	 * It is possible that the application will receive duplicate
6501 	 * signals if some were already generated for either data or
6502 	 * connection indications.
6503 	 */
6504 	if (so->so_pgrp != 0) {
6505 		if (so_set_events(so, so->so_vnode, cr) != 0)
6506 			so->so_pgrp = 0;
6507 	}
6508 
6509 	/*
6510 	 * Determine which queue to use.
6511 	 */
6512 	vp = SOTOV(so);
6513 	stp = vp->v_stream;
6514 	ASSERT(stp != NULL);
6515 	q = stp->sd_wrq->q_next;
6516 
6517 	/*
6518 	 * Skip any modules that may have been autopushed when the device
6519 	 * was opened.
6520 	 */
6521 	while (q->q_next != NULL)
6522 		q = q->q_next;
6523 	*qp = _RD(q);
6524 
6525 	/* This is now a STREAMS socket */
6526 	so->so_not_str = B_FALSE;
6527 
6528 	return (error);
6529 }
6530 
6531 /*
6532  * Revert a TPI sonode. It is only allowed to revert the sonode during
6533  * the fallback process.
6534  */
6535 void
6536 sotpi_revert_sonode(struct sonode *so, struct cred *cr)
6537 {
6538 	vnode_t *vp = SOTOV(so);
6539 
6540 	ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
6541 	    SS_FALLBACK_PENDING);
6542 	ASSERT(!SOCK_IS_NONSTR(so));
6543 	ASSERT(vp->v_stream != NULL);
6544 
6545 	strclean(vp);
6546 	(void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr);
6547 
6548 	/*
6549 	 * Restore the original sockparams. The caller is responsible for
6550 	 * dropping the ref to the new sp.
6551 	 */
6552 	so->so_sockparams = SOTOTPI(so)->sti_orig_sp;
6553 
6554 	sotpi_info_fini(so);
6555 	sotpi_info_destroy(so);
6556 
6557 	/* This is no longer a STREAMS socket */
6558 	so->so_not_str = B_TRUE;
6559 }
6560 
6561 void
6562 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap,
6563     struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr,
6564     socklen_t faddrlen, short opts)
6565 {
6566 	sotpi_info_t *sti = SOTOTPI(so);
6567 
6568 	so_proc_tcapability_ack(so, tcap);
6569 
6570 	so->so_options |= opts;
6571 
6572 	/*
6573 	 * Determine whether the foreign and local addresses are valid.
6574 	 */
6575 	if (laddrlen != 0) {
6576 		ASSERT(laddrlen <= sti->sti_laddr_maxlen);
6577 		sti->sti_laddr_len = laddrlen;
6578 		bcopy(laddr, sti->sti_laddr_sa, laddrlen);
6579 		sti->sti_laddr_valid = (so->so_state & SS_ISBOUND);
6580 	}
6581 
6582 	if (faddrlen != 0) {
6583 		ASSERT(faddrlen <= sti->sti_faddr_maxlen);
6584 		sti->sti_faddr_len = faddrlen;
6585 		bcopy(faddr, sti->sti_faddr_sa, faddrlen);
6586 		sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED);
6587 	}
6588 
6589 }
6590 
6591 /*
6592  * Allocate enough space to cache the local and foreign addresses.
6593  */
6594 void
6595 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen)
6596 {
6597 	sotpi_info_t *sti = SOTOTPI(so);
6598 
6599 	ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
6600 	ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0);
6601 	sti->sti_laddr_maxlen = sti->sti_faddr_maxlen =
6602 	    P2ROUNDUP(maxlen, KMEM_ALIGN);
6603 	so->so_max_addr_len = sti->sti_laddr_maxlen;
6604 	sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP);
6605 	sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa
6606 	    + sti->sti_laddr_maxlen);
6607 
6608 	if (so->so_family == AF_UNIX) {
6609 		/*
6610 		 * Initialize AF_UNIX related fields.
6611 */ 6612 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6613 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6614 } 6615 } 6616 6617 6618 sotpi_info_t * 6619 sotpi_sototpi(struct sonode *so) 6620 { 6621 sotpi_info_t *sti; 6622 6623 ASSERT(so != NULL); 6624 6625 sti = (sotpi_info_t *)so->so_priv; 6626 6627 ASSERT(sti != NULL); 6628 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6629 6630 return (sti); 6631 } 6632 6633 static int 6634 i_sotpi_info_constructor(sotpi_info_t *sti) 6635 { 6636 sti->sti_magic = SOTPI_INFO_MAGIC; 6637 sti->sti_ack_mp = NULL; 6638 sti->sti_discon_ind_mp = NULL; 6639 sti->sti_ux_bound_vp = NULL; 6640 sti->sti_unbind_mp = NULL; 6641 6642 sti->sti_conn_ind_head = NULL; 6643 sti->sti_conn_ind_tail = NULL; 6644 6645 sti->sti_laddr_sa = NULL; 6646 sti->sti_faddr_sa = NULL; 6647 6648 sti->sti_nl7c_flags = 0; 6649 sti->sti_nl7c_uri = NULL; 6650 sti->sti_nl7c_rcv_mp = NULL; 6651 6652 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6653 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6654 6655 return (0); 6656 } 6657 6658 static void 6659 i_sotpi_info_destructor(sotpi_info_t *sti) 6660 { 6661 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6662 ASSERT(sti->sti_ack_mp == NULL); 6663 ASSERT(sti->sti_discon_ind_mp == NULL); 6664 ASSERT(sti->sti_ux_bound_vp == NULL); 6665 ASSERT(sti->sti_unbind_mp == NULL); 6666 6667 ASSERT(sti->sti_conn_ind_head == NULL); 6668 ASSERT(sti->sti_conn_ind_tail == NULL); 6669 6670 ASSERT(sti->sti_laddr_sa == NULL); 6671 ASSERT(sti->sti_faddr_sa == NULL); 6672 6673 ASSERT(sti->sti_nl7c_flags == 0); 6674 ASSERT(sti->sti_nl7c_uri == NULL); 6675 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6676 6677 mutex_destroy(&sti->sti_plumb_lock); 6678 cv_destroy(&sti->sti_ack_cv); 6679 } 6680 6681 /* 6682 * Creates and attaches TPI information to the given sonode 6683 */ 6684 static boolean_t 6685 sotpi_info_create(struct sonode *so, int kmflags) 6686 { 6687 sotpi_info_t *sti; 6688 6689 ASSERT(so->so_priv == NULL); 6690 6691 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6692 return (B_FALSE); 6693 6694 if (i_sotpi_info_constructor(sti) != 0) { 6695 kmem_free(sti, sizeof (*sti)); 6696 return (B_FALSE); 6697 } 6698 6699 so->so_priv = (void *)sti; 6700 return (B_TRUE); 6701 } 6702 6703 /* 6704 * Initializes the TPI information. 
6705 */ 6706 static void 6707 sotpi_info_init(struct sonode *so) 6708 { 6709 struct vnode *vp = SOTOV(so); 6710 sotpi_info_t *sti = SOTOTPI(so); 6711 time_t now; 6712 6713 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6714 vp->v_rdev = sti->sti_dev; 6715 6716 sti->sti_orig_sp = NULL; 6717 6718 sti->sti_pushcnt = 0; 6719 6720 now = gethrestime_sec(); 6721 sti->sti_atime = now; 6722 sti->sti_mtime = now; 6723 sti->sti_ctime = now; 6724 6725 sti->sti_eaddr_mp = NULL; 6726 sti->sti_delayed_error = 0; 6727 6728 sti->sti_provinfo = NULL; 6729 6730 sti->sti_oobcnt = 0; 6731 sti->sti_oobsigcnt = 0; 6732 6733 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6734 6735 sti->sti_laddr_sa = 0; 6736 sti->sti_faddr_sa = 0; 6737 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6738 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6739 6740 sti->sti_laddr_valid = 0; 6741 sti->sti_faddr_valid = 0; 6742 sti->sti_faddr_noxlate = 0; 6743 6744 sti->sti_direct = 0; 6745 6746 ASSERT(sti->sti_ack_mp == NULL); 6747 ASSERT(sti->sti_ux_bound_vp == NULL); 6748 ASSERT(sti->sti_unbind_mp == NULL); 6749 6750 ASSERT(sti->sti_conn_ind_head == NULL); 6751 ASSERT(sti->sti_conn_ind_tail == NULL); 6752 6753 /* Initialize the kernel SSL proxy fields */ 6754 sti->sti_kssl_type = KSSL_NO_PROXY; 6755 sti->sti_kssl_ent = NULL; 6756 sti->sti_kssl_ctx = NULL; 6757 } 6758 6759 /* 6760 * Given a sonode, grab the TPI info and free any data. 6761 */ 6762 static void 6763 sotpi_info_fini(struct sonode *so) 6764 { 6765 sotpi_info_t *sti = SOTOTPI(so); 6766 mblk_t *mp; 6767 6768 ASSERT(sti->sti_discon_ind_mp == NULL); 6769 6770 if ((mp = sti->sti_conn_ind_head) != NULL) { 6771 mblk_t *mp1; 6772 6773 while (mp) { 6774 mp1 = mp->b_next; 6775 mp->b_next = NULL; 6776 freemsg(mp); 6777 mp = mp1; 6778 } 6779 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6780 } 6781 6782 /* 6783 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6784 * indirect them. It also uses so_count as a validity test. 6785 */ 6786 mutex_enter(&so->so_lock); 6787 6788 if (sti->sti_laddr_sa) { 6789 ASSERT((caddr_t)sti->sti_faddr_sa == 6790 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6791 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6792 sti->sti_laddr_valid = 0; 6793 sti->sti_faddr_valid = 0; 6794 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6795 sti->sti_laddr_sa = NULL; 6796 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6797 sti->sti_faddr_sa = NULL; 6798 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6799 } 6800 6801 mutex_exit(&so->so_lock); 6802 6803 if ((mp = sti->sti_eaddr_mp) != NULL) { 6804 freemsg(mp); 6805 sti->sti_eaddr_mp = NULL; 6806 sti->sti_delayed_error = 0; 6807 } 6808 6809 if ((mp = sti->sti_ack_mp) != NULL) { 6810 freemsg(mp); 6811 sti->sti_ack_mp = NULL; 6812 } 6813 6814 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 6815 sti->sti_nl7c_rcv_mp = NULL; 6816 freemsg(mp); 6817 } 6818 sti->sti_nl7c_rcv_rval = 0; 6819 if (sti->sti_nl7c_uri != NULL) { 6820 nl7c_urifree(so); 6821 /* urifree() cleared nl7c_uri */ 6822 } 6823 if (sti->sti_nl7c_flags) { 6824 sti->sti_nl7c_flags = 0; 6825 } 6826 6827 ASSERT(sti->sti_ux_bound_vp == NULL); 6828 if ((mp = sti->sti_unbind_mp) != NULL) { 6829 freemsg(mp); 6830 sti->sti_unbind_mp = NULL; 6831 } 6832 } 6833 6834 /* 6835 * Destroys the TPI information attached to a sonode. 
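 *
 * Destroy pairs with sotpi_info_create(), just as sotpi_info_fini()
 * pairs with sotpi_info_init(); a sketch of the full lifecycle, as
 * used by sotpi_convert_sonode() and sotpi_revert_sonode() above:
 *
 *	(void) sotpi_info_create(so, KM_SLEEP);
 *	sotpi_info_init(so);
 *	...
 *	sotpi_info_fini(so);
 *	sotpi_info_destroy(so);
 *
 * i_sotpi_info_constructor()/i_sotpi_info_destructor() are the kmem
 * cache flavors of the same idea and only touch the fields that are
 * preserved across allocations.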
6836 */ 6837 static void 6838 sotpi_info_destroy(struct sonode *so) 6839 { 6840 sotpi_info_t *sti = SOTOTPI(so); 6841 6842 i_sotpi_info_destructor(sti); 6843 kmem_free(sti, sizeof (*sti)); 6844 6845 so->so_priv = NULL; 6846 } 6847 6848 /* 6849 * Create the global sotpi socket module entry. It will never be freed. 6850 */ 6851 smod_info_t * 6852 sotpi_smod_create(void) 6853 { 6854 smod_info_t *smodp; 6855 6856 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 6857 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 6858 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 6859 /* 6860 * Initialize the smod_refcnt to 1 so it will never be freed. 6861 */ 6862 smodp->smod_refcnt = 1; 6863 smodp->smod_uc_version = SOCK_UC_VERSION; 6864 smodp->smod_dc_version = SOCK_DC_VERSION; 6865 smodp->smod_sock_create_func = &sotpi_create; 6866 smodp->smod_sock_destroy_func = &sotpi_destroy; 6867 return (smodp); 6868 } 6869
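
/*
 * Userland illustration (not part of this file's kernel logic): the
 * FIONBIO, FIOASYNC and FIOSETOWN cases handled by sotpi_ioctl() above
 * are what service a conventional asynchronous-I/O setup such as the
 * hypothetical sketch below; enable_sigio() is an arbitrary name.
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <sys/filio.h>
 *	#include <stropts.h>
 *	#include <unistd.h>
 *
 *	void
 *	enable_sigio(int s)
 *	{
 *		int on = 1;
 *		pid_t self = getpid();
 *
 *		(void) ioctl(s, FIOSETOWN, &self);
 *		(void) ioctl(s, FIOASYNC, &on);
 *		(void) ioctl(s, FIONBIO, &on);
 *	}
 */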