1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/t_lock.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/buf.h> 32 #include <sys/conf.h> 33 #include <sys/cred.h> 34 #include <sys/kmem.h> 35 #include <sys/kmem_impl.h> 36 #include <sys/sysmacros.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/debug.h> 40 #include <sys/errno.h> 41 #include <sys/time.h> 42 #include <sys/file.h> 43 #include <sys/open.h> 44 #include <sys/user.h> 45 #include <sys/termios.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/strsun.h> 49 #include <sys/suntpi.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/sockio.h> 61 #include <netinet/in.h> 62 #include <sys/un.h> 63 #include <sys/strsun.h> 64 65 #include <sys/tiuser.h> 66 #define _SUN_TPI_VERSION 2 67 #include <sys/tihdr.h> 68 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 69 70 #include <c2/audit.h> 71 72 #include <inet/common.h> 73 #include <inet/ip.h> 74 #include <inet/ip6.h> 75 #include <inet/tcp.h> 76 #include <inet/udp_impl.h> 77 78 #include <sys/zone.h> 79 80 #include <fs/sockfs/nl7c.h> 81 #include <fs/sockfs/nl7curi.h> 82 83 #include <inet/kssl/ksslapi.h> 84 85 #include <fs/sockfs/sockcommon.h> 86 #include <fs/sockfs/socktpi.h> 87 #include <fs/sockfs/socktpi_impl.h> 88 89 /* 90 * Possible failures when memory can't be allocated. The documented behavior: 91 * 92 * 5.5: 4.X: XNET: 93 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 94 * EINTR 95 * (4.X does not document EINTR but returns it) 96 * bind: ENOSR - ENOBUFS/ENOSR 97 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 98 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 99 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 100 * (4.X getpeername and getsockname do not fail in practice) 101 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 102 * listen: - - ENOBUFS 103 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 104 * EINTR 105 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 106 * EINTR 107 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 108 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 109 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 110 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 111 * 112 * Resolution. When allocation fails: 113 * recv: return EINTR 114 * send: return EINTR 115 * connect, accept: EINTR 116 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 117 * socket, socketpair: ENOBUFS 118 * getpeername, getsockname: sleep 119 * getsockopt, setsockopt: sleep 120 */ 121 122 #ifdef SOCK_TEST 123 /* 124 * Variables that make sockfs do something other than the standard TPI 125 * for the AF_INET transports. 126 * 127 * solisten_tpi_tcp: 128 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 129 * the transport is already bound. This is needed to avoid loosing the 130 * port number should listen() do a T_UNBIND_REQ followed by a 131 * O_T_BIND_REQ. 132 * 133 * soconnect_tpi_udp: 134 * UDP and ICMP can handle a T_CONN_REQ. 135 * This is needed to make the sequence of connect(), getsockname() 136 * return the local IP address used to send packets to the connected to 137 * destination. 138 * 139 * soconnect_tpi_tcp: 140 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 141 * Set this to non-zero to send TPI conformant messages to TCP in this 142 * respect. This is a performance optimization. 143 * 144 * soaccept_tpi_tcp: 145 * TCP can handle a T_CONN_REQ without the acceptor being bound. 146 * This is a performance optimization that has been picked up in XTI. 147 * 148 * soaccept_tpi_multioptions: 149 * When inheriting SOL_SOCKET options from the listener to the accepting 150 * socket send them as a single message for AF_INET{,6}. 151 */ 152 int solisten_tpi_tcp = 0; 153 int soconnect_tpi_udp = 0; 154 int soconnect_tpi_tcp = 0; 155 int soaccept_tpi_tcp = 0; 156 int soaccept_tpi_multioptions = 1; 157 #else /* SOCK_TEST */ 158 #define soconnect_tpi_tcp 0 159 #define soconnect_tpi_udp 0 160 #define solisten_tpi_tcp 0 161 #define soaccept_tpi_tcp 0 162 #define soaccept_tpi_multioptions 1 163 #endif /* SOCK_TEST */ 164 165 #ifdef SOCK_TEST 166 extern int do_useracc; 167 extern clock_t sock_test_timelimit; 168 #endif /* SOCK_TEST */ 169 170 /* 171 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 172 * applications working. Turn on this flag to disable these checks. 173 */ 174 int xnet_skip_checks = 0; 175 int xnet_check_print = 0; 176 int xnet_truncate_print = 0; 177 178 static void sotpi_destroy(struct sonode *); 179 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int, 180 int, int *, cred_t *cr); 181 182 static boolean_t sotpi_info_create(struct sonode *, int); 183 static void sotpi_info_init(struct sonode *); 184 static void sotpi_info_fini(struct sonode *); 185 static void sotpi_info_destroy(struct sonode *); 186 187 /* 188 * Do direct function call to the transport layer below; this would 189 * also allow the transport to utilize read-side synchronous stream 190 * interface if necessary. This is a /etc/system tunable that must 191 * not be modified on a running system. By default this is enabled 192 * for performance reasons and may be disabled for debugging purposes. 193 */ 194 boolean_t socktpi_direct = B_TRUE; 195 196 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 197 198 extern void sigintr(k_sigset_t *, int); 199 extern void sigunintr(k_sigset_t *); 200 201 /* Sockets acting as an in-kernel SSL proxy */ 202 extern mblk_t *strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *, 203 strsigset_t *, strsigset_t *, strpollset_t *); 204 extern mblk_t *strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *, 205 strsigset_t *, strsigset_t *, strpollset_t *); 206 207 static int sotpi_unbind(struct sonode *, int); 208 209 /* TPI sockfs sonode operations */ 210 int sotpi_init(struct sonode *, struct sonode *, struct cred *, 211 int); 212 static int sotpi_accept(struct sonode *, int, struct cred *, 213 struct sonode **); 214 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 215 int, struct cred *); 216 static int sotpi_listen(struct sonode *, int, struct cred *); 217 static int sotpi_connect(struct sonode *, const struct sockaddr *, 218 socklen_t, int, int, struct cred *); 219 extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 220 struct uio *, struct cred *); 221 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 222 struct uio *, struct cred *); 223 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 224 struct cred *, mblk_t **); 225 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 226 struct uio *, void *, t_uscalar_t, int); 227 static int sodgram_direct(struct sonode *, struct sockaddr *, 228 socklen_t, struct uio *, int); 229 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 230 socklen_t *, boolean_t, struct cred *); 231 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 232 socklen_t *, struct cred *); 233 static int sotpi_shutdown(struct sonode *, int, struct cred *); 234 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 235 socklen_t *, int, struct cred *); 236 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 237 socklen_t, struct cred *); 238 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 239 int32_t *); 240 static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int, 241 struct cred *, int32_t *); 242 static int sotpi_poll(struct sonode *, short, int, short *, 243 struct pollhead **); 244 static int sotpi_close(struct sonode *, int, struct cred *); 245 246 static int i_sotpi_info_constructor(sotpi_info_t *); 247 static void i_sotpi_info_destructor(sotpi_info_t *); 248 249 sonodeops_t sotpi_sonodeops = { 250 sotpi_init, /* sop_init */ 251 sotpi_accept, /* sop_accept */ 252 sotpi_bind, /* sop_bind */ 253 sotpi_listen, /* sop_listen */ 254 sotpi_connect, /* sop_connect */ 255 sotpi_recvmsg, /* sop_recvmsg */ 256 sotpi_sendmsg, /* sop_sendmsg */ 257 sotpi_sendmblk, /* sop_sendmblk */ 258 sotpi_getpeername, /* sop_getpeername */ 259 sotpi_getsockname, /* sop_getsockname */ 260 sotpi_shutdown, /* sop_shutdown */ 261 sotpi_getsockopt, /* sop_getsockopt */ 262 sotpi_setsockopt, /* sop_setsockopt */ 263 sotpi_ioctl, /* sop_ioctl */ 264 sotpi_poll, /* sop_poll */ 265 sotpi_close, /* sop_close */ 266 }; 267 268 /* 269 * Return a TPI socket vnode. 270 * 271 * Note that sockets assume that the driver will clone (either itself 272 * or by using the clone driver) i.e. a socket() call will always 273 * result in a new vnode being created. 274 */ 275 276 /* 277 * Common create code for socket and accept. If tso is set the values 278 * from that node is used instead of issuing a T_INFO_REQ. 279 */ 280 281 /* ARGSUSED */ 282 static struct sonode * 283 sotpi_create(struct sockparams *sp, int family, int type, int protocol, 284 int version, int sflags, int *errorp, cred_t *cr) 285 { 286 struct sonode *so; 287 kmem_cache_t *cp; 288 int sfamily = family; 289 290 ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 291 292 if (family == AF_NCA) { 293 /* 294 * The request is for an NCA socket so for NL7C use the 295 * INET domain instead and mark NL7C_AF_NCA below. 296 */ 297 family = AF_INET; 298 /* 299 * NL7C is not supported in the non-global zone, 300 * we enforce this restriction here. 301 */ 302 if (getzoneid() != GLOBAL_ZONEID) { 303 *errorp = ENOTSUP; 304 return (NULL); 305 } 306 } 307 308 /* 309 * to be compatible with old tpi socket implementation ignore 310 * sleep flag (sflags) passed in 311 */ 312 cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 313 so = kmem_cache_alloc(cp, KM_SLEEP); 314 if (so == NULL) { 315 *errorp = ENOMEM; 316 return (NULL); 317 } 318 319 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 320 sotpi_info_init(so); 321 322 if (sfamily == AF_NCA) { 323 SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA; 324 } 325 326 if (version == SOV_DEFAULT) 327 version = so_default_version; 328 329 so->so_version = (short)version; 330 *errorp = 0; 331 332 return (so); 333 } 334 335 static void 336 sotpi_destroy(struct sonode *so) 337 { 338 kmem_cache_t *cp; 339 struct sockparams *origsp; 340 341 /* 342 * If there is a new dealloc function (ie. smod_destroy_func), 343 * then it should check the correctness of the ops. 344 */ 345 346 ASSERT(so->so_ops == &sotpi_sonodeops); 347 348 origsp = SOTOTPI(so)->sti_orig_sp; 349 350 sotpi_info_fini(so); 351 352 if (so->so_state & SS_FALLBACK_COMP) { 353 /* 354 * A fallback happend, which means that a sotpi_info_t struct 355 * was allocated (as opposed to being allocated from the TPI 356 * sonode cache. Therefore we explicitly free the struct 357 * here. 358 */ 359 sotpi_info_destroy(so); 360 ASSERT(origsp != NULL); 361 362 origsp->sp_smod_info->smod_sock_destroy_func(so); 363 SOCKPARAMS_DEC_REF(origsp); 364 } else { 365 sonode_fini(so); 366 cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache : 367 socktpi_cache; 368 kmem_cache_free(cp, so); 369 } 370 } 371 372 /* ARGSUSED1 */ 373 int 374 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 375 { 376 major_t maj; 377 dev_t newdev; 378 struct vnode *vp; 379 int error = 0; 380 struct stdata *stp; 381 382 sotpi_info_t *sti = SOTOTPI(so); 383 384 dprint(1, ("sotpi_init()\n")); 385 386 /* 387 * over write the sleep flag passed in but that is ok 388 * as tpi socket does not honor sleep flag. 389 */ 390 flags |= FREAD|FWRITE; 391 392 /* 393 * Record in so_flag that it is a clone. 394 */ 395 if (getmajor(sti->sti_dev) == clone_major) 396 so->so_flag |= SOCLONE; 397 398 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 399 (so->so_family == AF_INET || so->so_family == AF_INET6) && 400 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 401 so->so_protocol == IPPROTO_IP)) { 402 /* Tell tcp or udp that it's talking to sockets */ 403 flags |= SO_SOCKSTR; 404 405 /* 406 * Here we indicate to socktpi_open() our attempt to 407 * make direct calls between sockfs and transport. 408 * The final decision is left to socktpi_open(). 409 */ 410 sti->sti_direct = 1; 411 412 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 413 if (so->so_type == SOCK_STREAM && tso != NULL) { 414 if (SOTOTPI(tso)->sti_direct) { 415 /* 416 * Inherit sti_direct from listener and pass 417 * SO_ACCEPTOR open flag to tcp, indicating 418 * that this is an accept fast-path instance. 419 */ 420 flags |= SO_ACCEPTOR; 421 } else { 422 /* 423 * sti_direct is not set on listener, meaning 424 * that the listener has been converted from 425 * a socket to a stream. Ensure that the 426 * acceptor inherits these settings. 427 */ 428 sti->sti_direct = 0; 429 flags &= ~SO_SOCKSTR; 430 } 431 } 432 } 433 434 /* 435 * Tell local transport that it is talking to sockets. 436 */ 437 if (so->so_family == AF_UNIX) { 438 flags |= SO_SOCKSTR; 439 } 440 441 vp = SOTOV(so); 442 newdev = vp->v_rdev; 443 maj = getmajor(newdev); 444 ASSERT(STREAMSTAB(maj)); 445 446 error = stropen(vp, &newdev, flags, cr); 447 448 stp = vp->v_stream; 449 if (error == 0) { 450 if (so->so_flag & SOCLONE) 451 ASSERT(newdev != vp->v_rdev); 452 mutex_enter(&so->so_lock); 453 sti->sti_dev = newdev; 454 vp->v_rdev = newdev; 455 mutex_exit(&so->so_lock); 456 457 if (stp->sd_flag & STRISTTY) { 458 /* 459 * this is a post SVR4 tty driver - a socket can not 460 * be a controlling terminal. Fail the open. 461 */ 462 (void) sotpi_close(so, flags, cr); 463 return (ENOTTY); /* XXX */ 464 } 465 466 ASSERT(stp->sd_wrq != NULL); 467 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 468 469 /* 470 * If caller is interested in doing direct function call 471 * interface to/from transport module, probe the module 472 * directly beneath the streamhead to see if it qualifies. 473 * 474 * We turn off the direct interface when qualifications fail. 475 * In the acceptor case, we simply turn off the sti_direct 476 * flag on the socket. We do the fallback after the accept 477 * has completed, before the new socket is returned to the 478 * application. 479 */ 480 if (sti->sti_direct) { 481 queue_t *tq = stp->sd_wrq->q_next; 482 483 /* 484 * sti_direct is currently supported and tested 485 * only for tcp/udp; this is the main reason to 486 * have the following assertions. 487 */ 488 ASSERT(so->so_family == AF_INET || 489 so->so_family == AF_INET6); 490 ASSERT(so->so_protocol == IPPROTO_UDP || 491 so->so_protocol == IPPROTO_TCP || 492 so->so_protocol == IPPROTO_IP); 493 ASSERT(so->so_type == SOCK_DGRAM || 494 so->so_type == SOCK_STREAM); 495 496 /* 497 * Abort direct call interface if the module directly 498 * underneath the stream head is not defined with the 499 * _D_DIRECT flag. This could happen in the tcp or 500 * udp case, when some other module is autopushed 501 * above it, or for some reasons the expected module 502 * isn't purely D_MP (which is the main requirement). 503 */ 504 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 505 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 506 int rval; 507 508 /* Continue on without direct calls */ 509 sti->sti_direct = 0; 510 511 /* 512 * Cannot issue ioctl on fallback socket since 513 * there is no conn associated with the queue. 514 * The fallback downcall will notify the proto 515 * of the change. 516 */ 517 if (!(flags & SO_ACCEPTOR) && 518 !(flags & SO_FALLBACK)) { 519 if ((error = strioctl(vp, 520 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 521 cr, &rval)) != 0) { 522 (void) sotpi_close(so, flags, 523 cr); 524 return (error); 525 } 526 } 527 } 528 } 529 530 if (flags & SO_FALLBACK) { 531 /* 532 * The stream created does not have a conn. 533 * do stream set up after conn has been assigned 534 */ 535 return (error); 536 } 537 if (error = so_strinit(so, tso)) { 538 (void) sotpi_close(so, flags, cr); 539 return (error); 540 } 541 542 /* Wildcard */ 543 if (so->so_protocol != so->so_sockparams->sp_protocol) { 544 int protocol = so->so_protocol; 545 /* 546 * Issue SO_PROTOTYPE setsockopt. 547 */ 548 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 549 &protocol, (t_uscalar_t)sizeof (protocol), cr); 550 if (error != 0) { 551 (void) sotpi_close(so, flags, cr); 552 /* 553 * Setsockopt often fails with ENOPROTOOPT but 554 * socket() should fail with 555 * EPROTONOSUPPORT/EPROTOTYPE. 556 */ 557 return (EPROTONOSUPPORT); 558 } 559 } 560 561 } else { 562 /* 563 * While the same socket can not be reopened (unlike specfs) 564 * the stream head sets STREOPENFAIL when the autopush fails. 565 */ 566 if ((stp != NULL) && 567 (stp->sd_flag & STREOPENFAIL)) { 568 /* 569 * Open failed part way through. 570 */ 571 mutex_enter(&stp->sd_lock); 572 stp->sd_flag &= ~STREOPENFAIL; 573 mutex_exit(&stp->sd_lock); 574 (void) sotpi_close(so, flags, cr); 575 return (error); 576 /*NOTREACHED*/ 577 } 578 ASSERT(stp == NULL); 579 } 580 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 581 "sockfs open:maj %d vp %p so %p error %d", 582 maj, vp, so, error); 583 return (error); 584 } 585 586 /* 587 * Bind the socket to an unspecified address in sockfs only. 588 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 589 * required in all cases. 590 */ 591 static void 592 so_automatic_bind(struct sonode *so) 593 { 594 sotpi_info_t *sti = SOTOTPI(so); 595 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 596 597 ASSERT(MUTEX_HELD(&so->so_lock)); 598 ASSERT(!(so->so_state & SS_ISBOUND)); 599 ASSERT(sti->sti_unbind_mp); 600 601 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 602 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 603 sti->sti_laddr_sa->sa_family = so->so_family; 604 so->so_state |= SS_ISBOUND; 605 } 606 607 608 /* 609 * bind the socket. 610 * 611 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 612 * are passed in we allow rebinding. Note that for backwards compatibility 613 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 614 * Thus the rebinding code is currently not executed. 615 * 616 * The constraints for rebinding are: 617 * - it is a SOCK_DGRAM, or 618 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 619 * and no listen() has been done. 620 * This rebinding code was added based on some language in the XNET book 621 * about not returning EINVAL it the protocol allows rebinding. However, 622 * this language is not present in the Posix socket draft. Thus maybe the 623 * rebinding logic should be deleted from the source. 624 * 625 * A null "name" can be used to unbind the socket if: 626 * - it is a SOCK_DGRAM, or 627 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 628 * and no listen() has been done. 629 */ 630 /* ARGSUSED */ 631 static int 632 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 633 socklen_t namelen, int backlog, int flags, struct cred *cr) 634 { 635 struct T_bind_req bind_req; 636 struct T_bind_ack *bind_ack; 637 int error = 0; 638 mblk_t *mp; 639 void *addr; 640 t_uscalar_t addrlen; 641 int unbind_on_err = 1; 642 boolean_t clear_acceptconn_on_err = B_FALSE; 643 boolean_t restore_backlog_on_err = B_FALSE; 644 int save_so_backlog; 645 t_scalar_t PRIM_type = O_T_BIND_REQ; 646 boolean_t tcp_udp_xport; 647 void *nl7c = NULL; 648 sotpi_info_t *sti = SOTOTPI(so); 649 650 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 651 (void *)so, (void *)name, namelen, backlog, flags, 652 pr_state(so->so_state, so->so_mode))); 653 654 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 655 656 if (!(flags & _SOBIND_LOCK_HELD)) { 657 mutex_enter(&so->so_lock); 658 so_lock_single(so); /* Set SOLOCKED */ 659 } else { 660 ASSERT(MUTEX_HELD(&so->so_lock)); 661 ASSERT(so->so_flag & SOLOCKED); 662 } 663 664 /* 665 * Make sure that there is a preallocated unbind_req message 666 * before binding. This message allocated when the socket is 667 * created but it might be have been consumed. 668 */ 669 if (sti->sti_unbind_mp == NULL) { 670 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 671 /* NOTE: holding so_lock while sleeping */ 672 sti->sti_unbind_mp = 673 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 674 cr); 675 } 676 677 if (flags & _SOBIND_REBIND) { 678 /* 679 * Called from solisten after doing an sotpi_unbind() or 680 * potentially without the unbind (latter for AF_INET{,6}). 681 */ 682 ASSERT(name == NULL && namelen == 0); 683 684 if (so->so_family == AF_UNIX) { 685 ASSERT(sti->sti_ux_bound_vp); 686 addr = &sti->sti_ux_laddr; 687 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 688 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 689 "addr 0x%p, vp %p\n", 690 addrlen, 691 (void *)((struct so_ux_addr *)addr)->soua_vp, 692 (void *)sti->sti_ux_bound_vp)); 693 } else { 694 addr = sti->sti_laddr_sa; 695 addrlen = (t_uscalar_t)sti->sti_laddr_len; 696 } 697 } else if (flags & _SOBIND_UNSPEC) { 698 ASSERT(name == NULL && namelen == 0); 699 700 /* 701 * The caller checked SS_ISBOUND but not necessarily 702 * under so_lock 703 */ 704 if (so->so_state & SS_ISBOUND) { 705 /* No error */ 706 goto done; 707 } 708 709 /* Set an initial local address */ 710 switch (so->so_family) { 711 case AF_UNIX: 712 /* 713 * Use an address with same size as struct sockaddr 714 * just like BSD. 715 */ 716 sti->sti_laddr_len = 717 (socklen_t)sizeof (struct sockaddr); 718 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 719 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 720 sti->sti_laddr_sa->sa_family = so->so_family; 721 722 /* 723 * Pass down an address with the implicit bind 724 * magic number and the rest all zeros. 725 * The transport will return a unique address. 726 */ 727 sti->sti_ux_laddr.soua_vp = NULL; 728 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 729 addr = &sti->sti_ux_laddr; 730 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 731 break; 732 733 case AF_INET: 734 case AF_INET6: 735 /* 736 * An unspecified bind in TPI has a NULL address. 737 * Set the address in sockfs to have the sa_family. 738 */ 739 sti->sti_laddr_len = (so->so_family == AF_INET) ? 740 (socklen_t)sizeof (sin_t) : 741 (socklen_t)sizeof (sin6_t); 742 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 743 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 744 sti->sti_laddr_sa->sa_family = so->so_family; 745 addr = NULL; 746 addrlen = 0; 747 break; 748 749 default: 750 /* 751 * An unspecified bind in TPI has a NULL address. 752 * Set the address in sockfs to be zero length. 753 * 754 * Can not assume there is a sa_family for all 755 * protocol families. For example, AF_X25 does not 756 * have a family field. 757 */ 758 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 759 sti->sti_laddr_len = 0; /* XXX correct? */ 760 addr = NULL; 761 addrlen = 0; 762 break; 763 } 764 765 } else { 766 if (so->so_state & SS_ISBOUND) { 767 /* 768 * If it is ok to rebind the socket, first unbind 769 * with the transport. A rebind to the NULL address 770 * is interpreted as an unbind. 771 * Note that a bind to NULL in BSD does unbind the 772 * socket but it fails with EINVAL. 773 * Note that regular sockets set SOV_SOCKBSD i.e. 774 * _SOBIND_SOCKBSD gets set here hence no type of 775 * socket does currently allow rebinding. 776 * 777 * If the name is NULL just do an unbind. 778 */ 779 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 780 name != NULL) { 781 error = EINVAL; 782 unbind_on_err = 0; 783 eprintsoline(so, error); 784 goto done; 785 } 786 if ((so->so_mode & SM_CONNREQUIRED) && 787 (so->so_state & SS_CANTREBIND)) { 788 error = EINVAL; 789 unbind_on_err = 0; 790 eprintsoline(so, error); 791 goto done; 792 } 793 error = sotpi_unbind(so, 0); 794 if (error) { 795 eprintsoline(so, error); 796 goto done; 797 } 798 ASSERT(!(so->so_state & SS_ISBOUND)); 799 if (name == NULL) { 800 so->so_state &= 801 ~(SS_ISCONNECTED|SS_ISCONNECTING); 802 goto done; 803 } 804 } 805 806 /* X/Open requires this check */ 807 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 808 if (xnet_check_print) { 809 printf("sockfs: X/Open bind state check " 810 "caused EINVAL\n"); 811 } 812 error = EINVAL; 813 goto done; 814 } 815 816 switch (so->so_family) { 817 case AF_UNIX: 818 /* 819 * All AF_UNIX addresses are nul terminated 820 * when copied (copyin_name) in so the minimum 821 * length is 3 bytes. 822 */ 823 if (name == NULL || 824 (ssize_t)namelen <= sizeof (short) + 1) { 825 error = EISDIR; 826 eprintsoline(so, error); 827 goto done; 828 } 829 /* 830 * Verify so_family matches the bound family. 831 * BSD does not check this for AF_UNIX resulting 832 * in funny mknods. 833 */ 834 if (name->sa_family != so->so_family) { 835 error = EAFNOSUPPORT; 836 goto done; 837 } 838 break; 839 case AF_INET: 840 if (name == NULL) { 841 error = EINVAL; 842 eprintsoline(so, error); 843 goto done; 844 } 845 if ((size_t)namelen != sizeof (sin_t)) { 846 error = name->sa_family != so->so_family ? 847 EAFNOSUPPORT : EINVAL; 848 eprintsoline(so, error); 849 goto done; 850 } 851 if ((flags & _SOBIND_XPG4_2) && 852 (name->sa_family != so->so_family)) { 853 /* 854 * This check has to be made for X/Open 855 * sockets however application failures have 856 * been observed when it is applied to 857 * all sockets. 858 */ 859 error = EAFNOSUPPORT; 860 eprintsoline(so, error); 861 goto done; 862 } 863 /* 864 * Force a zero sa_family to match so_family. 865 * 866 * Some programs like inetd(1M) don't set the 867 * family field. Other programs leave 868 * sin_family set to garbage - SunOS 4.X does 869 * not check the family field on a bind. 870 * We use the family field that 871 * was passed in to the socket() call. 872 */ 873 name->sa_family = so->so_family; 874 break; 875 876 case AF_INET6: { 877 #ifdef DEBUG 878 sin6_t *sin6 = (sin6_t *)name; 879 #endif /* DEBUG */ 880 881 if (name == NULL) { 882 error = EINVAL; 883 eprintsoline(so, error); 884 goto done; 885 } 886 if ((size_t)namelen != sizeof (sin6_t)) { 887 error = name->sa_family != so->so_family ? 888 EAFNOSUPPORT : EINVAL; 889 eprintsoline(so, error); 890 goto done; 891 } 892 if (name->sa_family != so->so_family) { 893 /* 894 * With IPv6 we require the family to match 895 * unlike in IPv4. 896 */ 897 error = EAFNOSUPPORT; 898 eprintsoline(so, error); 899 goto done; 900 } 901 #ifdef DEBUG 902 /* 903 * Verify that apps don't forget to clear 904 * sin6_scope_id etc 905 */ 906 if (sin6->sin6_scope_id != 0 && 907 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 908 zcmn_err(getzoneid(), CE_WARN, 909 "bind with uninitialized sin6_scope_id " 910 "(%d) on socket. Pid = %d\n", 911 (int)sin6->sin6_scope_id, 912 (int)curproc->p_pid); 913 } 914 if (sin6->__sin6_src_id != 0) { 915 zcmn_err(getzoneid(), CE_WARN, 916 "bind with uninitialized __sin6_src_id " 917 "(%d) on socket. Pid = %d\n", 918 (int)sin6->__sin6_src_id, 919 (int)curproc->p_pid); 920 } 921 #endif /* DEBUG */ 922 break; 923 } 924 default: 925 /* 926 * Don't do any length or sa_family check to allow 927 * non-sockaddr style addresses. 928 */ 929 if (name == NULL) { 930 error = EINVAL; 931 eprintsoline(so, error); 932 goto done; 933 } 934 break; 935 } 936 937 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 938 error = ENAMETOOLONG; 939 eprintsoline(so, error); 940 goto done; 941 } 942 /* 943 * Save local address. 944 */ 945 sti->sti_laddr_len = (socklen_t)namelen; 946 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 947 bcopy(name, sti->sti_laddr_sa, namelen); 948 949 addr = sti->sti_laddr_sa; 950 addrlen = (t_uscalar_t)sti->sti_laddr_len; 951 switch (so->so_family) { 952 case AF_INET6: 953 case AF_INET: 954 break; 955 case AF_UNIX: { 956 struct sockaddr_un *soun = 957 (struct sockaddr_un *)sti->sti_laddr_sa; 958 struct vnode *vp, *rvp; 959 struct vattr vattr; 960 961 ASSERT(sti->sti_ux_bound_vp == NULL); 962 /* 963 * Create vnode for the specified path name. 964 * Keep vnode held with a reference in sti_ux_bound_vp. 965 * Use the vnode pointer as the address used in the 966 * bind with the transport. 967 * 968 * Use the same mode as in BSD. In particular this does 969 * not observe the umask. 970 */ 971 /* MAXPATHLEN + soun_family + nul termination */ 972 if (sti->sti_laddr_len > 973 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 974 error = ENAMETOOLONG; 975 eprintsoline(so, error); 976 goto done; 977 } 978 vattr.va_type = VSOCK; 979 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 980 vattr.va_mask = AT_TYPE|AT_MODE; 981 /* NOTE: holding so_lock */ 982 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 983 EXCL, 0, &vp, CRMKNOD, 0, 0); 984 if (error) { 985 if (error == EEXIST) 986 error = EADDRINUSE; 987 eprintsoline(so, error); 988 goto done; 989 } 990 /* 991 * Establish pointer from the underlying filesystem 992 * vnode to the socket node. 993 * sti_ux_bound_vp and v_stream->sd_vnode form the 994 * cross-linkage between the underlying filesystem 995 * node and the socket node. 996 */ 997 998 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 999 VN_HOLD(rvp); 1000 VN_RELE(vp); 1001 vp = rvp; 1002 } 1003 1004 ASSERT(SOTOV(so)->v_stream); 1005 mutex_enter(&vp->v_lock); 1006 vp->v_stream = SOTOV(so)->v_stream; 1007 sti->sti_ux_bound_vp = vp; 1008 mutex_exit(&vp->v_lock); 1009 1010 /* 1011 * Use the vnode pointer value as a unique address 1012 * (together with the magic number to avoid conflicts 1013 * with implicit binds) in the transport provider. 1014 */ 1015 sti->sti_ux_laddr.soua_vp = 1016 (void *)sti->sti_ux_bound_vp; 1017 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1018 addr = &sti->sti_ux_laddr; 1019 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1020 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1021 addrlen, 1022 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1023 break; 1024 } 1025 } /* end switch (so->so_family) */ 1026 } 1027 1028 /* 1029 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1030 * the transport can start passing up T_CONN_IND messages 1031 * as soon as it receives the bind req and strsock_proto() 1032 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1033 */ 1034 if (flags & _SOBIND_LISTEN) { 1035 if ((so->so_state & SS_ACCEPTCONN) == 0) 1036 clear_acceptconn_on_err = B_TRUE; 1037 save_so_backlog = so->so_backlog; 1038 restore_backlog_on_err = B_TRUE; 1039 so->so_state |= SS_ACCEPTCONN; 1040 so->so_backlog = backlog; 1041 } 1042 1043 /* 1044 * If NL7C addr(s) have been configured check for addr/port match, 1045 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 1046 * 1047 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 1048 * family sockets only. If match mark as such. 1049 */ 1050 if (nl7c_enabled && ((addr != NULL && 1051 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1052 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 1053 sti->sti_nl7c_flags == NL7C_AF_NCA)) { 1054 /* 1055 * NL7C is not supported in non-global zones, 1056 * we enforce this restriction here. 1057 */ 1058 if (so->so_zoneid == GLOBAL_ZONEID) { 1059 /* An NL7C socket, mark it */ 1060 sti->sti_nl7c_flags |= NL7C_ENABLED; 1061 if (nl7c == NULL) { 1062 /* 1063 * Was an AF_NCA bind() so add it to the 1064 * addr list for reporting purposes. 1065 */ 1066 nl7c = nl7c_add_addr(addr, addrlen); 1067 } 1068 } else 1069 nl7c = NULL; 1070 } 1071 1072 /* 1073 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1074 * for other transports we will send in a O_T_BIND_REQ. 1075 */ 1076 if (tcp_udp_xport && 1077 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1078 PRIM_type = T_BIND_REQ; 1079 1080 bind_req.PRIM_type = PRIM_type; 1081 bind_req.ADDR_length = addrlen; 1082 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1083 bind_req.CONIND_number = backlog; 1084 /* NOTE: holding so_lock while sleeping */ 1085 mp = soallocproto2(&bind_req, sizeof (bind_req), 1086 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1087 sti->sti_laddr_valid = 0; 1088 1089 /* Done using sti_laddr_sa - can drop the lock */ 1090 mutex_exit(&so->so_lock); 1091 1092 /* 1093 * Intercept the bind_req message here to check if this <address/port> 1094 * was configured as an SSL proxy server, or if another endpoint was 1095 * already configured to act as a proxy for us. 1096 * 1097 * Note, only if NL7C not enabled for this socket. 1098 */ 1099 if (nl7c == NULL && 1100 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1101 so->so_type == SOCK_STREAM) { 1102 1103 if (sti->sti_kssl_ent != NULL) { 1104 kssl_release_ent(sti->sti_kssl_ent, so, 1105 sti->sti_kssl_type); 1106 sti->sti_kssl_ent = NULL; 1107 } 1108 1109 sti->sti_kssl_type = kssl_check_proxy(mp, so, 1110 &sti->sti_kssl_ent); 1111 switch (sti->sti_kssl_type) { 1112 case KSSL_NO_PROXY: 1113 break; 1114 1115 case KSSL_HAS_PROXY: 1116 mutex_enter(&so->so_lock); 1117 goto skip_transport; 1118 1119 case KSSL_IS_PROXY: 1120 break; 1121 } 1122 } 1123 1124 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1125 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1126 if (error) { 1127 eprintsoline(so, error); 1128 mutex_enter(&so->so_lock); 1129 goto done; 1130 } 1131 1132 mutex_enter(&so->so_lock); 1133 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1134 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1135 if (error) { 1136 eprintsoline(so, error); 1137 goto done; 1138 } 1139 skip_transport: 1140 ASSERT(mp); 1141 /* 1142 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1143 * strsock_proto while the lock was dropped above, the bind 1144 * is allowed to complete. 1145 */ 1146 1147 /* Mark as bound. This will be undone if we detect errors below. */ 1148 if (flags & _SOBIND_NOXLATE) { 1149 ASSERT(so->so_family == AF_UNIX); 1150 sti->sti_faddr_noxlate = 1; 1151 } 1152 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1153 so->so_state |= SS_ISBOUND; 1154 ASSERT(sti->sti_unbind_mp); 1155 1156 /* note that we've already set SS_ACCEPTCONN above */ 1157 1158 /* 1159 * Recompute addrlen - an unspecied bind sent down an 1160 * address of length zero but we expect the appropriate length 1161 * in return. 1162 */ 1163 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 1164 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1165 1166 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1167 /* 1168 * The alignment restriction is really too strict but 1169 * we want enough alignment to inspect the fields of 1170 * a sockaddr_in. 1171 */ 1172 addr = sogetoff(mp, bind_ack->ADDR_offset, 1173 bind_ack->ADDR_length, 1174 __TPI_ALIGN_SIZE); 1175 if (addr == NULL) { 1176 freemsg(mp); 1177 error = EPROTO; 1178 eprintsoline(so, error); 1179 goto done; 1180 } 1181 if (!(flags & _SOBIND_UNSPEC)) { 1182 /* 1183 * Verify that the transport didn't return something we 1184 * did not want e.g. an address other than what we asked for. 1185 * 1186 * NOTE: These checks would go away if/when we switch to 1187 * using the new TPI (in which the transport would fail 1188 * the request instead of assigning a different address). 1189 * 1190 * NOTE2: For protocols that we don't know (i.e. any 1191 * other than AF_INET6, AF_INET and AF_UNIX), we 1192 * cannot know if the transport should be expected to 1193 * return the same address as that requested. 1194 * 1195 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1196 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 1197 * 1198 * For example, in the case of netatalk it may be 1199 * inappropriate for the transport to return the 1200 * requested address (as it may have allocated a local 1201 * port number in behaviour similar to that of an 1202 * AF_INET bind request with a port number of zero). 1203 * 1204 * Given the definition of O_T_BIND_REQ, where the 1205 * transport may bind to an address other than the 1206 * requested address, it's not possible to determine 1207 * whether a returned address that differs from the 1208 * requested address is a reason to fail (because the 1209 * requested address was not available) or succeed 1210 * (because the transport allocated an appropriate 1211 * address and/or port). 1212 * 1213 * sockfs currently requires that the transport return 1214 * the requested address in the T_BIND_ACK, unless 1215 * there is code here to allow for any discrepancy. 1216 * Such code exists for AF_INET and AF_INET6. 1217 * 1218 * Netatalk chooses to return the requested address 1219 * rather than the (correct) allocated address. This 1220 * means that netatalk violates the TPI specification 1221 * (and would not function correctly if used from a 1222 * TLI application), but it does mean that it works 1223 * with sockfs. 1224 * 1225 * As noted above, using the newer XTI bind primitive 1226 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1227 * allow sockfs to be more sure about whether or not 1228 * the bind request had succeeded (as transports are 1229 * not permitted to bind to a different address than 1230 * that requested - they must return failure). 1231 * Unfortunately, support for T_BIND_REQ may not be 1232 * present in all transport implementations (netatalk, 1233 * for example, doesn't have it), making the 1234 * transition difficult. 1235 */ 1236 if (bind_ack->ADDR_length != addrlen) { 1237 /* Assumes that the requested address was in use */ 1238 freemsg(mp); 1239 error = EADDRINUSE; 1240 eprintsoline(so, error); 1241 goto done; 1242 } 1243 1244 switch (so->so_family) { 1245 case AF_INET6: 1246 case AF_INET: { 1247 sin_t *rname, *aname; 1248 1249 rname = (sin_t *)addr; 1250 aname = (sin_t *)sti->sti_laddr_sa; 1251 1252 /* 1253 * Take advantage of the alignment 1254 * of sin_port and sin6_port which fall 1255 * in the same place in their data structures. 1256 * Just use sin_port for either address family. 1257 * 1258 * This may become a problem if (heaven forbid) 1259 * there's a separate ipv6port_reserved... :-P 1260 * 1261 * Binding to port 0 has the semantics of letting 1262 * the transport bind to any port. 1263 * 1264 * If the transport is TCP or UDP since we had sent 1265 * a T_BIND_REQ we would not get a port other than 1266 * what we asked for. 1267 */ 1268 if (tcp_udp_xport) { 1269 /* 1270 * Pick up the new port number if we bound to 1271 * port 0. 1272 */ 1273 if (aname->sin_port == 0) 1274 aname->sin_port = rname->sin_port; 1275 sti->sti_laddr_valid = 1; 1276 break; 1277 } 1278 if (aname->sin_port != 0 && 1279 aname->sin_port != rname->sin_port) { 1280 freemsg(mp); 1281 error = EADDRINUSE; 1282 eprintsoline(so, error); 1283 goto done; 1284 } 1285 /* 1286 * Pick up the new port number if we bound to port 0. 1287 */ 1288 aname->sin_port = rname->sin_port; 1289 1290 /* 1291 * Unfortunately, addresses aren't _quite_ the same. 1292 */ 1293 if (so->so_family == AF_INET) { 1294 if (aname->sin_addr.s_addr != 1295 rname->sin_addr.s_addr) { 1296 freemsg(mp); 1297 error = EADDRNOTAVAIL; 1298 eprintsoline(so, error); 1299 goto done; 1300 } 1301 } else { 1302 sin6_t *rname6 = (sin6_t *)rname; 1303 sin6_t *aname6 = (sin6_t *)aname; 1304 1305 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1306 &rname6->sin6_addr)) { 1307 freemsg(mp); 1308 error = EADDRNOTAVAIL; 1309 eprintsoline(so, error); 1310 goto done; 1311 } 1312 } 1313 break; 1314 } 1315 case AF_UNIX: 1316 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1317 freemsg(mp); 1318 error = EADDRINUSE; 1319 eprintsoline(so, error); 1320 eprintso(so, 1321 ("addrlen %d, addr 0x%x, vp %p\n", 1322 addrlen, *((int *)addr), 1323 (void *)sti->sti_ux_bound_vp)); 1324 goto done; 1325 } 1326 sti->sti_laddr_valid = 1; 1327 break; 1328 default: 1329 /* 1330 * NOTE: This assumes that addresses can be 1331 * byte-compared for equivalence. 1332 */ 1333 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1334 freemsg(mp); 1335 error = EADDRINUSE; 1336 eprintsoline(so, error); 1337 goto done; 1338 } 1339 /* 1340 * Don't mark sti_laddr_valid, as we cannot be 1341 * sure that the returned address is the real 1342 * bound address when talking to an unknown 1343 * transport. 1344 */ 1345 break; 1346 } 1347 } else { 1348 /* 1349 * Save for returned address for getsockname. 1350 * Needed for unspecific bind unless transport supports 1351 * the TI_GETMYNAME ioctl. 1352 * Do this for AF_INET{,6} even though they do, as 1353 * caching info here is much better performance than 1354 * a TPI/STREAMS trip to the transport for getsockname. 1355 * Any which can't for some reason _must_ _not_ set 1356 * sti_laddr_valid here for the caching version of 1357 * getsockname to not break; 1358 */ 1359 switch (so->so_family) { 1360 case AF_UNIX: 1361 /* 1362 * Record the address bound with the transport 1363 * for use by socketpair. 1364 */ 1365 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1366 sti->sti_laddr_valid = 1; 1367 break; 1368 case AF_INET: 1369 case AF_INET6: 1370 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1371 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1372 sti->sti_laddr_valid = 1; 1373 break; 1374 default: 1375 /* 1376 * Don't mark sti_laddr_valid, as we cannot be 1377 * sure that the returned address is the real 1378 * bound address when talking to an unknown 1379 * transport. 1380 */ 1381 break; 1382 } 1383 } 1384 1385 if (nl7c != NULL) { 1386 /* Register listen()er sonode pointer with NL7C */ 1387 nl7c_listener_addr(nl7c, so); 1388 } 1389 1390 freemsg(mp); 1391 1392 done: 1393 if (error) { 1394 /* reset state & backlog to values held on entry */ 1395 if (clear_acceptconn_on_err == B_TRUE) 1396 so->so_state &= ~SS_ACCEPTCONN; 1397 if (restore_backlog_on_err == B_TRUE) 1398 so->so_backlog = save_so_backlog; 1399 1400 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1401 int err; 1402 1403 err = sotpi_unbind(so, 0); 1404 /* LINTED - statement has no consequent: if */ 1405 if (err) { 1406 eprintsoline(so, error); 1407 } else { 1408 ASSERT(!(so->so_state & SS_ISBOUND)); 1409 } 1410 } 1411 } 1412 if (!(flags & _SOBIND_LOCK_HELD)) { 1413 so_unlock_single(so, SOLOCKED); 1414 mutex_exit(&so->so_lock); 1415 } else { 1416 ASSERT(MUTEX_HELD(&so->so_lock)); 1417 ASSERT(so->so_flag & SOLOCKED); 1418 } 1419 return (error); 1420 } 1421 1422 /* bind the socket */ 1423 static int 1424 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1425 int flags, struct cred *cr) 1426 { 1427 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1428 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1429 1430 flags &= ~_SOBIND_SOCKETPAIR; 1431 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1432 } 1433 1434 /* 1435 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1436 * address, or when listen needs to unbind and bind. 1437 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1438 * so that a sobind can pick them up. 1439 */ 1440 static int 1441 sotpi_unbind(struct sonode *so, int flags) 1442 { 1443 struct T_unbind_req unbind_req; 1444 int error = 0; 1445 mblk_t *mp; 1446 sotpi_info_t *sti = SOTOTPI(so); 1447 1448 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1449 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1450 1451 ASSERT(MUTEX_HELD(&so->so_lock)); 1452 ASSERT(so->so_flag & SOLOCKED); 1453 1454 if (!(so->so_state & SS_ISBOUND)) { 1455 error = EINVAL; 1456 eprintsoline(so, error); 1457 goto done; 1458 } 1459 1460 mutex_exit(&so->so_lock); 1461 1462 /* 1463 * Flush the read and write side (except stream head read queue) 1464 * and send down T_UNBIND_REQ. 1465 */ 1466 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1467 1468 unbind_req.PRIM_type = T_UNBIND_REQ; 1469 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1470 0, _ALLOC_SLEEP, CRED()); 1471 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1472 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1473 mutex_enter(&so->so_lock); 1474 if (error) { 1475 eprintsoline(so, error); 1476 goto done; 1477 } 1478 1479 error = sowaitokack(so, T_UNBIND_REQ); 1480 if (error) { 1481 eprintsoline(so, error); 1482 goto done; 1483 } 1484 1485 /* 1486 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1487 * strsock_proto while the lock was dropped above, the unbind 1488 * is allowed to complete. 1489 */ 1490 if (!(flags & _SOUNBIND_REBIND)) { 1491 /* 1492 * Clear out bound address. 1493 */ 1494 vnode_t *vp; 1495 1496 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1497 1498 /* Undo any SSL proxy setup */ 1499 if ((so->so_family == AF_INET || 1500 so->so_family == AF_INET6) && 1501 (so->so_type == SOCK_STREAM) && 1502 (sti->sti_kssl_ent != NULL)) { 1503 kssl_release_ent(sti->sti_kssl_ent, so, 1504 sti->sti_kssl_type); 1505 sti->sti_kssl_ent = NULL; 1506 sti->sti_kssl_type = KSSL_NO_PROXY; 1507 } 1508 sti->sti_ux_bound_vp = NULL; 1509 vn_rele_stream(vp); 1510 } 1511 /* Clear out address */ 1512 sti->sti_laddr_len = 0; 1513 } 1514 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1515 sti->sti_laddr_valid = 0; 1516 1517 done: 1518 1519 /* If the caller held the lock don't release it here */ 1520 ASSERT(MUTEX_HELD(&so->so_lock)); 1521 ASSERT(so->so_flag & SOLOCKED); 1522 1523 return (error); 1524 } 1525 1526 /* 1527 * listen on the socket. 1528 * For TPI conforming transports this has to first unbind with the transport 1529 * and then bind again using the new backlog. 1530 */ 1531 /* ARGSUSED */ 1532 int 1533 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1534 { 1535 int error = 0; 1536 sotpi_info_t *sti = SOTOTPI(so); 1537 1538 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1539 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1540 1541 if (sti->sti_serv_type == T_CLTS) 1542 return (EOPNOTSUPP); 1543 1544 /* 1545 * If the socket is ready to accept connections already, then 1546 * return without doing anything. This avoids a problem where 1547 * a second listen() call fails if a connection is pending and 1548 * leaves the socket unbound. Only when we are not unbinding 1549 * with the transport can we safely increase the backlog. 1550 */ 1551 if (so->so_state & SS_ACCEPTCONN && 1552 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1553 /*CONSTCOND*/ 1554 !solisten_tpi_tcp)) 1555 return (0); 1556 1557 if (so->so_state & SS_ISCONNECTED) 1558 return (EINVAL); 1559 1560 mutex_enter(&so->so_lock); 1561 so_lock_single(so); /* Set SOLOCKED */ 1562 1563 /* 1564 * If the listen doesn't change the backlog we do nothing. 1565 * This avoids an EPROTO error from the transport. 1566 */ 1567 if ((so->so_state & SS_ACCEPTCONN) && 1568 so->so_backlog == backlog) 1569 goto done; 1570 1571 if (!(so->so_state & SS_ISBOUND)) { 1572 /* 1573 * Must have been explicitly bound in the UNIX domain. 1574 */ 1575 if (so->so_family == AF_UNIX) { 1576 error = EINVAL; 1577 goto done; 1578 } 1579 error = sotpi_bindlisten(so, NULL, 0, backlog, 1580 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1581 } else if (backlog > 0) { 1582 /* 1583 * AF_INET{,6} hack to avoid losing the port. 1584 * Assumes that all AF_INET{,6} transports can handle a 1585 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1586 * has already bound thus it is possible to avoid the unbind. 1587 */ 1588 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1589 /*CONSTCOND*/ 1590 !solisten_tpi_tcp)) { 1591 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1592 if (error) 1593 goto done; 1594 } 1595 error = sotpi_bindlisten(so, NULL, 0, backlog, 1596 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1597 } else { 1598 so->so_state |= SS_ACCEPTCONN; 1599 so->so_backlog = backlog; 1600 } 1601 if (error) 1602 goto done; 1603 ASSERT(so->so_state & SS_ACCEPTCONN); 1604 done: 1605 so_unlock_single(so, SOLOCKED); 1606 mutex_exit(&so->so_lock); 1607 return (error); 1608 } 1609 1610 /* 1611 * Disconnect either a specified seqno or all (-1). 1612 * The former is used on listening sockets only. 1613 * 1614 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1615 * the current use of sodisconnect(seqno == -1) is only for shutdown 1616 * so there is no point (and potentially incorrect) to unbind. 1617 */ 1618 static int 1619 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1620 { 1621 struct T_discon_req discon_req; 1622 int error = 0; 1623 mblk_t *mp; 1624 1625 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1626 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1627 1628 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1629 mutex_enter(&so->so_lock); 1630 so_lock_single(so); /* Set SOLOCKED */ 1631 } else { 1632 ASSERT(MUTEX_HELD(&so->so_lock)); 1633 ASSERT(so->so_flag & SOLOCKED); 1634 } 1635 1636 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1637 error = EINVAL; 1638 eprintsoline(so, error); 1639 goto done; 1640 } 1641 1642 mutex_exit(&so->so_lock); 1643 /* 1644 * Flush the write side (unless this is a listener) 1645 * and then send down a T_DISCON_REQ. 1646 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1647 * and other messages.) 1648 */ 1649 if (!(so->so_state & SS_ACCEPTCONN)) 1650 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1651 1652 discon_req.PRIM_type = T_DISCON_REQ; 1653 discon_req.SEQ_number = seqno; 1654 mp = soallocproto1(&discon_req, sizeof (discon_req), 1655 0, _ALLOC_SLEEP, CRED()); 1656 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1657 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1658 mutex_enter(&so->so_lock); 1659 if (error) { 1660 eprintsoline(so, error); 1661 goto done; 1662 } 1663 1664 error = sowaitokack(so, T_DISCON_REQ); 1665 if (error) { 1666 eprintsoline(so, error); 1667 goto done; 1668 } 1669 /* 1670 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1671 * strsock_proto while the lock was dropped above, the disconnect 1672 * is allowed to complete. However, it is not possible to 1673 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1674 */ 1675 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1676 SOTOTPI(so)->sti_laddr_valid = 0; 1677 SOTOTPI(so)->sti_faddr_valid = 0; 1678 done: 1679 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1680 so_unlock_single(so, SOLOCKED); 1681 mutex_exit(&so->so_lock); 1682 } else { 1683 /* If the caller held the lock don't release it here */ 1684 ASSERT(MUTEX_HELD(&so->so_lock)); 1685 ASSERT(so->so_flag & SOLOCKED); 1686 } 1687 return (error); 1688 } 1689 1690 /* ARGSUSED */ 1691 int 1692 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1693 struct sonode **nsop) 1694 { 1695 struct T_conn_ind *conn_ind; 1696 struct T_conn_res *conn_res; 1697 int error = 0; 1698 mblk_t *mp, *ctxmp, *ack_mp; 1699 struct sonode *nso; 1700 vnode_t *nvp; 1701 void *src; 1702 t_uscalar_t srclen; 1703 void *opt; 1704 t_uscalar_t optlen; 1705 t_scalar_t PRIM_type; 1706 t_scalar_t SEQ_number; 1707 size_t sinlen; 1708 sotpi_info_t *sti = SOTOTPI(so); 1709 sotpi_info_t *nsti; 1710 1711 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1712 (void *)so, fflag, (void *)nsop, 1713 pr_state(so->so_state, so->so_mode))); 1714 1715 /* 1716 * Defer single-threading the accepting socket until 1717 * the T_CONN_IND has been received and parsed and the 1718 * new sonode has been opened. 1719 */ 1720 1721 /* Check that we are not already connected */ 1722 if ((so->so_state & SS_ACCEPTCONN) == 0) 1723 goto conn_bad; 1724 again: 1725 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1726 goto e_bad; 1727 1728 ASSERT(mp != NULL); 1729 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1730 ctxmp = mp->b_cont; 1731 1732 /* 1733 * Save SEQ_number for error paths. 1734 */ 1735 SEQ_number = conn_ind->SEQ_number; 1736 1737 srclen = conn_ind->SRC_length; 1738 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1739 if (src == NULL) { 1740 error = EPROTO; 1741 freemsg(mp); 1742 eprintsoline(so, error); 1743 goto disconnect_unlocked; 1744 } 1745 optlen = conn_ind->OPT_length; 1746 switch (so->so_family) { 1747 case AF_INET: 1748 case AF_INET6: 1749 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1750 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1751 &opt, conn_ind->OPT_length); 1752 } else { 1753 /* 1754 * The transport (in this case TCP) hasn't sent up 1755 * a pointer to an instance for the accept fast-path. 1756 * Disable fast-path completely because the call to 1757 * sotpi_create() below would otherwise create an 1758 * incomplete TCP instance, which would lead to 1759 * problems when sockfs sends a normal T_CONN_RES 1760 * message down the new stream. 1761 */ 1762 if (sti->sti_direct) { 1763 int rval; 1764 /* 1765 * For consistency we inform tcp to disable 1766 * direct interface on the listener, though 1767 * we can certainly live without doing this 1768 * because no data will ever travel upstream 1769 * on the listening socket. 1770 */ 1771 sti->sti_direct = 0; 1772 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1773 0, 0, K_TO_K, cr, &rval); 1774 } 1775 opt = NULL; 1776 optlen = 0; 1777 } 1778 break; 1779 case AF_UNIX: 1780 default: 1781 if (optlen != 0) { 1782 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1783 __TPI_ALIGN_SIZE); 1784 if (opt == NULL) { 1785 error = EPROTO; 1786 freemsg(mp); 1787 eprintsoline(so, error); 1788 goto disconnect_unlocked; 1789 } 1790 } 1791 if (so->so_family == AF_UNIX) { 1792 if (!sti->sti_faddr_noxlate) { 1793 src = NULL; 1794 srclen = 0; 1795 } 1796 /* Extract src address from options */ 1797 if (optlen != 0) 1798 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1799 } 1800 break; 1801 } 1802 1803 /* 1804 * Create the new socket. 1805 */ 1806 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1807 if (nso == NULL) { 1808 ASSERT(error != 0); 1809 /* 1810 * Accept can not fail with ENOBUFS. sotpi_create 1811 * sleeps waiting for memory until a signal is caught 1812 * so return EINTR. 1813 */ 1814 freemsg(mp); 1815 if (error == ENOBUFS) 1816 error = EINTR; 1817 goto e_disc_unl; 1818 } 1819 nvp = SOTOV(nso); 1820 nsti = SOTOTPI(nso); 1821 1822 /* 1823 * If the transport sent up an SSL connection context, then attach 1824 * it the new socket, and set the (sd_wputdatafunc)() and 1825 * (sd_rputdatafunc)() stream head hooks to intercept and process 1826 * SSL records. 1827 */ 1828 if (ctxmp != NULL) { 1829 /* 1830 * This kssl_ctx_t is already held for us by the transport. 1831 * So, we don't need to do a kssl_hold_ctx() here. 1832 */ 1833 nsti->sti_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1834 freemsg(ctxmp); 1835 mp->b_cont = NULL; 1836 strsetrwputdatahooks(nvp, strsock_kssl_input, 1837 strsock_kssl_output); 1838 } 1839 #ifdef DEBUG 1840 /* 1841 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1842 * it's inherited early to allow debugging of the accept code itself. 1843 */ 1844 nso->so_options |= so->so_options & SO_DEBUG; 1845 #endif /* DEBUG */ 1846 1847 /* 1848 * Save the SRC address from the T_CONN_IND 1849 * for getpeername to work on AF_UNIX and on transports that do not 1850 * support TI_GETPEERNAME. 1851 * 1852 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1853 * copyin_name(). 1854 */ 1855 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1856 error = EINVAL; 1857 freemsg(mp); 1858 eprintsoline(so, error); 1859 goto disconnect_vp_unlocked; 1860 } 1861 nsti->sti_faddr_len = (socklen_t)srclen; 1862 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1863 bcopy(src, nsti->sti_faddr_sa, srclen); 1864 nsti->sti_faddr_valid = 1; 1865 1866 /* 1867 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1868 */ 1869 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1870 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1871 cred_t *cr; 1872 pid_t cpid; 1873 1874 cr = msg_getcred(mp, &cpid); 1875 if (cr != NULL) { 1876 crhold(cr); 1877 nso->so_peercred = cr; 1878 nso->so_cpid = cpid; 1879 } 1880 freemsg(mp); 1881 1882 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1883 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1884 if (mp == NULL) { 1885 /* 1886 * Accept can not fail with ENOBUFS. 1887 * A signal was caught so return EINTR. 1888 */ 1889 error = EINTR; 1890 eprintsoline(so, error); 1891 goto disconnect_vp_unlocked; 1892 } 1893 conn_res = (struct T_conn_res *)mp->b_rptr; 1894 } else { 1895 /* 1896 * For efficency reasons we use msg_extractcred; no crhold 1897 * needed since db_credp is cleared (i.e., we move the cred 1898 * from the message to so_peercred. 1899 */ 1900 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1901 1902 mp->b_rptr = DB_BASE(mp); 1903 conn_res = (struct T_conn_res *)mp->b_rptr; 1904 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1905 1906 mblk_setcred(mp, cr, curproc->p_pid); 1907 } 1908 1909 /* 1910 * New socket must be bound at least in sockfs and, except for AF_INET, 1911 * (or AF_INET6) it also has to be bound in the transport provider. 1912 * We set the local address in the sonode from the T_OK_ACK of the 1913 * T_CONN_RES. For this reason the address we bind to here isn't 1914 * important. 1915 */ 1916 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1917 /*CONSTCOND*/ 1918 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1919 /* 1920 * Optimization for AF_INET{,6} transports 1921 * that can handle a T_CONN_RES without being bound. 1922 */ 1923 mutex_enter(&nso->so_lock); 1924 so_automatic_bind(nso); 1925 mutex_exit(&nso->so_lock); 1926 } else { 1927 /* Perform NULL bind with the transport provider. */ 1928 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1929 cr)) != 0) { 1930 ASSERT(error != ENOBUFS); 1931 freemsg(mp); 1932 eprintsoline(nso, error); 1933 goto disconnect_vp_unlocked; 1934 } 1935 } 1936 1937 /* 1938 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1939 * so that any data arriving on the new socket will cause the 1940 * appropriate signals to be delivered for the new socket. 1941 * 1942 * No other thread (except strsock_proto and strsock_misc) 1943 * can access the new socket thus we relax the locking. 1944 */ 1945 nso->so_pgrp = so->so_pgrp; 1946 nso->so_state |= so->so_state & SS_ASYNC; 1947 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1948 1949 if (nso->so_pgrp != 0) { 1950 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1951 eprintsoline(nso, error); 1952 error = 0; 1953 nso->so_pgrp = 0; 1954 } 1955 } 1956 1957 /* 1958 * Make note of the socket level options. TCP and IP level options 1959 * are already inherited. We could do all this after accept is 1960 * successful but doing it here simplifies code and no harm done 1961 * for error case. 1962 */ 1963 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1964 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1965 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1966 nso->so_sndbuf = so->so_sndbuf; 1967 nso->so_rcvbuf = so->so_rcvbuf; 1968 if (nso->so_options & SO_LINGER) 1969 nso->so_linger = so->so_linger; 1970 1971 /* 1972 * Note that the following sti_direct code path should be 1973 * removed once we are confident that the direct sockets 1974 * do not result in any degradation. 1975 */ 1976 if (sti->sti_direct) { 1977 1978 ASSERT(opt != NULL); 1979 1980 conn_res->OPT_length = optlen; 1981 conn_res->OPT_offset = MBLKL(mp); 1982 bcopy(&opt, mp->b_wptr, optlen); 1983 mp->b_wptr += optlen; 1984 conn_res->PRIM_type = T_CONN_RES; 1985 conn_res->ACCEPTOR_id = 0; 1986 PRIM_type = T_CONN_RES; 1987 1988 /* Send down the T_CONN_RES on acceptor STREAM */ 1989 error = kstrputmsg(SOTOV(nso), mp, NULL, 1990 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1991 if (error) { 1992 mutex_enter(&so->so_lock); 1993 so_lock_single(so); 1994 eprintsoline(so, error); 1995 goto disconnect_vp; 1996 } 1997 mutex_enter(&nso->so_lock); 1998 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1999 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2000 if (error) { 2001 mutex_exit(&nso->so_lock); 2002 mutex_enter(&so->so_lock); 2003 so_lock_single(so); 2004 eprintsoline(so, error); 2005 goto disconnect_vp; 2006 } 2007 if (nso->so_family == AF_INET) { 2008 sin_t *sin; 2009 2010 sin = (sin_t *)(ack_mp->b_rptr + 2011 sizeof (struct T_ok_ack)); 2012 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 2013 nsti->sti_laddr_len = sizeof (sin_t); 2014 } else { 2015 sin6_t *sin6; 2016 2017 sin6 = (sin6_t *)(ack_mp->b_rptr + 2018 sizeof (struct T_ok_ack)); 2019 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 2020 nsti->sti_laddr_len = sizeof (sin6_t); 2021 } 2022 freemsg(ack_mp); 2023 2024 nso->so_state |= SS_ISCONNECTED; 2025 nso->so_proto_handle = (sock_lower_handle_t)opt; 2026 nsti->sti_laddr_valid = 1; 2027 2028 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 2029 /* 2030 * A NL7C marked listen()er so the new socket 2031 * inherits the listen()er's NL7C state, except 2032 * for NL7C_POLLIN. 2033 * 2034 * Only call NL7C to process the new socket if 2035 * the listen socket allows blocking i/o. 2036 */ 2037 nsti->sti_nl7c_flags = 2038 sti->sti_nl7c_flags & (~NL7C_POLLIN); 2039 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 2040 /* 2041 * Nonblocking accept() just make it 2042 * persist to defer processing to the 2043 * read-side syscall (e.g. read). 2044 */ 2045 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 2046 } else if (nl7c_process(nso, B_FALSE)) { 2047 /* 2048 * NL7C has completed processing on the 2049 * socket, close the socket and back to 2050 * the top to await the next T_CONN_IND. 2051 */ 2052 mutex_exit(&nso->so_lock); 2053 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 2054 cr, NULL); 2055 VN_RELE(nvp); 2056 goto again; 2057 } 2058 /* Pass the new socket out */ 2059 } 2060 2061 mutex_exit(&nso->so_lock); 2062 2063 /* 2064 * It's possible, through the use of autopush for example, 2065 * that the acceptor stream may not support sti_direct 2066 * semantics. If the new socket does not support sti_direct 2067 * we issue a _SIOCSOCKFALLBACK to inform the transport 2068 * as we would in the I_PUSH case. 2069 */ 2070 if (nsti->sti_direct == 0) { 2071 int rval; 2072 2073 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2074 0, 0, K_TO_K, cr, &rval)) != 0) { 2075 mutex_enter(&so->so_lock); 2076 so_lock_single(so); 2077 eprintsoline(so, error); 2078 goto disconnect_vp; 2079 } 2080 } 2081 2082 /* 2083 * Pass out new socket. 2084 */ 2085 if (nsop != NULL) 2086 *nsop = nso; 2087 2088 return (0); 2089 } 2090 2091 /* 2092 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2093 * which don't support the FireEngine accept fast-path. It is also 2094 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2095 * again. Neither sockfs nor TCP attempt to find out if some other 2096 * random module has been inserted in between (in which case we 2097 * should follow TLI accept behaviour). We blindly assume the worst 2098 * case and revert back to old behaviour i.e. TCP will not send us 2099 * any option (eager) and the accept should happen on the listener 2100 * queue. Any queued T_conn_ind have already got their options removed 2101 * by so_sock2_stream() when "sockmod" was I_POP'd. 2102 */ 2103 /* 2104 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2105 */ 2106 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2107 #ifdef _ILP32 2108 queue_t *q; 2109 2110 /* 2111 * Find read queue in driver 2112 * Can safely do this since we "own" nso/nvp. 2113 */ 2114 q = strvp2wq(nvp)->q_next; 2115 while (SAMESTR(q)) 2116 q = q->q_next; 2117 q = RD(q); 2118 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2119 #else 2120 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2121 #endif /* _ILP32 */ 2122 conn_res->PRIM_type = O_T_CONN_RES; 2123 PRIM_type = O_T_CONN_RES; 2124 } else { 2125 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2126 conn_res->PRIM_type = T_CONN_RES; 2127 PRIM_type = T_CONN_RES; 2128 } 2129 conn_res->SEQ_number = SEQ_number; 2130 conn_res->OPT_length = 0; 2131 conn_res->OPT_offset = 0; 2132 2133 mutex_enter(&so->so_lock); 2134 so_lock_single(so); /* Set SOLOCKED */ 2135 mutex_exit(&so->so_lock); 2136 2137 error = kstrputmsg(SOTOV(so), mp, NULL, 2138 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2139 mutex_enter(&so->so_lock); 2140 if (error) { 2141 eprintsoline(so, error); 2142 goto disconnect_vp; 2143 } 2144 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2145 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2146 if (error) { 2147 eprintsoline(so, error); 2148 goto disconnect_vp; 2149 } 2150 /* 2151 * If there is a sin/sin6 appended onto the T_OK_ACK use 2152 * that to set the local address. If this is not present 2153 * then we zero out the address and don't set the 2154 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2155 * the pathname from the listening socket. 2156 */ 2157 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 2158 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 2159 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2160 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2161 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2162 nsti->sti_laddr_len = sinlen; 2163 nsti->sti_laddr_valid = 1; 2164 } else if (nso->so_family == AF_UNIX) { 2165 ASSERT(so->so_family == AF_UNIX); 2166 nsti->sti_laddr_len = sti->sti_laddr_len; 2167 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2168 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2169 nsti->sti_laddr_len); 2170 nsti->sti_laddr_valid = 1; 2171 } else { 2172 nsti->sti_laddr_len = sti->sti_laddr_len; 2173 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2174 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2175 nsti->sti_laddr_sa->sa_family = nso->so_family; 2176 } 2177 freemsg(ack_mp); 2178 2179 so_unlock_single(so, SOLOCKED); 2180 mutex_exit(&so->so_lock); 2181 2182 nso->so_state |= SS_ISCONNECTED; 2183 2184 /* 2185 * Pass out new socket. 2186 */ 2187 if (nsop != NULL) 2188 *nsop = nso; 2189 2190 return (0); 2191 2192 2193 eproto_disc_unl: 2194 error = EPROTO; 2195 e_disc_unl: 2196 eprintsoline(so, error); 2197 goto disconnect_unlocked; 2198 2199 pr_disc_vp_unl: 2200 eprintsoline(so, error); 2201 disconnect_vp_unlocked: 2202 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2203 VN_RELE(nvp); 2204 disconnect_unlocked: 2205 (void) sodisconnect(so, SEQ_number, 0); 2206 return (error); 2207 2208 pr_disc_vp: 2209 eprintsoline(so, error); 2210 disconnect_vp: 2211 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2212 so_unlock_single(so, SOLOCKED); 2213 mutex_exit(&so->so_lock); 2214 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2215 VN_RELE(nvp); 2216 return (error); 2217 2218 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2219 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2220 ? EOPNOTSUPP : EINVAL; 2221 e_bad: 2222 eprintsoline(so, error); 2223 return (error); 2224 } 2225 2226 /* 2227 * connect a socket. 2228 * 2229 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2230 * unconnect (by specifying a null address). 2231 */ 2232 int 2233 sotpi_connect(struct sonode *so, 2234 const struct sockaddr *name, 2235 socklen_t namelen, 2236 int fflag, 2237 int flags, 2238 struct cred *cr) 2239 { 2240 struct T_conn_req conn_req; 2241 int error = 0; 2242 mblk_t *mp; 2243 void *src; 2244 socklen_t srclen; 2245 void *addr; 2246 socklen_t addrlen; 2247 boolean_t need_unlock; 2248 sotpi_info_t *sti = SOTOTPI(so); 2249 2250 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2251 (void *)so, (void *)name, namelen, fflag, flags, 2252 pr_state(so->so_state, so->so_mode))); 2253 2254 /* 2255 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2256 * avoid sleeping for memory with SOLOCKED held. 2257 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2258 * + sizeof (struct T_opthdr). 2259 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2260 * exceed sti_faddr_maxlen). 2261 */ 2262 mp = soallocproto(sizeof (struct T_conn_req) + 2263 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 2264 cr); 2265 if (mp == NULL) { 2266 /* 2267 * Connect can not fail with ENOBUFS. A signal was 2268 * caught so return EINTR. 2269 */ 2270 error = EINTR; 2271 eprintsoline(so, error); 2272 return (error); 2273 } 2274 2275 mutex_enter(&so->so_lock); 2276 /* 2277 * Make sure there is a preallocated T_unbind_req message 2278 * before any binding. This message is allocated when the 2279 * socket is created. Since another thread can consume 2280 * so_unbind_mp by the time we return from so_lock_single(), 2281 * we should check the availability of so_unbind_mp after 2282 * we return from so_lock_single(). 2283 */ 2284 2285 so_lock_single(so); /* Set SOLOCKED */ 2286 need_unlock = B_TRUE; 2287 2288 if (sti->sti_unbind_mp == NULL) { 2289 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2290 /* NOTE: holding so_lock while sleeping */ 2291 sti->sti_unbind_mp = 2292 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2293 if (sti->sti_unbind_mp == NULL) { 2294 error = EINTR; 2295 goto done; 2296 } 2297 } 2298 2299 /* 2300 * Can't have done a listen before connecting. 2301 */ 2302 if (so->so_state & SS_ACCEPTCONN) { 2303 error = EOPNOTSUPP; 2304 goto done; 2305 } 2306 2307 /* 2308 * Must be bound with the transport 2309 */ 2310 if (!(so->so_state & SS_ISBOUND)) { 2311 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2312 /*CONSTCOND*/ 2313 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2314 /* 2315 * Optimization for AF_INET{,6} transports 2316 * that can handle a T_CONN_REQ without being bound. 2317 */ 2318 so_automatic_bind(so); 2319 } else { 2320 error = sotpi_bind(so, NULL, 0, 2321 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2322 if (error) 2323 goto done; 2324 } 2325 ASSERT(so->so_state & SS_ISBOUND); 2326 flags |= _SOCONNECT_DID_BIND; 2327 } 2328 2329 /* 2330 * Handle a connect to a name parameter of type AF_UNSPEC like a 2331 * connect to a null address. This is the portable method to 2332 * unconnect a socket. 2333 */ 2334 if ((namelen >= sizeof (sa_family_t)) && 2335 (name->sa_family == AF_UNSPEC)) { 2336 name = NULL; 2337 namelen = 0; 2338 } 2339 2340 /* 2341 * Check that we are not already connected. 2342 * A connection-oriented socket cannot be reconnected. 2343 * A connected connection-less socket can be 2344 * - connected to a different address by a subsequent connect 2345 * - "unconnected" by a connect to the NULL address 2346 */ 2347 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2348 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2349 if (so->so_mode & SM_CONNREQUIRED) { 2350 /* Connection-oriented socket */ 2351 error = so->so_state & SS_ISCONNECTED ? 2352 EISCONN : EALREADY; 2353 goto done; 2354 } 2355 /* Connection-less socket */ 2356 if (name == NULL) { 2357 /* 2358 * Remove the connected state and clear SO_DGRAM_ERRIND 2359 * since it was set when the socket was connected. 2360 * If this is UDP also send down a T_DISCON_REQ. 2361 */ 2362 int val; 2363 2364 if ((so->so_family == AF_INET || 2365 so->so_family == AF_INET6) && 2366 (so->so_type == SOCK_DGRAM || 2367 so->so_type == SOCK_RAW) && 2368 /*CONSTCOND*/ 2369 !soconnect_tpi_udp) { 2370 /* XXX What about implicitly unbinding here? */ 2371 error = sodisconnect(so, -1, 2372 _SODISCONNECT_LOCK_HELD); 2373 } else { 2374 so->so_state &= 2375 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2376 sti->sti_faddr_valid = 0; 2377 sti->sti_faddr_len = 0; 2378 } 2379 2380 /* Remove SOLOCKED since setsockopt will grab it */ 2381 so_unlock_single(so, SOLOCKED); 2382 mutex_exit(&so->so_lock); 2383 2384 val = 0; 2385 (void) sotpi_setsockopt(so, SOL_SOCKET, 2386 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2387 cr); 2388 2389 mutex_enter(&so->so_lock); 2390 so_lock_single(so); /* Set SOLOCKED */ 2391 goto done; 2392 } 2393 } 2394 ASSERT(so->so_state & SS_ISBOUND); 2395 2396 if (name == NULL || namelen == 0) { 2397 error = EINVAL; 2398 goto done; 2399 } 2400 /* 2401 * Mark the socket if sti_faddr_sa represents the transport level 2402 * address. 2403 */ 2404 if (flags & _SOCONNECT_NOXLATE) { 2405 struct sockaddr_ux *soaddr_ux; 2406 2407 ASSERT(so->so_family == AF_UNIX); 2408 if (namelen != sizeof (struct sockaddr_ux)) { 2409 error = EINVAL; 2410 goto done; 2411 } 2412 soaddr_ux = (struct sockaddr_ux *)name; 2413 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2414 namelen = sizeof (soaddr_ux->sou_addr); 2415 sti->sti_faddr_noxlate = 1; 2416 } 2417 2418 /* 2419 * Length and family checks. 2420 */ 2421 error = so_addr_verify(so, name, namelen); 2422 if (error) 2423 goto bad; 2424 2425 /* 2426 * Save foreign address. Needed for AF_UNIX as well as 2427 * transport providers that do not support TI_GETPEERNAME. 2428 * Also used for cached foreign address for TCP and UDP. 2429 */ 2430 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2431 error = EINVAL; 2432 goto done; 2433 } 2434 sti->sti_faddr_len = (socklen_t)namelen; 2435 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2436 bcopy(name, sti->sti_faddr_sa, namelen); 2437 sti->sti_faddr_valid = 1; 2438 2439 if (so->so_family == AF_UNIX) { 2440 if (sti->sti_faddr_noxlate) { 2441 /* 2442 * Already have a transport internal address. Do not 2443 * pass any (transport internal) source address. 2444 */ 2445 addr = sti->sti_faddr_sa; 2446 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2447 src = NULL; 2448 srclen = 0; 2449 } else { 2450 /* 2451 * Pass the sockaddr_un source address as an option 2452 * and translate the remote address. 2453 * Holding so_lock thus sti_laddr_sa can not change. 2454 */ 2455 src = sti->sti_laddr_sa; 2456 srclen = (t_uscalar_t)sti->sti_laddr_len; 2457 dprintso(so, 1, 2458 ("sotpi_connect UNIX: srclen %d, src %p\n", 2459 srclen, src)); 2460 error = so_ux_addr_xlate(so, 2461 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2462 (flags & _SOCONNECT_XPG4_2), 2463 &addr, &addrlen); 2464 if (error) 2465 goto bad; 2466 } 2467 } else { 2468 addr = sti->sti_faddr_sa; 2469 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2470 src = NULL; 2471 srclen = 0; 2472 } 2473 /* 2474 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2475 * option which asks the transport provider to send T_UDERR_IND 2476 * messages. These T_UDERR_IND messages are used to return connected 2477 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2478 * 2479 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2480 * we send down a T_CONN_REQ. This is needed to let the 2481 * transport assign a local address that is consistent with 2482 * the remote address. Applications depend on a getsockname() 2483 * after a connect() to retrieve the "source" IP address for 2484 * the connected socket. Invalidate the cached local address 2485 * to force getsockname() to enquire of the transport. 2486 */ 2487 if (!(so->so_mode & SM_CONNREQUIRED)) { 2488 /* 2489 * Datagram socket. 2490 */ 2491 int32_t val; 2492 2493 so_unlock_single(so, SOLOCKED); 2494 mutex_exit(&so->so_lock); 2495 2496 val = 1; 2497 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2498 &val, (t_uscalar_t)sizeof (val), cr); 2499 2500 mutex_enter(&so->so_lock); 2501 so_lock_single(so); /* Set SOLOCKED */ 2502 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2503 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2504 soconnect_tpi_udp) { 2505 soisconnected(so); 2506 goto done; 2507 } 2508 /* 2509 * Send down T_CONN_REQ etc. 2510 * Clear fflag to avoid returning EWOULDBLOCK. 2511 */ 2512 fflag = 0; 2513 ASSERT(so->so_family != AF_UNIX); 2514 sti->sti_laddr_valid = 0; 2515 } else if (sti->sti_laddr_len != 0) { 2516 /* 2517 * If the local address or port was "any" then it may be 2518 * changed by the transport as a result of the 2519 * connect. Invalidate the cached version if we have one. 2520 */ 2521 switch (so->so_family) { 2522 case AF_INET: 2523 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2524 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2525 INADDR_ANY || 2526 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2527 sti->sti_laddr_valid = 0; 2528 break; 2529 2530 case AF_INET6: 2531 ASSERT(sti->sti_laddr_len == 2532 (socklen_t)sizeof (sin6_t)); 2533 if (IN6_IS_ADDR_UNSPECIFIED( 2534 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2535 IN6_IS_ADDR_V4MAPPED_ANY( 2536 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2537 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2538 sti->sti_laddr_valid = 0; 2539 break; 2540 2541 default: 2542 break; 2543 } 2544 } 2545 2546 /* 2547 * Check for failure of an earlier call 2548 */ 2549 if (so->so_error != 0) 2550 goto so_bad; 2551 2552 /* 2553 * Send down T_CONN_REQ. Message was allocated above. 2554 */ 2555 conn_req.PRIM_type = T_CONN_REQ; 2556 conn_req.DEST_length = addrlen; 2557 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2558 if (srclen == 0) { 2559 conn_req.OPT_length = 0; 2560 conn_req.OPT_offset = 0; 2561 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2562 soappendmsg(mp, addr, addrlen); 2563 } else { 2564 /* 2565 * There is a AF_UNIX sockaddr_un to include as a source 2566 * address option. 2567 */ 2568 struct T_opthdr toh; 2569 2570 toh.level = SOL_SOCKET; 2571 toh.name = SO_SRCADDR; 2572 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2573 toh.status = 0; 2574 conn_req.OPT_length = 2575 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2576 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2577 _TPI_ALIGN_TOPT(addrlen)); 2578 2579 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2580 soappendmsg(mp, addr, addrlen); 2581 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2582 soappendmsg(mp, &toh, sizeof (toh)); 2583 soappendmsg(mp, src, srclen); 2584 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2585 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2586 } 2587 /* 2588 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2589 * in order to have the right state when the T_CONN_CON shows up. 2590 */ 2591 soisconnecting(so); 2592 mutex_exit(&so->so_lock); 2593 2594 if (AU_AUDITING()) 2595 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2596 2597 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2598 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2599 mp = NULL; 2600 mutex_enter(&so->so_lock); 2601 if (error != 0) 2602 goto bad; 2603 2604 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2605 goto bad; 2606 2607 /* Allow other threads to access the socket */ 2608 so_unlock_single(so, SOLOCKED); 2609 need_unlock = B_FALSE; 2610 2611 /* 2612 * Wait until we get a T_CONN_CON or an error 2613 */ 2614 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2615 so_lock_single(so); /* Set SOLOCKED */ 2616 need_unlock = B_TRUE; 2617 } 2618 2619 done: 2620 freemsg(mp); 2621 switch (error) { 2622 case EINPROGRESS: 2623 case EALREADY: 2624 case EISCONN: 2625 case EINTR: 2626 /* Non-fatal errors */ 2627 sti->sti_laddr_valid = 0; 2628 /* FALLTHRU */ 2629 case 0: 2630 break; 2631 default: 2632 ASSERT(need_unlock); 2633 /* 2634 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2635 * and invalidate local-address cache 2636 */ 2637 so->so_state &= ~SS_ISCONNECTING; 2638 sti->sti_laddr_valid = 0; 2639 /* A discon_ind might have already unbound us */ 2640 if ((flags & _SOCONNECT_DID_BIND) && 2641 (so->so_state & SS_ISBOUND)) { 2642 int err; 2643 2644 err = sotpi_unbind(so, 0); 2645 /* LINTED - statement has no conseq */ 2646 if (err) { 2647 eprintsoline(so, err); 2648 } 2649 } 2650 break; 2651 } 2652 if (need_unlock) 2653 so_unlock_single(so, SOLOCKED); 2654 mutex_exit(&so->so_lock); 2655 return (error); 2656 2657 so_bad: error = sogeterr(so, B_TRUE); 2658 bad: eprintsoline(so, error); 2659 goto done; 2660 } 2661 2662 /* ARGSUSED */ 2663 int 2664 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2665 { 2666 struct T_ordrel_req ordrel_req; 2667 mblk_t *mp; 2668 uint_t old_state, state_change; 2669 int error = 0; 2670 sotpi_info_t *sti = SOTOTPI(so); 2671 2672 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2673 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2674 2675 mutex_enter(&so->so_lock); 2676 so_lock_single(so); /* Set SOLOCKED */ 2677 2678 /* 2679 * SunOS 4.X has no check for datagram sockets. 2680 * 5.X checks that it is connected (ENOTCONN) 2681 * X/Open requires that we check the connected state. 2682 */ 2683 if (!(so->so_state & SS_ISCONNECTED)) { 2684 if (!xnet_skip_checks) { 2685 error = ENOTCONN; 2686 if (xnet_check_print) { 2687 printf("sockfs: X/Open shutdown check " 2688 "caused ENOTCONN\n"); 2689 } 2690 } 2691 goto done; 2692 } 2693 /* 2694 * Record the current state and then perform any state changes. 2695 * Then use the difference between the old and new states to 2696 * determine which messages need to be sent. 2697 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2698 * duplicate calls to shutdown(). 2699 */ 2700 old_state = so->so_state; 2701 2702 switch (how) { 2703 case 0: 2704 socantrcvmore(so); 2705 break; 2706 case 1: 2707 socantsendmore(so); 2708 break; 2709 case 2: 2710 socantsendmore(so); 2711 socantrcvmore(so); 2712 break; 2713 default: 2714 error = EINVAL; 2715 goto done; 2716 } 2717 2718 /* 2719 * Assumes that the SS_CANT* flags are never cleared in the above code. 2720 */ 2721 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2722 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2723 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2724 2725 switch (state_change) { 2726 case 0: 2727 dprintso(so, 1, 2728 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2729 so->so_state)); 2730 goto done; 2731 2732 case SS_CANTRCVMORE: 2733 mutex_exit(&so->so_lock); 2734 strseteof(SOTOV(so), 1); 2735 /* 2736 * strseteof takes care of read side wakeups, 2737 * pollwakeups, and signals. 2738 */ 2739 /* 2740 * Get the read lock before flushing data to avoid problems 2741 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2742 */ 2743 mutex_enter(&so->so_lock); 2744 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2745 mutex_exit(&so->so_lock); 2746 2747 /* Flush read side queue */ 2748 strflushrq(SOTOV(so), FLUSHALL); 2749 2750 mutex_enter(&so->so_lock); 2751 so_unlock_read(so); /* Clear SOREADLOCKED */ 2752 break; 2753 2754 case SS_CANTSENDMORE: 2755 mutex_exit(&so->so_lock); 2756 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2757 mutex_enter(&so->so_lock); 2758 break; 2759 2760 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2761 mutex_exit(&so->so_lock); 2762 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2763 strseteof(SOTOV(so), 1); 2764 /* 2765 * strseteof takes care of read side wakeups, 2766 * pollwakeups, and signals. 2767 */ 2768 /* 2769 * Get the read lock before flushing data to avoid problems 2770 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2771 */ 2772 mutex_enter(&so->so_lock); 2773 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2774 mutex_exit(&so->so_lock); 2775 2776 /* Flush read side queue */ 2777 strflushrq(SOTOV(so), FLUSHALL); 2778 2779 mutex_enter(&so->so_lock); 2780 so_unlock_read(so); /* Clear SOREADLOCKED */ 2781 break; 2782 } 2783 2784 ASSERT(MUTEX_HELD(&so->so_lock)); 2785 2786 /* 2787 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2788 * was set due to this call and the new state has both of them set: 2789 * Send the AF_UNIX close indication 2790 * For T_COTS send a discon_ind 2791 * 2792 * If cantsend was set due to this call: 2793 * For T_COTSORD send an ordrel_ind 2794 * 2795 * Note that for T_CLTS there is no message sent here. 2796 */ 2797 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2798 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2799 /* 2800 * For SunOS 4.X compatibility we tell the other end 2801 * that we are unable to receive at this point. 2802 */ 2803 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2804 so_unix_close(so); 2805 2806 if (sti->sti_serv_type == T_COTS) 2807 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2808 } 2809 if ((state_change & SS_CANTSENDMORE) && 2810 (sti->sti_serv_type == T_COTS_ORD)) { 2811 /* Send an orderly release */ 2812 ordrel_req.PRIM_type = T_ORDREL_REQ; 2813 2814 mutex_exit(&so->so_lock); 2815 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2816 0, _ALLOC_SLEEP, cr); 2817 /* 2818 * Send down the T_ORDREL_REQ even if there is flow control. 2819 * This prevents shutdown from blocking. 2820 * Note that there is no T_OK_ACK for ordrel_req. 2821 */ 2822 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2823 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2824 mutex_enter(&so->so_lock); 2825 if (error) { 2826 eprintsoline(so, error); 2827 goto done; 2828 } 2829 } 2830 2831 done: 2832 so_unlock_single(so, SOLOCKED); 2833 mutex_exit(&so->so_lock); 2834 return (error); 2835 } 2836 2837 /* 2838 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2839 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2840 * that we have closed. 2841 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2842 * T_UNITDATA_REQ containing the same option. 2843 * 2844 * For SOCK_DGRAM half-connections (somebody connected to this end 2845 * but this end is not connect) we don't know where to send any 2846 * SO_UNIX_CLOSE. 2847 * 2848 * We have to ignore stream head errors just in case there has been 2849 * a shutdown(output). 2850 * Ignore any flow control to try to get the message more quickly to the peer. 2851 * While locally ignoring flow control solves the problem when there 2852 * is only the loopback transport on the stream it would not provide 2853 * the correct AF_UNIX socket semantics when one or more modules have 2854 * been pushed. 2855 */ 2856 void 2857 so_unix_close(struct sonode *so) 2858 { 2859 int error; 2860 struct T_opthdr toh; 2861 mblk_t *mp; 2862 sotpi_info_t *sti = SOTOTPI(so); 2863 2864 ASSERT(MUTEX_HELD(&so->so_lock)); 2865 2866 ASSERT(so->so_family == AF_UNIX); 2867 2868 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2869 (SS_ISCONNECTED|SS_ISBOUND)) 2870 return; 2871 2872 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2873 (void *)so, pr_state(so->so_state, so->so_mode))); 2874 2875 toh.level = SOL_SOCKET; 2876 toh.name = SO_UNIX_CLOSE; 2877 2878 /* zero length + header */ 2879 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2880 toh.status = 0; 2881 2882 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2883 struct T_optdata_req tdr; 2884 2885 tdr.PRIM_type = T_OPTDATA_REQ; 2886 tdr.DATA_flag = 0; 2887 2888 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2889 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2890 2891 /* NOTE: holding so_lock while sleeping */ 2892 mp = soallocproto2(&tdr, sizeof (tdr), 2893 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2894 } else { 2895 struct T_unitdata_req tudr; 2896 void *addr; 2897 socklen_t addrlen; 2898 void *src; 2899 socklen_t srclen; 2900 struct T_opthdr toh2; 2901 t_scalar_t size; 2902 2903 /* Connecteded DGRAM socket */ 2904 2905 /* 2906 * For AF_UNIX the destination address is translated to 2907 * an internal name and the source address is passed as 2908 * an option. 2909 */ 2910 /* 2911 * Length and family checks. 2912 */ 2913 error = so_addr_verify(so, sti->sti_faddr_sa, 2914 (t_uscalar_t)sti->sti_faddr_len); 2915 if (error) { 2916 eprintsoline(so, error); 2917 return; 2918 } 2919 if (sti->sti_faddr_noxlate) { 2920 /* 2921 * Already have a transport internal address. Do not 2922 * pass any (transport internal) source address. 2923 */ 2924 addr = sti->sti_faddr_sa; 2925 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2926 src = NULL; 2927 srclen = 0; 2928 } else { 2929 /* 2930 * Pass the sockaddr_un source address as an option 2931 * and translate the remote address. 2932 * Holding so_lock thus sti_laddr_sa can not change. 2933 */ 2934 src = sti->sti_laddr_sa; 2935 srclen = (socklen_t)sti->sti_laddr_len; 2936 dprintso(so, 1, 2937 ("so_ux_close: srclen %d, src %p\n", 2938 srclen, src)); 2939 error = so_ux_addr_xlate(so, 2940 sti->sti_faddr_sa, 2941 (socklen_t)sti->sti_faddr_len, 0, 2942 &addr, &addrlen); 2943 if (error) { 2944 eprintsoline(so, error); 2945 return; 2946 } 2947 } 2948 tudr.PRIM_type = T_UNITDATA_REQ; 2949 tudr.DEST_length = addrlen; 2950 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2951 if (srclen == 0) { 2952 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2953 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2954 _TPI_ALIGN_TOPT(addrlen)); 2955 2956 size = tudr.OPT_offset + tudr.OPT_length; 2957 /* NOTE: holding so_lock while sleeping */ 2958 mp = soallocproto2(&tudr, sizeof (tudr), 2959 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2960 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2961 soappendmsg(mp, &toh, sizeof (toh)); 2962 } else { 2963 /* 2964 * There is a AF_UNIX sockaddr_un to include as a 2965 * source address option. 2966 */ 2967 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2968 _TPI_ALIGN_TOPT(srclen)); 2969 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2970 _TPI_ALIGN_TOPT(addrlen)); 2971 2972 toh2.level = SOL_SOCKET; 2973 toh2.name = SO_SRCADDR; 2974 toh2.len = (t_uscalar_t)(srclen + 2975 sizeof (struct T_opthdr)); 2976 toh2.status = 0; 2977 2978 size = tudr.OPT_offset + tudr.OPT_length; 2979 2980 /* NOTE: holding so_lock while sleeping */ 2981 mp = soallocproto2(&tudr, sizeof (tudr), 2982 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2983 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2984 soappendmsg(mp, &toh, sizeof (toh)); 2985 soappendmsg(mp, &toh2, sizeof (toh2)); 2986 soappendmsg(mp, src, srclen); 2987 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2988 } 2989 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2990 } 2991 mutex_exit(&so->so_lock); 2992 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2993 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2994 mutex_enter(&so->so_lock); 2995 } 2996 2997 /* 2998 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2999 * In addition, the caller typically verifies that there is some 3000 * potential state to clear by checking 3001 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 3002 * before calling this routine. 3003 * Note that such a check can be made without holding so_lock since 3004 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 3005 * decrements sti_oobsigcnt. 3006 * 3007 * When data is read *after* the point that all pending 3008 * oob data has been consumed the oob indication is cleared. 3009 * 3010 * This logic keeps select/poll returning POLLRDBAND and 3011 * SIOCATMARK returning true until we have read past 3012 * the mark. 3013 */ 3014 static void 3015 sorecv_update_oobstate(struct sonode *so) 3016 { 3017 sotpi_info_t *sti = SOTOTPI(so); 3018 3019 mutex_enter(&so->so_lock); 3020 ASSERT(so_verify_oobstate(so)); 3021 dprintso(so, 1, 3022 ("sorecv_update_oobstate: counts %d/%d state %s\n", 3023 sti->sti_oobsigcnt, 3024 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 3025 if (sti->sti_oobsigcnt == 0) { 3026 /* No more pending oob indications */ 3027 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 3028 freemsg(so->so_oobmsg); 3029 so->so_oobmsg = NULL; 3030 } 3031 ASSERT(so_verify_oobstate(so)); 3032 mutex_exit(&so->so_lock); 3033 } 3034 3035 /* 3036 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 3037 */ 3038 static int 3039 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 3040 { 3041 sotpi_info_t *sti = SOTOTPI(so); 3042 int error = 0; 3043 mblk_t *tmp = NULL; 3044 mblk_t *pmp = NULL; 3045 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 3046 3047 ASSERT(nmp != NULL); 3048 3049 while (nmp != NULL && uiop->uio_resid > 0) { 3050 ssize_t n; 3051 3052 if (DB_TYPE(nmp) == M_DATA) { 3053 /* 3054 * We have some data, uiomove up to resid bytes. 3055 */ 3056 n = MIN(MBLKL(nmp), uiop->uio_resid); 3057 if (n > 0) 3058 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 3059 nmp->b_rptr += n; 3060 if (nmp->b_rptr == nmp->b_wptr) { 3061 pmp = nmp; 3062 nmp = nmp->b_cont; 3063 } 3064 if (error) 3065 break; 3066 } else { 3067 /* 3068 * We only handle data, save for caller to handle. 3069 */ 3070 if (pmp != NULL) { 3071 pmp->b_cont = nmp->b_cont; 3072 } 3073 nmp->b_cont = NULL; 3074 if (*rmp == NULL) { 3075 *rmp = nmp; 3076 } else { 3077 tmp->b_cont = nmp; 3078 } 3079 nmp = nmp->b_cont; 3080 tmp = nmp; 3081 } 3082 } 3083 if (pmp != NULL) { 3084 /* Free any mblk_t(s) which we have consumed */ 3085 pmp->b_cont = NULL; 3086 freemsg(sti->sti_nl7c_rcv_mp); 3087 } 3088 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3089 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3090 if (error == 0) { 3091 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3092 3093 error = p->r_v.r_v2; 3094 p->r_v.r_v2 = 0; 3095 } 3096 rp->r_vals = sti->sti_nl7c_rcv_rval; 3097 sti->sti_nl7c_rcv_rval = 0; 3098 } else { 3099 /* More mblk_t(s) to process so no rval to return */ 3100 rp->r_vals = 0; 3101 } 3102 return (error); 3103 } 3104 /* 3105 * Receive the next message on the queue. 3106 * If msg_controllen is non-zero when called the caller is interested in 3107 * any received control info (options). 3108 * If msg_namelen is non-zero when called the caller is interested in 3109 * any received source address. 3110 * The routine returns with msg_control and msg_name pointing to 3111 * kmem_alloc'ed memory which the caller has to free. 3112 */ 3113 /* ARGSUSED */ 3114 int 3115 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3116 struct cred *cr) 3117 { 3118 union T_primitives *tpr; 3119 mblk_t *mp; 3120 uchar_t pri; 3121 int pflag, opflag; 3122 void *control; 3123 t_uscalar_t controllen; 3124 t_uscalar_t namelen; 3125 int so_state = so->so_state; /* Snapshot */ 3126 ssize_t saved_resid; 3127 rval_t rval; 3128 int flags; 3129 clock_t timout; 3130 int error = 0; 3131 sotpi_info_t *sti = SOTOTPI(so); 3132 3133 flags = msg->msg_flags; 3134 msg->msg_flags = 0; 3135 3136 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3137 (void *)so, (void *)msg, flags, 3138 pr_state(so->so_state, so->so_mode), so->so_error)); 3139 3140 if (so->so_version == SOV_STREAM) { 3141 so_update_attrs(so, SOACC); 3142 /* The imaginary "sockmod" has been popped - act as a stream */ 3143 return (strread(SOTOV(so), uiop, cr)); 3144 } 3145 3146 /* 3147 * If we are not connected because we have never been connected 3148 * we return ENOTCONN. If we have been connected (but are no longer 3149 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3150 * the EOF. 3151 * 3152 * An alternative would be to post an ENOTCONN error in stream head 3153 * (read+write) and clear it when we're connected. However, that error 3154 * would cause incorrect poll/select behavior! 3155 */ 3156 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3157 (so->so_mode & SM_CONNREQUIRED)) { 3158 return (ENOTCONN); 3159 } 3160 3161 /* 3162 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3163 * after checking that the read queue is empty) and returns zero. 3164 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3165 * is zero. 3166 */ 3167 3168 if (flags & MSG_OOB) { 3169 /* Check that the transport supports OOB */ 3170 if (!(so->so_mode & SM_EXDATA)) 3171 return (EOPNOTSUPP); 3172 so_update_attrs(so, SOACC); 3173 return (sorecvoob(so, msg, uiop, flags, 3174 (so->so_options & SO_OOBINLINE))); 3175 } 3176 3177 so_update_attrs(so, SOACC); 3178 3179 /* 3180 * Set msg_controllen and msg_namelen to zero here to make it 3181 * simpler in the cases that no control or name is returned. 3182 */ 3183 controllen = msg->msg_controllen; 3184 namelen = msg->msg_namelen; 3185 msg->msg_controllen = 0; 3186 msg->msg_namelen = 0; 3187 3188 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3189 namelen, controllen)); 3190 3191 mutex_enter(&so->so_lock); 3192 /* 3193 * If an NL7C enabled socket and not waiting for write data. 3194 */ 3195 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3196 NL7C_ENABLED) { 3197 if (sti->sti_nl7c_uri) { 3198 /* Close uri processing for a previous request */ 3199 nl7c_close(so); 3200 } 3201 if ((so_state & SS_CANTRCVMORE) && 3202 sti->sti_nl7c_rcv_mp == NULL) { 3203 /* Nothing to process, EOF */ 3204 mutex_exit(&so->so_lock); 3205 return (0); 3206 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3207 /* Persistent NL7C socket, try to process request */ 3208 boolean_t ret; 3209 3210 ret = nl7c_process(so, 3211 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3212 rval.r_vals = sti->sti_nl7c_rcv_rval; 3213 error = rval.r_v.r_v2; 3214 if (error) { 3215 /* Error of some sort, return it */ 3216 mutex_exit(&so->so_lock); 3217 return (error); 3218 } 3219 if (sti->sti_nl7c_flags && 3220 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3221 /* 3222 * Still an NL7C socket and no data 3223 * to pass up to the caller. 3224 */ 3225 mutex_exit(&so->so_lock); 3226 if (ret) { 3227 /* EOF */ 3228 return (0); 3229 } else { 3230 /* Need more data */ 3231 return (EAGAIN); 3232 } 3233 } 3234 } else { 3235 /* 3236 * Not persistent so no further NL7C processing. 3237 */ 3238 sti->sti_nl7c_flags = 0; 3239 } 3240 } 3241 /* 3242 * Only one reader is allowed at any given time. This is needed 3243 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3244 * 3245 * This is slightly different that BSD behavior in that it fails with 3246 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3247 * is single-threaded using sblock(), which is dropped while waiting 3248 * for data to appear. The difference shows up e.g. if one 3249 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3250 * does use nonblocking io and different threads are reading each 3251 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3252 * in this case as long as the read queue doesn't get empty. 3253 * In this implementation the thread using nonblocking io can 3254 * get an EWOULDBLOCK error due to the blocking thread executing 3255 * e.g. in the uiomove in kstrgetmsg. 3256 * This difference is not believed to be significant. 3257 */ 3258 /* Set SOREADLOCKED */ 3259 error = so_lock_read_intr(so, 3260 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3261 mutex_exit(&so->so_lock); 3262 if (error) 3263 return (error); 3264 3265 /* 3266 * Tell kstrgetmsg to not inspect the stream head errors until all 3267 * queued data has been consumed. 3268 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3269 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3270 * 3271 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3272 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3273 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3274 */ 3275 pflag = MSG_ANY | MSG_DELAYERROR; 3276 if (flags & MSG_PEEK) { 3277 pflag |= MSG_IPEEK; 3278 flags &= ~MSG_WAITALL; 3279 } 3280 if (so->so_mode & SM_ATOMIC) 3281 pflag |= MSG_DISCARDTAIL; 3282 3283 if (flags & MSG_DONTWAIT) 3284 timout = 0; 3285 else 3286 timout = -1; 3287 opflag = pflag; 3288 retry: 3289 saved_resid = uiop->uio_resid; 3290 pri = 0; 3291 mp = NULL; 3292 if (sti->sti_nl7c_rcv_mp != NULL) { 3293 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3294 error = nl7c_sorecv(so, &mp, uiop, &rval); 3295 } else { 3296 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3297 timout, &rval); 3298 } 3299 if (error != 0) { 3300 /* kstrgetmsg returns ETIME when timeout expires */ 3301 if (error == ETIME) 3302 error = EWOULDBLOCK; 3303 goto out; 3304 } 3305 /* 3306 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3307 * For non-datagrams MOREDATA is used to set MSG_EOR. 3308 */ 3309 ASSERT(!(rval.r_val1 & MORECTL)); 3310 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3311 msg->msg_flags |= MSG_TRUNC; 3312 3313 if (mp == NULL) { 3314 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3315 /* 3316 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3317 * The draft Posix socket spec states that the mark should 3318 * not be cleared when peeking. We follow the latter. 3319 */ 3320 if ((so->so_state & 3321 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3322 (uiop->uio_resid != saved_resid) && 3323 !(flags & MSG_PEEK)) { 3324 sorecv_update_oobstate(so); 3325 } 3326 3327 mutex_enter(&so->so_lock); 3328 /* Set MSG_EOR based on MOREDATA */ 3329 if (!(rval.r_val1 & MOREDATA)) { 3330 if (so->so_state & SS_SAVEDEOR) { 3331 msg->msg_flags |= MSG_EOR; 3332 so->so_state &= ~SS_SAVEDEOR; 3333 } 3334 } 3335 /* 3336 * If some data was received (i.e. not EOF) and the 3337 * read/recv* has not been satisfied wait for some more. 3338 */ 3339 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3340 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3341 mutex_exit(&so->so_lock); 3342 pflag = opflag | MSG_NOMARK; 3343 goto retry; 3344 } 3345 goto out_locked; 3346 } 3347 3348 /* strsock_proto has already verified length and alignment */ 3349 tpr = (union T_primitives *)mp->b_rptr; 3350 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3351 3352 switch (tpr->type) { 3353 case T_DATA_IND: { 3354 if ((so->so_state & 3355 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3356 (uiop->uio_resid != saved_resid) && 3357 !(flags & MSG_PEEK)) { 3358 sorecv_update_oobstate(so); 3359 } 3360 3361 /* 3362 * Set msg_flags to MSG_EOR based on 3363 * MORE_flag and MOREDATA. 3364 */ 3365 mutex_enter(&so->so_lock); 3366 so->so_state &= ~SS_SAVEDEOR; 3367 if (!(tpr->data_ind.MORE_flag & 1)) { 3368 if (!(rval.r_val1 & MOREDATA)) 3369 msg->msg_flags |= MSG_EOR; 3370 else 3371 so->so_state |= SS_SAVEDEOR; 3372 } 3373 freemsg(mp); 3374 /* 3375 * If some data was received (i.e. not EOF) and the 3376 * read/recv* has not been satisfied wait for some more. 3377 */ 3378 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3379 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3380 mutex_exit(&so->so_lock); 3381 pflag = opflag | MSG_NOMARK; 3382 goto retry; 3383 } 3384 goto out_locked; 3385 } 3386 case T_UNITDATA_IND: { 3387 void *addr; 3388 t_uscalar_t addrlen; 3389 void *abuf; 3390 t_uscalar_t optlen; 3391 void *opt; 3392 3393 if ((so->so_state & 3394 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3395 (uiop->uio_resid != saved_resid) && 3396 !(flags & MSG_PEEK)) { 3397 sorecv_update_oobstate(so); 3398 } 3399 3400 if (namelen != 0) { 3401 /* Caller wants source address */ 3402 addrlen = tpr->unitdata_ind.SRC_length; 3403 addr = sogetoff(mp, 3404 tpr->unitdata_ind.SRC_offset, 3405 addrlen, 1); 3406 if (addr == NULL) { 3407 freemsg(mp); 3408 error = EPROTO; 3409 eprintsoline(so, error); 3410 goto out; 3411 } 3412 if (so->so_family == AF_UNIX) { 3413 /* 3414 * Can not use the transport level address. 3415 * If there is a SO_SRCADDR option carrying 3416 * the socket level address it will be 3417 * extracted below. 3418 */ 3419 addr = NULL; 3420 addrlen = 0; 3421 } 3422 } 3423 optlen = tpr->unitdata_ind.OPT_length; 3424 if (optlen != 0) { 3425 t_uscalar_t ncontrollen; 3426 3427 /* 3428 * Extract any source address option. 3429 * Determine how large cmsg buffer is needed. 3430 */ 3431 opt = sogetoff(mp, 3432 tpr->unitdata_ind.OPT_offset, 3433 optlen, __TPI_ALIGN_SIZE); 3434 3435 if (opt == NULL) { 3436 freemsg(mp); 3437 error = EPROTO; 3438 eprintsoline(so, error); 3439 goto out; 3440 } 3441 if (so->so_family == AF_UNIX) 3442 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3443 ncontrollen = so_cmsglen(mp, opt, optlen, 3444 !(flags & MSG_XPG4_2)); 3445 if (controllen != 0) 3446 controllen = ncontrollen; 3447 else if (ncontrollen != 0) 3448 msg->msg_flags |= MSG_CTRUNC; 3449 } else { 3450 controllen = 0; 3451 } 3452 3453 if (namelen != 0) { 3454 /* 3455 * Return address to caller. 3456 * Caller handles truncation if length 3457 * exceeds msg_namelen. 3458 * NOTE: AF_UNIX NUL termination is ensured by 3459 * the sender's copyin_name(). 3460 */ 3461 abuf = kmem_alloc(addrlen, KM_SLEEP); 3462 3463 bcopy(addr, abuf, addrlen); 3464 msg->msg_name = abuf; 3465 msg->msg_namelen = addrlen; 3466 } 3467 3468 if (controllen != 0) { 3469 /* 3470 * Return control msg to caller. 3471 * Caller handles truncation if length 3472 * exceeds msg_controllen. 3473 */ 3474 control = kmem_zalloc(controllen, KM_SLEEP); 3475 3476 error = so_opt2cmsg(mp, opt, optlen, 3477 !(flags & MSG_XPG4_2), 3478 control, controllen); 3479 if (error) { 3480 freemsg(mp); 3481 if (msg->msg_namelen != 0) 3482 kmem_free(msg->msg_name, 3483 msg->msg_namelen); 3484 kmem_free(control, controllen); 3485 eprintsoline(so, error); 3486 goto out; 3487 } 3488 msg->msg_control = control; 3489 msg->msg_controllen = controllen; 3490 } 3491 3492 freemsg(mp); 3493 goto out; 3494 } 3495 case T_OPTDATA_IND: { 3496 struct T_optdata_req *tdr; 3497 void *opt; 3498 t_uscalar_t optlen; 3499 3500 if ((so->so_state & 3501 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3502 (uiop->uio_resid != saved_resid) && 3503 !(flags & MSG_PEEK)) { 3504 sorecv_update_oobstate(so); 3505 } 3506 3507 tdr = (struct T_optdata_req *)mp->b_rptr; 3508 optlen = tdr->OPT_length; 3509 if (optlen != 0) { 3510 t_uscalar_t ncontrollen; 3511 /* 3512 * Determine how large cmsg buffer is needed. 3513 */ 3514 opt = sogetoff(mp, 3515 tpr->optdata_ind.OPT_offset, 3516 optlen, __TPI_ALIGN_SIZE); 3517 3518 if (opt == NULL) { 3519 freemsg(mp); 3520 error = EPROTO; 3521 eprintsoline(so, error); 3522 goto out; 3523 } 3524 3525 ncontrollen = so_cmsglen(mp, opt, optlen, 3526 !(flags & MSG_XPG4_2)); 3527 if (controllen != 0) 3528 controllen = ncontrollen; 3529 else if (ncontrollen != 0) 3530 msg->msg_flags |= MSG_CTRUNC; 3531 } else { 3532 controllen = 0; 3533 } 3534 3535 if (controllen != 0) { 3536 /* 3537 * Return control msg to caller. 3538 * Caller handles truncation if length 3539 * exceeds msg_controllen. 3540 */ 3541 control = kmem_zalloc(controllen, KM_SLEEP); 3542 3543 error = so_opt2cmsg(mp, opt, optlen, 3544 !(flags & MSG_XPG4_2), 3545 control, controllen); 3546 if (error) { 3547 freemsg(mp); 3548 kmem_free(control, controllen); 3549 eprintsoline(so, error); 3550 goto out; 3551 } 3552 msg->msg_control = control; 3553 msg->msg_controllen = controllen; 3554 } 3555 3556 /* 3557 * Set msg_flags to MSG_EOR based on 3558 * DATA_flag and MOREDATA. 3559 */ 3560 mutex_enter(&so->so_lock); 3561 so->so_state &= ~SS_SAVEDEOR; 3562 if (!(tpr->data_ind.MORE_flag & 1)) { 3563 if (!(rval.r_val1 & MOREDATA)) 3564 msg->msg_flags |= MSG_EOR; 3565 else 3566 so->so_state |= SS_SAVEDEOR; 3567 } 3568 freemsg(mp); 3569 /* 3570 * If some data was received (i.e. not EOF) and the 3571 * read/recv* has not been satisfied wait for some more. 3572 * Not possible to wait if control info was received. 3573 */ 3574 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3575 controllen == 0 && 3576 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3577 mutex_exit(&so->so_lock); 3578 pflag = opflag | MSG_NOMARK; 3579 goto retry; 3580 } 3581 goto out_locked; 3582 } 3583 case T_EXDATA_IND: { 3584 dprintso(so, 1, 3585 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3586 "state %s\n", 3587 sti->sti_oobsigcnt, sti->sti_oobcnt, 3588 saved_resid - uiop->uio_resid, 3589 pr_state(so->so_state, so->so_mode))); 3590 /* 3591 * kstrgetmsg handles MSGMARK so there is nothing to 3592 * inspect in the T_EXDATA_IND. 3593 * strsock_proto makes the stream head queue the T_EXDATA_IND 3594 * as a separate message with no M_DATA component. Furthermore, 3595 * the stream head does not consolidate M_DATA messages onto 3596 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3597 * remains a message by itself. This is needed since MSGMARK 3598 * marks both the whole message as well as the last byte 3599 * of the message. 3600 */ 3601 freemsg(mp); 3602 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3603 if (flags & MSG_PEEK) { 3604 /* 3605 * Even though we are peeking we consume the 3606 * T_EXDATA_IND thereby moving the mark information 3607 * to SS_RCVATMARK. Then the oob code below will 3608 * retry the peeking kstrgetmsg. 3609 * Note that the stream head read queue is 3610 * never flushed without holding SOREADLOCKED 3611 * thus the T_EXDATA_IND can not disappear 3612 * underneath us. 3613 */ 3614 dprintso(so, 1, 3615 ("sotpi_recvmsg: consume EXDATA_IND " 3616 "counts %d/%d state %s\n", 3617 sti->sti_oobsigcnt, 3618 sti->sti_oobcnt, 3619 pr_state(so->so_state, so->so_mode))); 3620 3621 pflag = MSG_ANY | MSG_DELAYERROR; 3622 if (so->so_mode & SM_ATOMIC) 3623 pflag |= MSG_DISCARDTAIL; 3624 3625 pri = 0; 3626 mp = NULL; 3627 3628 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3629 &pri, &pflag, (clock_t)-1, &rval); 3630 ASSERT(uiop->uio_resid == saved_resid); 3631 3632 if (error) { 3633 #ifdef SOCK_DEBUG 3634 if (error != EWOULDBLOCK && error != EINTR) { 3635 eprintsoline(so, error); 3636 } 3637 #endif /* SOCK_DEBUG */ 3638 goto out; 3639 } 3640 ASSERT(mp); 3641 tpr = (union T_primitives *)mp->b_rptr; 3642 ASSERT(tpr->type == T_EXDATA_IND); 3643 freemsg(mp); 3644 } /* end "if (flags & MSG_PEEK)" */ 3645 3646 /* 3647 * Decrement the number of queued and pending oob. 3648 * 3649 * SS_RCVATMARK is cleared when we read past a mark. 3650 * SS_HAVEOOBDATA is cleared when we've read past the 3651 * last mark. 3652 * SS_OOBPEND is cleared if we've read past the last 3653 * mark and no (new) SIGURG has been posted. 3654 */ 3655 mutex_enter(&so->so_lock); 3656 ASSERT(so_verify_oobstate(so)); 3657 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3658 ASSERT(sti->sti_oobsigcnt > 0); 3659 sti->sti_oobsigcnt--; 3660 ASSERT(sti->sti_oobcnt > 0); 3661 sti->sti_oobcnt--; 3662 /* 3663 * Since the T_EXDATA_IND has been removed from the stream 3664 * head, but we have not read data past the mark, 3665 * sockfs needs to track that the socket is still at the mark. 3666 * 3667 * Since no data was received call kstrgetmsg again to wait 3668 * for data. 3669 */ 3670 so->so_state |= SS_RCVATMARK; 3671 mutex_exit(&so->so_lock); 3672 dprintso(so, 1, 3673 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3674 sti->sti_oobsigcnt, sti->sti_oobcnt, 3675 pr_state(so->so_state, so->so_mode))); 3676 pflag = opflag; 3677 goto retry; 3678 } 3679 default: 3680 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3681 (void *)so, tpr->type, (void *)mp); 3682 ASSERT(0); 3683 freemsg(mp); 3684 error = EPROTO; 3685 eprintsoline(so, error); 3686 goto out; 3687 } 3688 /* NOTREACHED */ 3689 out: 3690 mutex_enter(&so->so_lock); 3691 out_locked: 3692 so_unlock_read(so); /* Clear SOREADLOCKED */ 3693 mutex_exit(&so->so_lock); 3694 return (error); 3695 } 3696 3697 /* 3698 * Sending data with options on a datagram socket. 3699 * Assumes caller has verified that SS_ISBOUND etc. are set. 3700 */ 3701 static int 3702 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3703 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3704 { 3705 struct T_unitdata_req tudr; 3706 mblk_t *mp; 3707 int error; 3708 void *addr; 3709 socklen_t addrlen; 3710 void *src; 3711 socklen_t srclen; 3712 ssize_t len; 3713 int size; 3714 struct T_opthdr toh; 3715 struct fdbuf *fdbuf; 3716 t_uscalar_t optlen; 3717 void *fds; 3718 int fdlen; 3719 sotpi_info_t *sti = SOTOTPI(so); 3720 3721 ASSERT(name && namelen); 3722 ASSERT(control && controllen); 3723 3724 len = uiop->uio_resid; 3725 if (len > (ssize_t)sti->sti_tidu_size) { 3726 return (EMSGSIZE); 3727 } 3728 3729 /* 3730 * For AF_UNIX the destination address is translated to an internal 3731 * name and the source address is passed as an option. 3732 * Also, file descriptors are passed as file pointers in an 3733 * option. 3734 */ 3735 3736 /* 3737 * Length and family checks. 3738 */ 3739 error = so_addr_verify(so, name, namelen); 3740 if (error) { 3741 eprintsoline(so, error); 3742 return (error); 3743 } 3744 if (so->so_family == AF_UNIX) { 3745 if (sti->sti_faddr_noxlate) { 3746 /* 3747 * Already have a transport internal address. Do not 3748 * pass any (transport internal) source address. 3749 */ 3750 addr = name; 3751 addrlen = namelen; 3752 src = NULL; 3753 srclen = 0; 3754 } else { 3755 /* 3756 * Pass the sockaddr_un source address as an option 3757 * and translate the remote address. 3758 * 3759 * Note that this code does not prevent sti_laddr_sa 3760 * from changing while it is being used. Thus 3761 * if an unbind+bind occurs concurrently with this 3762 * send the peer might see a partially new and a 3763 * partially old "from" address. 3764 */ 3765 src = sti->sti_laddr_sa; 3766 srclen = (t_uscalar_t)sti->sti_laddr_len; 3767 dprintso(so, 1, 3768 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3769 srclen, src)); 3770 error = so_ux_addr_xlate(so, name, namelen, 3771 (flags & MSG_XPG4_2), 3772 &addr, &addrlen); 3773 if (error) { 3774 eprintsoline(so, error); 3775 return (error); 3776 } 3777 } 3778 } else { 3779 addr = name; 3780 addrlen = namelen; 3781 src = NULL; 3782 srclen = 0; 3783 } 3784 optlen = so_optlen(control, controllen, 3785 !(flags & MSG_XPG4_2)); 3786 tudr.PRIM_type = T_UNITDATA_REQ; 3787 tudr.DEST_length = addrlen; 3788 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3789 if (srclen != 0) 3790 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3791 _TPI_ALIGN_TOPT(srclen)); 3792 else 3793 tudr.OPT_length = optlen; 3794 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3795 _TPI_ALIGN_TOPT(addrlen)); 3796 3797 size = tudr.OPT_offset + tudr.OPT_length; 3798 3799 /* 3800 * File descriptors only when SM_FDPASSING set. 3801 */ 3802 error = so_getfdopt(control, controllen, 3803 !(flags & MSG_XPG4_2), &fds, &fdlen); 3804 if (error) 3805 return (error); 3806 if (fdlen != -1) { 3807 if (!(so->so_mode & SM_FDPASSING)) 3808 return (EOPNOTSUPP); 3809 3810 error = fdbuf_create(fds, fdlen, &fdbuf); 3811 if (error) 3812 return (error); 3813 mp = fdbuf_allocmsg(size, fdbuf); 3814 } else { 3815 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3816 if (mp == NULL) { 3817 /* 3818 * Caught a signal waiting for memory. 3819 * Let send* return EINTR. 3820 */ 3821 return (EINTR); 3822 } 3823 } 3824 soappendmsg(mp, &tudr, sizeof (tudr)); 3825 soappendmsg(mp, addr, addrlen); 3826 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3827 3828 if (fdlen != -1) { 3829 ASSERT(fdbuf != NULL); 3830 toh.level = SOL_SOCKET; 3831 toh.name = SO_FILEP; 3832 toh.len = fdbuf->fd_size + 3833 (t_uscalar_t)sizeof (struct T_opthdr); 3834 toh.status = 0; 3835 soappendmsg(mp, &toh, sizeof (toh)); 3836 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3837 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3838 } 3839 if (srclen != 0) { 3840 /* 3841 * There is a AF_UNIX sockaddr_un to include as a source 3842 * address option. 3843 */ 3844 toh.level = SOL_SOCKET; 3845 toh.name = SO_SRCADDR; 3846 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3847 toh.status = 0; 3848 soappendmsg(mp, &toh, sizeof (toh)); 3849 soappendmsg(mp, src, srclen); 3850 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3851 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3852 } 3853 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3854 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3855 /* At most 3 bytes left in the message */ 3856 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3857 ASSERT(MBLKL(mp) <= (ssize_t)size); 3858 3859 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3860 if (AU_AUDITING()) 3861 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3862 3863 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3864 #ifdef SOCK_DEBUG 3865 if (error) { 3866 eprintsoline(so, error); 3867 } 3868 #endif /* SOCK_DEBUG */ 3869 return (error); 3870 } 3871 3872 /* 3873 * Sending data with options on a connected stream socket. 3874 * Assumes caller has verified that SS_ISCONNECTED is set. 3875 */ 3876 static int 3877 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3878 t_uscalar_t controllen, int flags) 3879 { 3880 struct T_optdata_req tdr; 3881 mblk_t *mp; 3882 int error; 3883 ssize_t iosize; 3884 int size; 3885 struct fdbuf *fdbuf; 3886 t_uscalar_t optlen; 3887 void *fds; 3888 int fdlen; 3889 struct T_opthdr toh; 3890 sotpi_info_t *sti = SOTOTPI(so); 3891 3892 dprintso(so, 1, 3893 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3894 3895 /* 3896 * Has to be bound and connected. However, since no locks are 3897 * held the state could have changed after sotpi_sendmsg checked it 3898 * thus it is not possible to ASSERT on the state. 3899 */ 3900 3901 /* Options on connection-oriented only when SM_OPTDATA set. */ 3902 if (!(so->so_mode & SM_OPTDATA)) 3903 return (EOPNOTSUPP); 3904 3905 do { 3906 /* 3907 * Set the MORE flag if uio_resid does not fit in this 3908 * message or if the caller passed in "more". 3909 * Error for transports with zero tidu_size. 3910 */ 3911 tdr.PRIM_type = T_OPTDATA_REQ; 3912 iosize = sti->sti_tidu_size; 3913 if (iosize <= 0) 3914 return (EMSGSIZE); 3915 if (uiop->uio_resid > iosize) { 3916 tdr.DATA_flag = 1; 3917 } else { 3918 if (more) 3919 tdr.DATA_flag = 1; 3920 else 3921 tdr.DATA_flag = 0; 3922 iosize = uiop->uio_resid; 3923 } 3924 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3925 tdr.DATA_flag, iosize)); 3926 3927 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3928 tdr.OPT_length = optlen; 3929 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3930 3931 size = (int)sizeof (tdr) + optlen; 3932 /* 3933 * File descriptors only when SM_FDPASSING set. 3934 */ 3935 error = so_getfdopt(control, controllen, 3936 !(flags & MSG_XPG4_2), &fds, &fdlen); 3937 if (error) 3938 return (error); 3939 if (fdlen != -1) { 3940 if (!(so->so_mode & SM_FDPASSING)) 3941 return (EOPNOTSUPP); 3942 3943 error = fdbuf_create(fds, fdlen, &fdbuf); 3944 if (error) 3945 return (error); 3946 mp = fdbuf_allocmsg(size, fdbuf); 3947 } else { 3948 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3949 if (mp == NULL) { 3950 /* 3951 * Caught a signal waiting for memory. 3952 * Let send* return EINTR. 3953 */ 3954 return (EINTR); 3955 } 3956 } 3957 soappendmsg(mp, &tdr, sizeof (tdr)); 3958 3959 if (fdlen != -1) { 3960 ASSERT(fdbuf != NULL); 3961 toh.level = SOL_SOCKET; 3962 toh.name = SO_FILEP; 3963 toh.len = fdbuf->fd_size + 3964 (t_uscalar_t)sizeof (struct T_opthdr); 3965 toh.status = 0; 3966 soappendmsg(mp, &toh, sizeof (toh)); 3967 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3968 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3969 } 3970 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3971 /* At most 3 bytes left in the message */ 3972 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3973 ASSERT(MBLKL(mp) <= (ssize_t)size); 3974 3975 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3976 3977 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3978 0, MSG_BAND, 0); 3979 if (error) { 3980 eprintsoline(so, error); 3981 return (error); 3982 } 3983 control = NULL; 3984 if (uiop->uio_resid > 0) { 3985 /* 3986 * Recheck for fatal errors. Fail write even though 3987 * some data have been written. This is consistent 3988 * with strwrite semantics and BSD sockets semantics. 3989 */ 3990 if (so->so_state & SS_CANTSENDMORE) { 3991 eprintsoline(so, error); 3992 return (EPIPE); 3993 } 3994 if (so->so_error != 0) { 3995 mutex_enter(&so->so_lock); 3996 error = sogeterr(so, B_TRUE); 3997 mutex_exit(&so->so_lock); 3998 if (error != 0) { 3999 eprintsoline(so, error); 4000 return (error); 4001 } 4002 } 4003 } 4004 } while (uiop->uio_resid > 0); 4005 return (0); 4006 } 4007 4008 /* 4009 * Sending data on a datagram socket. 4010 * Assumes caller has verified that SS_ISBOUND etc. are set. 4011 * 4012 * For AF_UNIX the destination address is translated to an internal 4013 * name and the source address is passed as an option. 4014 */ 4015 int 4016 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 4017 struct uio *uiop, int flags) 4018 { 4019 struct T_unitdata_req tudr; 4020 mblk_t *mp; 4021 int error; 4022 void *addr; 4023 socklen_t addrlen; 4024 void *src; 4025 socklen_t srclen; 4026 ssize_t len; 4027 sotpi_info_t *sti = SOTOTPI(so); 4028 4029 ASSERT(name != NULL && namelen != 0); 4030 4031 len = uiop->uio_resid; 4032 if (len > sti->sti_tidu_size) { 4033 error = EMSGSIZE; 4034 goto done; 4035 } 4036 4037 /* Length and family checks */ 4038 error = so_addr_verify(so, name, namelen); 4039 if (error != 0) 4040 goto done; 4041 4042 if (sti->sti_direct) 4043 return (sodgram_direct(so, name, namelen, uiop, flags)); 4044 4045 if (so->so_family == AF_UNIX) { 4046 if (sti->sti_faddr_noxlate) { 4047 /* 4048 * Already have a transport internal address. Do not 4049 * pass any (transport internal) source address. 4050 */ 4051 addr = name; 4052 addrlen = namelen; 4053 src = NULL; 4054 srclen = 0; 4055 } else { 4056 /* 4057 * Pass the sockaddr_un source address as an option 4058 * and translate the remote address. 4059 * 4060 * Note that this code does not prevent sti_laddr_sa 4061 * from changing while it is being used. Thus 4062 * if an unbind+bind occurs concurrently with this 4063 * send the peer might see a partially new and a 4064 * partially old "from" address. 4065 */ 4066 src = sti->sti_laddr_sa; 4067 srclen = (socklen_t)sti->sti_laddr_len; 4068 dprintso(so, 1, 4069 ("sosend_dgram UNIX: srclen %d, src %p\n", 4070 srclen, src)); 4071 error = so_ux_addr_xlate(so, name, namelen, 4072 (flags & MSG_XPG4_2), 4073 &addr, &addrlen); 4074 if (error) { 4075 eprintsoline(so, error); 4076 goto done; 4077 } 4078 } 4079 } else { 4080 addr = name; 4081 addrlen = namelen; 4082 src = NULL; 4083 srclen = 0; 4084 } 4085 tudr.PRIM_type = T_UNITDATA_REQ; 4086 tudr.DEST_length = addrlen; 4087 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4088 if (srclen == 0) { 4089 tudr.OPT_length = 0; 4090 tudr.OPT_offset = 0; 4091 4092 mp = soallocproto2(&tudr, sizeof (tudr), 4093 addr, addrlen, 0, _ALLOC_INTR, CRED()); 4094 if (mp == NULL) { 4095 /* 4096 * Caught a signal waiting for memory. 4097 * Let send* return EINTR. 4098 */ 4099 error = EINTR; 4100 goto done; 4101 } 4102 } else { 4103 /* 4104 * There is a AF_UNIX sockaddr_un to include as a source 4105 * address option. 4106 */ 4107 struct T_opthdr toh; 4108 ssize_t size; 4109 4110 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4111 _TPI_ALIGN_TOPT(srclen)); 4112 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4113 _TPI_ALIGN_TOPT(addrlen)); 4114 4115 toh.level = SOL_SOCKET; 4116 toh.name = SO_SRCADDR; 4117 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4118 toh.status = 0; 4119 4120 size = tudr.OPT_offset + tudr.OPT_length; 4121 mp = soallocproto2(&tudr, sizeof (tudr), 4122 addr, addrlen, size, _ALLOC_INTR, CRED()); 4123 if (mp == NULL) { 4124 /* 4125 * Caught a signal waiting for memory. 4126 * Let send* return EINTR. 4127 */ 4128 error = EINTR; 4129 goto done; 4130 } 4131 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4132 soappendmsg(mp, &toh, sizeof (toh)); 4133 soappendmsg(mp, src, srclen); 4134 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4135 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4136 } 4137 4138 if (AU_AUDITING()) 4139 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4140 4141 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4142 done: 4143 #ifdef SOCK_DEBUG 4144 if (error) { 4145 eprintsoline(so, error); 4146 } 4147 #endif /* SOCK_DEBUG */ 4148 return (error); 4149 } 4150 4151 /* 4152 * Sending data on a connected stream socket. 4153 * Assumes caller has verified that SS_ISCONNECTED is set. 4154 */ 4155 int 4156 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4157 int sflag) 4158 { 4159 struct T_data_req tdr; 4160 mblk_t *mp; 4161 int error; 4162 ssize_t iosize; 4163 sotpi_info_t *sti = SOTOTPI(so); 4164 4165 dprintso(so, 1, 4166 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4167 (void *)so, uiop->uio_resid, prim, sflag)); 4168 4169 /* 4170 * Has to be bound and connected. However, since no locks are 4171 * held the state could have changed after sotpi_sendmsg checked it 4172 * thus it is not possible to ASSERT on the state. 4173 */ 4174 4175 do { 4176 /* 4177 * Set the MORE flag if uio_resid does not fit in this 4178 * message or if the caller passed in "more". 4179 * Error for transports with zero tidu_size. 4180 */ 4181 tdr.PRIM_type = prim; 4182 iosize = sti->sti_tidu_size; 4183 if (iosize <= 0) 4184 return (EMSGSIZE); 4185 if (uiop->uio_resid > iosize) { 4186 tdr.MORE_flag = 1; 4187 } else { 4188 if (more) 4189 tdr.MORE_flag = 1; 4190 else 4191 tdr.MORE_flag = 0; 4192 iosize = uiop->uio_resid; 4193 } 4194 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4195 prim, tdr.MORE_flag, iosize)); 4196 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4197 if (mp == NULL) { 4198 /* 4199 * Caught a signal waiting for memory. 4200 * Let send* return EINTR. 4201 */ 4202 return (EINTR); 4203 } 4204 4205 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4206 0, sflag | MSG_BAND, 0); 4207 if (error) { 4208 eprintsoline(so, error); 4209 return (error); 4210 } 4211 if (uiop->uio_resid > 0) { 4212 /* 4213 * Recheck for fatal errors. Fail write even though 4214 * some data have been written. This is consistent 4215 * with strwrite semantics and BSD sockets semantics. 4216 */ 4217 if (so->so_state & SS_CANTSENDMORE) { 4218 eprintsoline(so, error); 4219 return (EPIPE); 4220 } 4221 if (so->so_error != 0) { 4222 mutex_enter(&so->so_lock); 4223 error = sogeterr(so, B_TRUE); 4224 mutex_exit(&so->so_lock); 4225 if (error != 0) { 4226 eprintsoline(so, error); 4227 return (error); 4228 } 4229 } 4230 } 4231 } while (uiop->uio_resid > 0); 4232 return (0); 4233 } 4234 4235 /* 4236 * Check the state for errors and call the appropriate send function. 4237 * 4238 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4239 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4240 * after sending the message. 4241 */ 4242 static int 4243 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4244 struct cred *cr) 4245 { 4246 int so_state; 4247 int so_mode; 4248 int error; 4249 struct sockaddr *name; 4250 t_uscalar_t namelen; 4251 int dontroute; 4252 int flags; 4253 sotpi_info_t *sti = SOTOTPI(so); 4254 4255 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4256 (void *)so, (void *)msg, msg->msg_flags, 4257 pr_state(so->so_state, so->so_mode), so->so_error)); 4258 4259 if (so->so_version == SOV_STREAM) { 4260 /* The imaginary "sockmod" has been popped - act as a stream */ 4261 so_update_attrs(so, SOMOD); 4262 return (strwrite(SOTOV(so), uiop, cr)); 4263 } 4264 4265 mutex_enter(&so->so_lock); 4266 so_state = so->so_state; 4267 4268 if (so_state & SS_CANTSENDMORE) { 4269 mutex_exit(&so->so_lock); 4270 return (EPIPE); 4271 } 4272 4273 if (so->so_error != 0) { 4274 error = sogeterr(so, B_TRUE); 4275 if (error != 0) { 4276 mutex_exit(&so->so_lock); 4277 return (error); 4278 } 4279 } 4280 4281 name = (struct sockaddr *)msg->msg_name; 4282 namelen = msg->msg_namelen; 4283 4284 so_mode = so->so_mode; 4285 4286 if (name == NULL) { 4287 if (!(so_state & SS_ISCONNECTED)) { 4288 mutex_exit(&so->so_lock); 4289 if (so_mode & SM_CONNREQUIRED) 4290 return (ENOTCONN); 4291 else 4292 return (EDESTADDRREQ); 4293 } 4294 if (so_mode & SM_CONNREQUIRED) { 4295 name = NULL; 4296 namelen = 0; 4297 } else { 4298 /* 4299 * Note that this code does not prevent sti_faddr_sa 4300 * from changing while it is being used. Thus 4301 * if an "unconnect"+connect occurs concurrently with 4302 * this send the datagram might be delivered to a 4303 * garbaled address. 4304 */ 4305 ASSERT(sti->sti_faddr_sa); 4306 name = sti->sti_faddr_sa; 4307 namelen = (t_uscalar_t)sti->sti_faddr_len; 4308 } 4309 } else { 4310 if (!(so_state & SS_ISCONNECTED) && 4311 (so_mode & SM_CONNREQUIRED)) { 4312 /* Required but not connected */ 4313 mutex_exit(&so->so_lock); 4314 return (ENOTCONN); 4315 } 4316 /* 4317 * Ignore the address on connection-oriented sockets. 4318 * Just like BSD this code does not generate an error for 4319 * TCP (a CONNREQUIRED socket) when sending to an address 4320 * passed in with sendto/sendmsg. Instead the data is 4321 * delivered on the connection as if no address had been 4322 * supplied. 4323 */ 4324 if ((so_state & SS_ISCONNECTED) && 4325 !(so_mode & SM_CONNREQUIRED)) { 4326 mutex_exit(&so->so_lock); 4327 return (EISCONN); 4328 } 4329 if (!(so_state & SS_ISBOUND)) { 4330 so_lock_single(so); /* Set SOLOCKED */ 4331 error = sotpi_bind(so, NULL, 0, 4332 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4333 so_unlock_single(so, SOLOCKED); 4334 if (error) { 4335 mutex_exit(&so->so_lock); 4336 eprintsoline(so, error); 4337 return (error); 4338 } 4339 } 4340 /* 4341 * Handle delayed datagram errors. These are only queued 4342 * when the application sets SO_DGRAM_ERRIND. 4343 * Return the error if we are sending to the address 4344 * that was returned in the last T_UDERROR_IND. 4345 * If sending to some other address discard the delayed 4346 * error indication. 4347 */ 4348 if (sti->sti_delayed_error) { 4349 struct T_uderror_ind *tudi; 4350 void *addr; 4351 t_uscalar_t addrlen; 4352 boolean_t match = B_FALSE; 4353 4354 ASSERT(sti->sti_eaddr_mp); 4355 error = sti->sti_delayed_error; 4356 sti->sti_delayed_error = 0; 4357 tudi = 4358 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4359 addrlen = tudi->DEST_length; 4360 addr = sogetoff(sti->sti_eaddr_mp, 4361 tudi->DEST_offset, addrlen, 1); 4362 ASSERT(addr); /* Checked by strsock_proto */ 4363 switch (so->so_family) { 4364 case AF_INET: { 4365 /* Compare just IP address and port */ 4366 sin_t *sin1 = (sin_t *)name; 4367 sin_t *sin2 = (sin_t *)addr; 4368 4369 if (addrlen == sizeof (sin_t) && 4370 namelen == addrlen && 4371 sin1->sin_port == sin2->sin_port && 4372 sin1->sin_addr.s_addr == 4373 sin2->sin_addr.s_addr) 4374 match = B_TRUE; 4375 break; 4376 } 4377 case AF_INET6: { 4378 /* Compare just IP address and port. Not flow */ 4379 sin6_t *sin1 = (sin6_t *)name; 4380 sin6_t *sin2 = (sin6_t *)addr; 4381 4382 if (addrlen == sizeof (sin6_t) && 4383 namelen == addrlen && 4384 sin1->sin6_port == sin2->sin6_port && 4385 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4386 &sin2->sin6_addr)) 4387 match = B_TRUE; 4388 break; 4389 } 4390 case AF_UNIX: 4391 default: 4392 if (namelen == addrlen && 4393 bcmp(name, addr, namelen) == 0) 4394 match = B_TRUE; 4395 } 4396 if (match) { 4397 freemsg(sti->sti_eaddr_mp); 4398 sti->sti_eaddr_mp = NULL; 4399 mutex_exit(&so->so_lock); 4400 #ifdef DEBUG 4401 dprintso(so, 0, 4402 ("sockfs delayed error %d for %s\n", 4403 error, 4404 pr_addr(so->so_family, name, namelen))); 4405 #endif /* DEBUG */ 4406 return (error); 4407 } 4408 freemsg(sti->sti_eaddr_mp); 4409 sti->sti_eaddr_mp = NULL; 4410 } 4411 } 4412 mutex_exit(&so->so_lock); 4413 4414 flags = msg->msg_flags; 4415 dontroute = 0; 4416 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4417 uint32_t val; 4418 4419 val = 1; 4420 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4421 &val, (t_uscalar_t)sizeof (val), cr); 4422 if (error) 4423 return (error); 4424 dontroute = 1; 4425 } 4426 4427 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4428 error = EOPNOTSUPP; 4429 goto done; 4430 } 4431 if (msg->msg_controllen != 0) { 4432 if (!(so_mode & SM_CONNREQUIRED)) { 4433 so_update_attrs(so, SOMOD); 4434 error = sosend_dgramcmsg(so, name, namelen, uiop, 4435 msg->msg_control, msg->msg_controllen, flags); 4436 } else { 4437 if (flags & MSG_OOB) { 4438 /* Can't generate T_EXDATA_REQ with options */ 4439 error = EOPNOTSUPP; 4440 goto done; 4441 } 4442 so_update_attrs(so, SOMOD); 4443 error = sosend_svccmsg(so, uiop, 4444 !(flags & MSG_EOR), 4445 msg->msg_control, msg->msg_controllen, 4446 flags); 4447 } 4448 goto done; 4449 } 4450 4451 so_update_attrs(so, SOMOD); 4452 if (!(so_mode & SM_CONNREQUIRED)) { 4453 /* 4454 * If there is no SO_DONTROUTE to turn off return immediately 4455 * from send_dgram. This can allow tail-call optimizations. 4456 */ 4457 if (!dontroute) { 4458 return (sosend_dgram(so, name, namelen, uiop, flags)); 4459 } 4460 error = sosend_dgram(so, name, namelen, uiop, flags); 4461 } else { 4462 t_scalar_t prim; 4463 int sflag; 4464 4465 /* Ignore msg_name in the connected state */ 4466 if (flags & MSG_OOB) { 4467 prim = T_EXDATA_REQ; 4468 /* 4469 * Send down T_EXDATA_REQ even if there is flow 4470 * control for data. 4471 */ 4472 sflag = MSG_IGNFLOW; 4473 } else { 4474 if (so_mode & SM_BYTESTREAM) { 4475 /* Byte stream transport - use write */ 4476 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4477 4478 /* Send M_DATA messages */ 4479 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4480 (error = nl7c_data(so, uiop)) >= 0) { 4481 /* NL7C consumed the data */ 4482 return (error); 4483 } 4484 /* 4485 * If there is no SO_DONTROUTE to turn off, 4486 * sti_direct is on, and there is no flow 4487 * control, we can take the fast path. 4488 */ 4489 if (!dontroute && sti->sti_direct != 0 && 4490 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4491 return (sostream_direct(so, uiop, 4492 NULL, cr)); 4493 } 4494 error = strwrite(SOTOV(so), uiop, cr); 4495 goto done; 4496 } 4497 prim = T_DATA_REQ; 4498 sflag = 0; 4499 } 4500 /* 4501 * If there is no SO_DONTROUTE to turn off return immediately 4502 * from sosend_svc. This can allow tail-call optimizations. 4503 */ 4504 if (!dontroute) 4505 return (sosend_svc(so, uiop, prim, 4506 !(flags & MSG_EOR), sflag)); 4507 error = sosend_svc(so, uiop, prim, 4508 !(flags & MSG_EOR), sflag); 4509 } 4510 ASSERT(dontroute); 4511 done: 4512 if (dontroute) { 4513 uint32_t val; 4514 4515 val = 0; 4516 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4517 &val, (t_uscalar_t)sizeof (val), cr); 4518 } 4519 return (error); 4520 } 4521 4522 /* 4523 * kstrwritemp() has very similar semantics as that of strwrite(). 4524 * The main difference is it obtains mblks from the caller and also 4525 * does not do any copy as done in strwrite() from user buffers to 4526 * kernel buffers. 4527 * 4528 * Currently, this routine is used by sendfile to send data allocated 4529 * within the kernel without any copying. This interface does not use the 4530 * synchronous stream interface as synch. stream interface implies 4531 * copying. 4532 */ 4533 int 4534 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4535 { 4536 struct stdata *stp; 4537 struct queue *wqp; 4538 mblk_t *newmp; 4539 char waitflag; 4540 int tempmode; 4541 int error = 0; 4542 int done = 0; 4543 struct sonode *so; 4544 boolean_t direct; 4545 4546 ASSERT(vp->v_stream); 4547 stp = vp->v_stream; 4548 4549 so = VTOSO(vp); 4550 direct = _SOTOTPI(so)->sti_direct; 4551 4552 /* 4553 * This is the sockfs direct fast path. canputnext() need 4554 * not be accurate so we don't grab the sd_lock here. If 4555 * we get flow-controlled, we grab sd_lock just before the 4556 * do..while loop below to emulate what strwrite() does. 4557 */ 4558 wqp = stp->sd_wrq; 4559 if (canputnext(wqp) && direct && 4560 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4561 return (sostream_direct(so, NULL, mp, CRED())); 4562 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4563 /* Fast check of flags before acquiring the lock */ 4564 mutex_enter(&stp->sd_lock); 4565 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4566 mutex_exit(&stp->sd_lock); 4567 if (error != 0) { 4568 if (!(stp->sd_flag & STPLEX) && 4569 (stp->sd_wput_opt & SW_SIGPIPE)) { 4570 error = EPIPE; 4571 } 4572 return (error); 4573 } 4574 } 4575 4576 waitflag = WRITEWAIT; 4577 if (stp->sd_flag & OLDNDELAY) 4578 tempmode = fmode & ~FNDELAY; 4579 else 4580 tempmode = fmode; 4581 4582 mutex_enter(&stp->sd_lock); 4583 do { 4584 if (canputnext(wqp)) { 4585 mutex_exit(&stp->sd_lock); 4586 if (stp->sd_wputdatafunc != NULL) { 4587 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4588 NULL, NULL, NULL); 4589 if (newmp == NULL) { 4590 /* The caller will free mp */ 4591 return (ECOMM); 4592 } 4593 mp = newmp; 4594 } 4595 putnext(wqp, mp); 4596 return (0); 4597 } 4598 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4599 &done); 4600 } while (error == 0 && !done); 4601 4602 mutex_exit(&stp->sd_lock); 4603 /* 4604 * EAGAIN tells the application to try again. ENOMEM 4605 * is returned only if the memory allocation size 4606 * exceeds the physical limits of the system. ENOMEM 4607 * can't be true here. 4608 */ 4609 if (error == ENOMEM) 4610 error = EAGAIN; 4611 return (error); 4612 } 4613 4614 /* ARGSUSED */ 4615 static int 4616 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4617 struct cred *cr, mblk_t **mpp) 4618 { 4619 int error; 4620 4621 if (so->so_family != AF_INET && so->so_family != AF_INET6) 4622 return (EAFNOSUPPORT); 4623 4624 if (so->so_state & SS_CANTSENDMORE) 4625 return (EPIPE); 4626 4627 if (so->so_type != SOCK_STREAM) 4628 return (EOPNOTSUPP); 4629 4630 if ((so->so_state & SS_ISCONNECTED) == 0) 4631 return (ENOTCONN); 4632 4633 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4634 if (error == 0) 4635 *mpp = NULL; 4636 return (error); 4637 } 4638 4639 /* 4640 * Sending data on a datagram socket. 4641 * Assumes caller has verified that SS_ISBOUND etc. are set. 4642 */ 4643 /* ARGSUSED */ 4644 static int 4645 sodgram_direct(struct sonode *so, struct sockaddr *name, 4646 socklen_t namelen, struct uio *uiop, int flags) 4647 { 4648 struct T_unitdata_req tudr; 4649 mblk_t *mp = NULL; 4650 int error = 0; 4651 void *addr; 4652 socklen_t addrlen; 4653 ssize_t len; 4654 struct stdata *stp = SOTOV(so)->v_stream; 4655 int so_state; 4656 queue_t *udp_wq; 4657 boolean_t connected; 4658 mblk_t *mpdata = NULL; 4659 sotpi_info_t *sti = SOTOTPI(so); 4660 uint32_t auditing = AU_AUDITING(); 4661 4662 ASSERT(name != NULL && namelen != 0); 4663 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4664 ASSERT(!(so->so_mode & SM_EXDATA)); 4665 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4666 ASSERT(SOTOV(so)->v_type == VSOCK); 4667 4668 /* Caller checked for proper length */ 4669 len = uiop->uio_resid; 4670 ASSERT(len <= sti->sti_tidu_size); 4671 4672 /* Length and family checks have been done by caller */ 4673 ASSERT(name->sa_family == so->so_family); 4674 ASSERT(so->so_family == AF_INET || 4675 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4676 ASSERT(so->so_family == AF_INET6 || 4677 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4678 4679 addr = name; 4680 addrlen = namelen; 4681 4682 if (stp->sd_sidp != NULL && 4683 (error = straccess(stp, JCWRITE)) != 0) 4684 goto done; 4685 4686 so_state = so->so_state; 4687 4688 connected = so_state & SS_ISCONNECTED; 4689 if (!connected) { 4690 tudr.PRIM_type = T_UNITDATA_REQ; 4691 tudr.DEST_length = addrlen; 4692 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4693 tudr.OPT_length = 0; 4694 tudr.OPT_offset = 0; 4695 4696 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4697 _ALLOC_INTR, CRED()); 4698 if (mp == NULL) { 4699 /* 4700 * Caught a signal waiting for memory. 4701 * Let send* return EINTR. 4702 */ 4703 error = EINTR; 4704 goto done; 4705 } 4706 } 4707 4708 /* 4709 * For UDP we don't break up the copyin into smaller pieces 4710 * as in the TCP case. That means if ENOMEM is returned by 4711 * mcopyinuio() then the uio vector has not been modified at 4712 * all and we fallback to either strwrite() or kstrputmsg() 4713 * below. Note also that we never generate priority messages 4714 * from here. 4715 */ 4716 udp_wq = stp->sd_wrq->q_next; 4717 if (canput(udp_wq) && 4718 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4719 ASSERT(DB_TYPE(mpdata) == M_DATA); 4720 ASSERT(uiop->uio_resid == 0); 4721 if (!connected) 4722 linkb(mp, mpdata); 4723 else 4724 mp = mpdata; 4725 if (auditing) 4726 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4727 4728 udp_wput(udp_wq, mp); 4729 return (0); 4730 } 4731 4732 ASSERT(mpdata == NULL); 4733 if (error != 0 && error != ENOMEM) { 4734 freemsg(mp); 4735 return (error); 4736 } 4737 4738 /* 4739 * For connected, let strwrite() handle the blocking case. 4740 * Otherwise we fall thru and use kstrputmsg(). 4741 */ 4742 if (connected) 4743 return (strwrite(SOTOV(so), uiop, CRED())); 4744 4745 if (auditing) 4746 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4747 4748 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4749 done: 4750 #ifdef SOCK_DEBUG 4751 if (error != 0) { 4752 eprintsoline(so, error); 4753 } 4754 #endif /* SOCK_DEBUG */ 4755 return (error); 4756 } 4757 4758 int 4759 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4760 { 4761 struct stdata *stp = SOTOV(so)->v_stream; 4762 ssize_t iosize, rmax, maxblk; 4763 queue_t *tcp_wq = stp->sd_wrq->q_next; 4764 mblk_t *newmp; 4765 int error = 0, wflag = 0; 4766 4767 ASSERT(so->so_mode & SM_BYTESTREAM); 4768 ASSERT(SOTOV(so)->v_type == VSOCK); 4769 4770 if (stp->sd_sidp != NULL && 4771 (error = straccess(stp, JCWRITE)) != 0) 4772 return (error); 4773 4774 if (uiop == NULL) { 4775 /* 4776 * kstrwritemp() should have checked sd_flag and 4777 * flow-control before coming here. If we end up 4778 * here it means that we can simply pass down the 4779 * data to tcp. 4780 */ 4781 ASSERT(mp != NULL); 4782 if (stp->sd_wputdatafunc != NULL) { 4783 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4784 NULL, NULL, NULL); 4785 if (newmp == NULL) { 4786 /* The caller will free mp */ 4787 return (ECOMM); 4788 } 4789 mp = newmp; 4790 } 4791 tcp_wput(tcp_wq, mp); 4792 return (0); 4793 } 4794 4795 /* Fallback to strwrite() to do proper error handling */ 4796 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4797 return (strwrite(SOTOV(so), uiop, cr)); 4798 4799 rmax = stp->sd_qn_maxpsz; 4800 ASSERT(rmax >= 0 || rmax == INFPSZ); 4801 if (rmax == 0 || uiop->uio_resid <= 0) 4802 return (0); 4803 4804 if (rmax == INFPSZ) 4805 rmax = uiop->uio_resid; 4806 4807 maxblk = stp->sd_maxblk; 4808 4809 for (;;) { 4810 iosize = MIN(uiop->uio_resid, rmax); 4811 4812 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4813 if (mp == NULL) { 4814 /* 4815 * Fallback to strwrite() for ENOMEM; if this 4816 * is our first time in this routine and the uio 4817 * vector has not been modified, we will end up 4818 * calling strwrite() without any flag set. 4819 */ 4820 if (error == ENOMEM) 4821 goto slow_send; 4822 else 4823 return (error); 4824 } 4825 ASSERT(uiop->uio_resid >= 0); 4826 /* 4827 * If mp is non-NULL and ENOMEM is set, it means that 4828 * mcopyinuio() was able to break down some of the user 4829 * data into one or more mblks. Send the partial data 4830 * to tcp and let the rest be handled in strwrite(). 4831 */ 4832 ASSERT(error == 0 || error == ENOMEM); 4833 if (stp->sd_wputdatafunc != NULL) { 4834 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4835 NULL, NULL, NULL); 4836 if (newmp == NULL) { 4837 /* The caller will free mp */ 4838 return (ECOMM); 4839 } 4840 mp = newmp; 4841 } 4842 tcp_wput(tcp_wq, mp); 4843 4844 wflag |= NOINTR; 4845 4846 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4847 ASSERT(error == 0); 4848 break; 4849 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4850 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4851 slow_send: 4852 /* 4853 * We were able to send down partial data using 4854 * the direct call interface, but are now relying 4855 * on strwrite() to handle the non-fastpath cases. 4856 * If the socket is blocking we will sleep in 4857 * strwaitq() until write is permitted, otherwise, 4858 * we will need to return the amount of bytes 4859 * written so far back to the app. This is the 4860 * reason why we pass NOINTR flag to strwrite() 4861 * for non-blocking socket, because we don't want 4862 * to return EAGAIN when portion of the user data 4863 * has actually been sent down. 4864 */ 4865 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4866 } 4867 } 4868 return (0); 4869 } 4870 4871 /* 4872 * Update sti_faddr by asking the transport (unless AF_UNIX). 4873 */ 4874 /* ARGSUSED */ 4875 int 4876 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4877 boolean_t accept, struct cred *cr) 4878 { 4879 struct strbuf strbuf; 4880 int error = 0, res; 4881 void *addr; 4882 t_uscalar_t addrlen; 4883 k_sigset_t smask; 4884 sotpi_info_t *sti = SOTOTPI(so); 4885 4886 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4887 (void *)so, pr_state(so->so_state, so->so_mode))); 4888 4889 ASSERT(*namelen > 0); 4890 mutex_enter(&so->so_lock); 4891 so_lock_single(so); /* Set SOLOCKED */ 4892 4893 if (accept) { 4894 bcopy(sti->sti_faddr_sa, name, 4895 MIN(*namelen, sti->sti_faddr_len)); 4896 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4897 goto done; 4898 } 4899 4900 if (!(so->so_state & SS_ISCONNECTED)) { 4901 error = ENOTCONN; 4902 goto done; 4903 } 4904 /* Added this check for X/Open */ 4905 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4906 error = EINVAL; 4907 if (xnet_check_print) { 4908 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4909 } 4910 goto done; 4911 } 4912 4913 if (sti->sti_faddr_valid) { 4914 bcopy(sti->sti_faddr_sa, name, 4915 MIN(*namelen, sti->sti_faddr_len)); 4916 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4917 goto done; 4918 } 4919 4920 #ifdef DEBUG 4921 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4922 pr_addr(so->so_family, sti->sti_faddr_sa, 4923 (t_uscalar_t)sti->sti_faddr_len))); 4924 #endif /* DEBUG */ 4925 4926 if (so->so_family == AF_UNIX) { 4927 /* Transport has different name space - return local info */ 4928 if (sti->sti_faddr_noxlate) 4929 *namelen = 0; 4930 error = 0; 4931 goto done; 4932 } 4933 4934 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4935 4936 ASSERT(sti->sti_faddr_sa); 4937 /* Allocate local buffer to use with ioctl */ 4938 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4939 mutex_exit(&so->so_lock); 4940 addr = kmem_alloc(addrlen, KM_SLEEP); 4941 4942 /* 4943 * Issue TI_GETPEERNAME with signals masked. 4944 * Put the result in sti_faddr_sa so that getpeername works after 4945 * a shutdown(output). 4946 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4947 * back to the socket. 4948 */ 4949 strbuf.buf = addr; 4950 strbuf.maxlen = addrlen; 4951 strbuf.len = 0; 4952 4953 sigintr(&smask, 0); 4954 res = 0; 4955 ASSERT(cr); 4956 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4957 0, K_TO_K, cr, &res); 4958 sigunintr(&smask); 4959 4960 mutex_enter(&so->so_lock); 4961 /* 4962 * If there is an error record the error in so_error put don't fail 4963 * the getpeername. Instead fallback on the recorded 4964 * sti->sti_faddr_sa. 4965 */ 4966 if (error) { 4967 /* 4968 * Various stream head errors can be returned to the ioctl. 4969 * However, it is impossible to determine which ones of 4970 * these are really socket level errors that were incorrectly 4971 * consumed by the ioctl. Thus this code silently ignores the 4972 * error - to code explicitly does not reinstate the error 4973 * using soseterror(). 4974 * Experiments have shows that at least this set of 4975 * errors are reported and should not be reinstated on the 4976 * socket: 4977 * EINVAL E.g. if an I_LINK was in effect when 4978 * getpeername was called. 4979 * EPIPE The ioctl error semantics prefer the write 4980 * side error over the read side error. 4981 * ENOTCONN The transport just got disconnected but 4982 * sockfs had not yet seen the T_DISCON_IND 4983 * when issuing the ioctl. 4984 */ 4985 error = 0; 4986 } else if (res == 0 && strbuf.len > 0 && 4987 (so->so_state & SS_ISCONNECTED)) { 4988 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 4989 sti->sti_faddr_len = (socklen_t)strbuf.len; 4990 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 4991 sti->sti_faddr_valid = 1; 4992 4993 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 4994 *namelen = sti->sti_faddr_len; 4995 } 4996 kmem_free(addr, addrlen); 4997 #ifdef DEBUG 4998 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4999 pr_addr(so->so_family, sti->sti_faddr_sa, 5000 (t_uscalar_t)sti->sti_faddr_len))); 5001 #endif /* DEBUG */ 5002 done: 5003 so_unlock_single(so, SOLOCKED); 5004 mutex_exit(&so->so_lock); 5005 return (error); 5006 } 5007 5008 /* 5009 * Update sti_laddr by asking the transport (unless AF_UNIX). 5010 */ 5011 int 5012 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 5013 struct cred *cr) 5014 { 5015 struct strbuf strbuf; 5016 int error = 0, res; 5017 void *addr; 5018 t_uscalar_t addrlen; 5019 k_sigset_t smask; 5020 sotpi_info_t *sti = SOTOTPI(so); 5021 5022 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 5023 (void *)so, pr_state(so->so_state, so->so_mode))); 5024 5025 ASSERT(*namelen > 0); 5026 mutex_enter(&so->so_lock); 5027 so_lock_single(so); /* Set SOLOCKED */ 5028 5029 #ifdef DEBUG 5030 5031 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 5032 pr_addr(so->so_family, sti->sti_laddr_sa, 5033 (t_uscalar_t)sti->sti_laddr_len))); 5034 #endif /* DEBUG */ 5035 if (sti->sti_laddr_valid) { 5036 bcopy(sti->sti_laddr_sa, name, 5037 MIN(*namelen, sti->sti_laddr_len)); 5038 *namelen = sti->sti_laddr_len; 5039 goto done; 5040 } 5041 5042 if (so->so_family == AF_UNIX) { 5043 /* Transport has different name space - return local info */ 5044 error = 0; 5045 *namelen = 0; 5046 goto done; 5047 } 5048 if (!(so->so_state & SS_ISBOUND)) { 5049 /* If not bound, then nothing to return. */ 5050 error = 0; 5051 goto done; 5052 } 5053 5054 /* Allocate local buffer to use with ioctl */ 5055 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 5056 mutex_exit(&so->so_lock); 5057 addr = kmem_alloc(addrlen, KM_SLEEP); 5058 5059 /* 5060 * Issue TI_GETMYNAME with signals masked. 5061 * Put the result in sti_laddr_sa so that getsockname works after 5062 * a shutdown(output). 5063 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5064 * back to the socket. 5065 */ 5066 strbuf.buf = addr; 5067 strbuf.maxlen = addrlen; 5068 strbuf.len = 0; 5069 5070 sigintr(&smask, 0); 5071 res = 0; 5072 ASSERT(cr); 5073 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5074 0, K_TO_K, cr, &res); 5075 sigunintr(&smask); 5076 5077 mutex_enter(&so->so_lock); 5078 /* 5079 * If there is an error record the error in so_error put don't fail 5080 * the getsockname. Instead fallback on the recorded 5081 * sti->sti_laddr_sa. 5082 */ 5083 if (error) { 5084 /* 5085 * Various stream head errors can be returned to the ioctl. 5086 * However, it is impossible to determine which ones of 5087 * these are really socket level errors that were incorrectly 5088 * consumed by the ioctl. Thus this code silently ignores the 5089 * error - to code explicitly does not reinstate the error 5090 * using soseterror(). 5091 * Experiments have shows that at least this set of 5092 * errors are reported and should not be reinstated on the 5093 * socket: 5094 * EINVAL E.g. if an I_LINK was in effect when 5095 * getsockname was called. 5096 * EPIPE The ioctl error semantics prefer the write 5097 * side error over the read side error. 5098 */ 5099 error = 0; 5100 } else if (res == 0 && strbuf.len > 0 && 5101 (so->so_state & SS_ISBOUND)) { 5102 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5103 sti->sti_laddr_len = (socklen_t)strbuf.len; 5104 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5105 sti->sti_laddr_valid = 1; 5106 5107 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5108 *namelen = sti->sti_laddr_len; 5109 } 5110 kmem_free(addr, addrlen); 5111 #ifdef DEBUG 5112 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5113 pr_addr(so->so_family, sti->sti_laddr_sa, 5114 (t_uscalar_t)sti->sti_laddr_len))); 5115 #endif /* DEBUG */ 5116 done: 5117 so_unlock_single(so, SOLOCKED); 5118 mutex_exit(&so->so_lock); 5119 return (error); 5120 } 5121 5122 /* 5123 * Get socket options. For SOL_SOCKET options some options are handled 5124 * by the sockfs while others use the value recorded in the sonode as a 5125 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5126 * 5127 * On the return most *optlenp bytes are copied to optval. 5128 */ 5129 /* ARGSUSED */ 5130 int 5131 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5132 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5133 { 5134 struct T_optmgmt_req optmgmt_req; 5135 struct T_optmgmt_ack *optmgmt_ack; 5136 struct opthdr oh; 5137 struct opthdr *opt_res; 5138 mblk_t *mp = NULL; 5139 int error = 0; 5140 void *option = NULL; /* Set if fallback value */ 5141 t_uscalar_t maxlen = *optlenp; 5142 t_uscalar_t len; 5143 uint32_t value; 5144 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5145 struct timeval32 tmo_val32; 5146 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5147 5148 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5149 (void *)so, level, option_name, optval, (void *)optlenp, 5150 pr_state(so->so_state, so->so_mode))); 5151 5152 mutex_enter(&so->so_lock); 5153 so_lock_single(so); /* Set SOLOCKED */ 5154 5155 /* 5156 * Check for SOL_SOCKET options. 5157 * Certain SOL_SOCKET options are returned directly whereas 5158 * others only provide a default (fallback) value should 5159 * the T_SVR4_OPTMGMT_REQ fail. 5160 */ 5161 if (level == SOL_SOCKET) { 5162 /* Check parameters */ 5163 switch (option_name) { 5164 case SO_TYPE: 5165 case SO_ERROR: 5166 case SO_DEBUG: 5167 case SO_ACCEPTCONN: 5168 case SO_REUSEADDR: 5169 case SO_KEEPALIVE: 5170 case SO_DONTROUTE: 5171 case SO_BROADCAST: 5172 case SO_USELOOPBACK: 5173 case SO_OOBINLINE: 5174 case SO_SNDBUF: 5175 case SO_RCVBUF: 5176 #ifdef notyet 5177 case SO_SNDLOWAT: 5178 case SO_RCVLOWAT: 5179 #endif /* notyet */ 5180 case SO_DOMAIN: 5181 case SO_DGRAM_ERRIND: 5182 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5183 error = EINVAL; 5184 eprintsoline(so, error); 5185 goto done2; 5186 } 5187 break; 5188 case SO_RCVTIMEO: 5189 case SO_SNDTIMEO: 5190 if (get_udatamodel() == DATAMODEL_NONE || 5191 get_udatamodel() == DATAMODEL_NATIVE) { 5192 if (maxlen < sizeof (struct timeval)) { 5193 error = EINVAL; 5194 eprintsoline(so, error); 5195 goto done2; 5196 } 5197 } else { 5198 if (maxlen < sizeof (struct timeval32)) { 5199 error = EINVAL; 5200 eprintsoline(so, error); 5201 goto done2; 5202 } 5203 5204 } 5205 break; 5206 case SO_LINGER: 5207 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5208 error = EINVAL; 5209 eprintsoline(so, error); 5210 goto done2; 5211 } 5212 break; 5213 case SO_SND_BUFINFO: 5214 if (maxlen < (t_uscalar_t) 5215 sizeof (struct so_snd_bufinfo)) { 5216 error = EINVAL; 5217 eprintsoline(so, error); 5218 goto done2; 5219 } 5220 break; 5221 } 5222 5223 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5224 5225 switch (option_name) { 5226 case SO_TYPE: 5227 value = so->so_type; 5228 option = &value; 5229 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5230 5231 case SO_ERROR: 5232 value = sogeterr(so, B_TRUE); 5233 option = &value; 5234 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5235 5236 case SO_ACCEPTCONN: 5237 if (so->so_state & SS_ACCEPTCONN) 5238 value = SO_ACCEPTCONN; 5239 else 5240 value = 0; 5241 #ifdef DEBUG 5242 if (value) { 5243 dprintso(so, 1, 5244 ("sotpi_getsockopt: 0x%x is set\n", 5245 option_name)); 5246 } else { 5247 dprintso(so, 1, 5248 ("sotpi_getsockopt: 0x%x not set\n", 5249 option_name)); 5250 } 5251 #endif /* DEBUG */ 5252 option = &value; 5253 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5254 5255 case SO_DEBUG: 5256 case SO_REUSEADDR: 5257 case SO_KEEPALIVE: 5258 case SO_DONTROUTE: 5259 case SO_BROADCAST: 5260 case SO_USELOOPBACK: 5261 case SO_OOBINLINE: 5262 case SO_DGRAM_ERRIND: 5263 value = (so->so_options & option_name); 5264 #ifdef DEBUG 5265 if (value) { 5266 dprintso(so, 1, 5267 ("sotpi_getsockopt: 0x%x is set\n", 5268 option_name)); 5269 } else { 5270 dprintso(so, 1, 5271 ("sotpi_getsockopt: 0x%x not set\n", 5272 option_name)); 5273 } 5274 #endif /* DEBUG */ 5275 option = &value; 5276 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5277 5278 /* 5279 * The following options are only returned by sockfs when the 5280 * T_SVR4_OPTMGMT_REQ fails. 5281 */ 5282 case SO_LINGER: 5283 option = &so->so_linger; 5284 len = (t_uscalar_t)sizeof (struct linger); 5285 break; 5286 case SO_SNDBUF: { 5287 ssize_t lvalue; 5288 5289 /* 5290 * If the option has not been set then get a default 5291 * value from the read queue. This value is 5292 * returned if the transport fails 5293 * the T_SVR4_OPTMGMT_REQ. 5294 */ 5295 lvalue = so->so_sndbuf; 5296 if (lvalue == 0) { 5297 mutex_exit(&so->so_lock); 5298 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5299 QHIWAT, 0, &lvalue); 5300 mutex_enter(&so->so_lock); 5301 dprintso(so, 1, 5302 ("got SO_SNDBUF %ld from q\n", lvalue)); 5303 } 5304 value = (int)lvalue; 5305 option = &value; 5306 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5307 break; 5308 } 5309 case SO_RCVBUF: { 5310 ssize_t lvalue; 5311 5312 /* 5313 * If the option has not been set then get a default 5314 * value from the read queue. This value is 5315 * returned if the transport fails 5316 * the T_SVR4_OPTMGMT_REQ. 5317 * 5318 * XXX If SO_RCVBUF has been set and this is an 5319 * XPG 4.2 application then do not ask the transport 5320 * since the transport might adjust the value and not 5321 * return exactly what was set by the application. 5322 * For non-XPG 4.2 application we return the value 5323 * that the transport is actually using. 5324 */ 5325 lvalue = so->so_rcvbuf; 5326 if (lvalue == 0) { 5327 mutex_exit(&so->so_lock); 5328 (void) strqget(RD(strvp2wq(SOTOV(so))), 5329 QHIWAT, 0, &lvalue); 5330 mutex_enter(&so->so_lock); 5331 dprintso(so, 1, 5332 ("got SO_RCVBUF %ld from q\n", lvalue)); 5333 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5334 value = (int)lvalue; 5335 option = &value; 5336 goto copyout; /* skip asking transport */ 5337 } 5338 value = (int)lvalue; 5339 option = &value; 5340 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5341 break; 5342 } 5343 case SO_DOMAIN: 5344 value = so->so_family; 5345 option = &value; 5346 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5347 5348 #ifdef notyet 5349 /* 5350 * We do not implement the semantics of these options 5351 * thus we shouldn't implement the options either. 5352 */ 5353 case SO_SNDLOWAT: 5354 value = so->so_sndlowat; 5355 option = &value; 5356 break; 5357 case SO_RCVLOWAT: 5358 value = so->so_rcvlowat; 5359 option = &value; 5360 break; 5361 #endif /* notyet */ 5362 case SO_SNDTIMEO: 5363 case SO_RCVTIMEO: { 5364 clock_t val; 5365 5366 if (option_name == SO_RCVTIMEO) 5367 val = drv_hztousec(so->so_rcvtimeo); 5368 else 5369 val = drv_hztousec(so->so_sndtimeo); 5370 tmo_val.tv_sec = val / (1000 * 1000); 5371 tmo_val.tv_usec = val % (1000 * 1000); 5372 if (get_udatamodel() == DATAMODEL_NONE || 5373 get_udatamodel() == DATAMODEL_NATIVE) { 5374 option = &tmo_val; 5375 len = sizeof (struct timeval); 5376 } else { 5377 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5378 option = &tmo_val32; 5379 len = sizeof (struct timeval32); 5380 } 5381 break; 5382 } 5383 case SO_SND_BUFINFO: { 5384 snd_bufinfo.sbi_wroff = 5385 (so->so_proto_props).sopp_wroff; 5386 snd_bufinfo.sbi_maxblk = 5387 (so->so_proto_props).sopp_maxblk; 5388 snd_bufinfo.sbi_maxpsz = 5389 (so->so_proto_props).sopp_maxpsz; 5390 snd_bufinfo.sbi_tail = 5391 (so->so_proto_props).sopp_tail; 5392 option = &snd_bufinfo; 5393 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5394 break; 5395 } 5396 } 5397 } 5398 5399 mutex_exit(&so->so_lock); 5400 5401 /* Send request */ 5402 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5403 optmgmt_req.MGMT_flags = T_CHECK; 5404 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5405 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5406 5407 oh.level = level; 5408 oh.name = option_name; 5409 oh.len = maxlen; 5410 5411 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5412 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5413 /* Let option management work in the presence of data flow control */ 5414 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5415 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5416 mp = NULL; 5417 mutex_enter(&so->so_lock); 5418 if (error) { 5419 eprintsoline(so, error); 5420 goto done2; 5421 } 5422 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5423 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5424 if (error) { 5425 if (option != NULL) { 5426 /* We have a fallback value */ 5427 error = 0; 5428 goto copyout; 5429 } 5430 eprintsoline(so, error); 5431 goto done2; 5432 } 5433 ASSERT(mp); 5434 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5435 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5436 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5437 if (opt_res == NULL) { 5438 if (option != NULL) { 5439 /* We have a fallback value */ 5440 error = 0; 5441 goto copyout; 5442 } 5443 error = EPROTO; 5444 eprintsoline(so, error); 5445 goto done; 5446 } 5447 option = &opt_res[1]; 5448 5449 /* check to ensure that the option is within bounds */ 5450 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5451 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5452 if (option != NULL) { 5453 /* We have a fallback value */ 5454 error = 0; 5455 goto copyout; 5456 } 5457 error = EPROTO; 5458 eprintsoline(so, error); 5459 goto done; 5460 } 5461 5462 len = opt_res->len; 5463 5464 copyout: { 5465 t_uscalar_t size = MIN(len, maxlen); 5466 bcopy(option, optval, size); 5467 bcopy(&size, optlenp, sizeof (size)); 5468 } 5469 done: 5470 freemsg(mp); 5471 done2: 5472 so_unlock_single(so, SOLOCKED); 5473 mutex_exit(&so->so_lock); 5474 5475 return (error); 5476 } 5477 5478 /* 5479 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5480 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5481 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5482 * setsockopt has to work even if the transport does not support the option. 5483 */ 5484 /* ARGSUSED */ 5485 int 5486 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5487 const void *optval, t_uscalar_t optlen, struct cred *cr) 5488 { 5489 struct T_optmgmt_req optmgmt_req; 5490 struct opthdr oh; 5491 mblk_t *mp; 5492 int error = 0; 5493 boolean_t handled = B_FALSE; 5494 5495 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5496 (void *)so, level, option_name, optval, optlen, 5497 pr_state(so->so_state, so->so_mode))); 5498 5499 /* X/Open requires this check */ 5500 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5501 if (xnet_check_print) 5502 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5503 return (EINVAL); 5504 } 5505 5506 mutex_enter(&so->so_lock); 5507 so_lock_single(so); /* Set SOLOCKED */ 5508 mutex_exit(&so->so_lock); 5509 5510 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5511 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5512 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5513 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5514 5515 oh.level = level; 5516 oh.name = option_name; 5517 oh.len = optlen; 5518 5519 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5520 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5521 /* Let option management work in the presence of data flow control */ 5522 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5523 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5524 mp = NULL; 5525 mutex_enter(&so->so_lock); 5526 if (error) { 5527 eprintsoline(so, error); 5528 goto done2; 5529 } 5530 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5531 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5532 if (error) { 5533 eprintsoline(so, error); 5534 goto done; 5535 } 5536 ASSERT(mp); 5537 /* No need to verify T_optmgmt_ack */ 5538 freemsg(mp); 5539 done: 5540 /* 5541 * Check for SOL_SOCKET options and record their values. 5542 * If we know about a SOL_SOCKET parameter and the transport 5543 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5544 * EPROTO) we let the setsockopt succeed. 5545 */ 5546 if (level == SOL_SOCKET) { 5547 /* Check parameters */ 5548 switch (option_name) { 5549 case SO_DEBUG: 5550 case SO_REUSEADDR: 5551 case SO_KEEPALIVE: 5552 case SO_DONTROUTE: 5553 case SO_BROADCAST: 5554 case SO_USELOOPBACK: 5555 case SO_OOBINLINE: 5556 case SO_SNDBUF: 5557 case SO_RCVBUF: 5558 #ifdef notyet 5559 case SO_SNDLOWAT: 5560 case SO_RCVLOWAT: 5561 #endif /* notyet */ 5562 case SO_DGRAM_ERRIND: 5563 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5564 error = EINVAL; 5565 eprintsoline(so, error); 5566 goto done2; 5567 } 5568 ASSERT(optval); 5569 handled = B_TRUE; 5570 break; 5571 case SO_SNDTIMEO: 5572 case SO_RCVTIMEO: 5573 if (get_udatamodel() == DATAMODEL_NONE || 5574 get_udatamodel() == DATAMODEL_NATIVE) { 5575 if (optlen != sizeof (struct timeval)) { 5576 error = EINVAL; 5577 eprintsoline(so, error); 5578 goto done2; 5579 } 5580 } else { 5581 if (optlen != sizeof (struct timeval32)) { 5582 error = EINVAL; 5583 eprintsoline(so, error); 5584 goto done2; 5585 } 5586 } 5587 ASSERT(optval); 5588 handled = B_TRUE; 5589 break; 5590 case SO_LINGER: 5591 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5592 error = EINVAL; 5593 eprintsoline(so, error); 5594 goto done2; 5595 } 5596 ASSERT(optval); 5597 handled = B_TRUE; 5598 break; 5599 } 5600 5601 #define intvalue (*(int32_t *)optval) 5602 5603 switch (option_name) { 5604 case SO_TYPE: 5605 case SO_ERROR: 5606 case SO_ACCEPTCONN: 5607 /* Can't be set */ 5608 error = ENOPROTOOPT; 5609 goto done2; 5610 case SO_LINGER: { 5611 struct linger *l = (struct linger *)optval; 5612 5613 so->so_linger.l_linger = l->l_linger; 5614 if (l->l_onoff) { 5615 so->so_linger.l_onoff = SO_LINGER; 5616 so->so_options |= SO_LINGER; 5617 } else { 5618 so->so_linger.l_onoff = 0; 5619 so->so_options &= ~SO_LINGER; 5620 } 5621 break; 5622 } 5623 5624 case SO_DEBUG: 5625 #ifdef SOCK_TEST 5626 if (intvalue & 2) 5627 sock_test_timelimit = 10 * hz; 5628 else 5629 sock_test_timelimit = 0; 5630 5631 if (intvalue & 4) 5632 do_useracc = 0; 5633 else 5634 do_useracc = 1; 5635 #endif /* SOCK_TEST */ 5636 /* FALLTHRU */ 5637 case SO_REUSEADDR: 5638 case SO_KEEPALIVE: 5639 case SO_DONTROUTE: 5640 case SO_BROADCAST: 5641 case SO_USELOOPBACK: 5642 case SO_OOBINLINE: 5643 case SO_DGRAM_ERRIND: 5644 if (intvalue != 0) { 5645 dprintso(so, 1, 5646 ("socket_setsockopt: setting 0x%x\n", 5647 option_name)); 5648 so->so_options |= option_name; 5649 } else { 5650 dprintso(so, 1, 5651 ("socket_setsockopt: clearing 0x%x\n", 5652 option_name)); 5653 so->so_options &= ~option_name; 5654 } 5655 break; 5656 /* 5657 * The following options are only returned by us when the 5658 * transport layer fails. 5659 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5660 * since the transport might adjust the value and not 5661 * return exactly what was set by the application. 5662 */ 5663 case SO_SNDBUF: 5664 so->so_sndbuf = intvalue; 5665 break; 5666 case SO_RCVBUF: 5667 so->so_rcvbuf = intvalue; 5668 break; 5669 case SO_RCVPSH: 5670 so->so_rcv_timer_interval = intvalue; 5671 break; 5672 #ifdef notyet 5673 /* 5674 * We do not implement the semantics of these options 5675 * thus we shouldn't implement the options either. 5676 */ 5677 case SO_SNDLOWAT: 5678 so->so_sndlowat = intvalue; 5679 break; 5680 case SO_RCVLOWAT: 5681 so->so_rcvlowat = intvalue; 5682 break; 5683 #endif /* notyet */ 5684 case SO_SNDTIMEO: 5685 case SO_RCVTIMEO: { 5686 struct timeval tl; 5687 clock_t val; 5688 5689 if (get_udatamodel() == DATAMODEL_NONE || 5690 get_udatamodel() == DATAMODEL_NATIVE) 5691 bcopy(&tl, (struct timeval *)optval, 5692 sizeof (struct timeval)); 5693 else 5694 TIMEVAL32_TO_TIMEVAL(&tl, 5695 (struct timeval32 *)optval); 5696 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5697 if (option_name == SO_RCVTIMEO) 5698 so->so_rcvtimeo = drv_usectohz(val); 5699 else 5700 so->so_sndtimeo = drv_usectohz(val); 5701 break; 5702 } 5703 } 5704 #undef intvalue 5705 5706 if (error) { 5707 if ((error == ENOPROTOOPT || error == EPROTO || 5708 error == EINVAL) && handled) { 5709 dprintso(so, 1, 5710 ("setsockopt: ignoring error %d for 0x%x\n", 5711 error, option_name)); 5712 error = 0; 5713 } 5714 } 5715 } 5716 done2: 5717 so_unlock_single(so, SOLOCKED); 5718 mutex_exit(&so->so_lock); 5719 return (error); 5720 } 5721 5722 /* 5723 * sotpi_close() is called when the last open reference goes away. 5724 */ 5725 /* ARGSUSED */ 5726 int 5727 sotpi_close(struct sonode *so, int flag, struct cred *cr) 5728 { 5729 struct vnode *vp = SOTOV(so); 5730 dev_t dev; 5731 int error = 0; 5732 sotpi_info_t *sti = SOTOTPI(so); 5733 5734 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 5735 (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 5736 5737 dev = sti->sti_dev; 5738 5739 ASSERT(STREAMSTAB(getmajor(dev))); 5740 5741 mutex_enter(&so->so_lock); 5742 so_lock_single(so); /* Set SOLOCKED */ 5743 5744 ASSERT(so_verify_oobstate(so)); 5745 5746 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 5747 sti->sti_nl7c_flags = 0; 5748 nl7c_close(so); 5749 } 5750 5751 if (vp->v_stream != NULL) { 5752 vnode_t *ux_vp; 5753 5754 if (so->so_family == AF_UNIX) { 5755 /* Could avoid this when CANTSENDMORE for !dgram */ 5756 so_unix_close(so); 5757 } 5758 5759 mutex_exit(&so->so_lock); 5760 /* 5761 * Disassemble the linkage from the AF_UNIX underlying file 5762 * system vnode to this socket (by atomically clearing 5763 * v_stream in vn_rele_stream) before strclose clears sd_vnode 5764 * and frees the stream head. 5765 */ 5766 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 5767 ASSERT(ux_vp->v_stream); 5768 sti->sti_ux_bound_vp = NULL; 5769 vn_rele_stream(ux_vp); 5770 } 5771 if (so->so_family == AF_INET || so->so_family == AF_INET6) { 5772 strsetrwputdatahooks(SOTOV(so), NULL, NULL); 5773 if (sti->sti_kssl_ent != NULL) { 5774 kssl_release_ent(sti->sti_kssl_ent, so, 5775 sti->sti_kssl_type); 5776 sti->sti_kssl_ent = NULL; 5777 } 5778 if (sti->sti_kssl_ctx != NULL) { 5779 kssl_release_ctx(sti->sti_kssl_ctx); 5780 sti->sti_kssl_ctx = NULL; 5781 } 5782 sti->sti_kssl_type = KSSL_NO_PROXY; 5783 } 5784 error = strclose(vp, flag, cr); 5785 vp->v_stream = NULL; 5786 mutex_enter(&so->so_lock); 5787 } 5788 5789 /* 5790 * Flush the T_DISCON_IND on sti_discon_ind_mp. 5791 */ 5792 so_flush_discon_ind(so); 5793 5794 so_unlock_single(so, SOLOCKED); 5795 mutex_exit(&so->so_lock); 5796 5797 /* 5798 * Needed for STREAMs. 5799 * Decrement the device driver's reference count for streams 5800 * opened via the clone dip. The driver was held in clone_open(). 5801 * The absence of clone_close() forces this asymmetry. 5802 */ 5803 if (so->so_flag & SOCLONE) 5804 ddi_rele_driver(getmajor(dev)); 5805 5806 return (error); 5807 } 5808 5809 static int 5810 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 5811 struct cred *cr, int32_t *rvalp) 5812 { 5813 struct vnode *vp = SOTOV(so); 5814 sotpi_info_t *sti = SOTOTPI(so); 5815 int error = 0; 5816 5817 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 5818 cmd, arg, pr_state(so->so_state, so->so_mode))); 5819 5820 switch (cmd) { 5821 case SIOCSQPTR: 5822 /* 5823 * SIOCSQPTR is valid only when helper stream is created 5824 * by the protocol. 5825 */ 5826 case _I_INSERT: 5827 case _I_REMOVE: 5828 /* 5829 * Since there's no compelling reason to support these ioctls 5830 * on sockets, and doing so would increase the complexity 5831 * markedly, prevent it. 5832 */ 5833 return (EOPNOTSUPP); 5834 5835 case I_FIND: 5836 case I_LIST: 5837 case I_LOOK: 5838 case I_POP: 5839 case I_PUSH: 5840 /* 5841 * To prevent races and inconsistencies between the actual 5842 * state of the stream and the state according to the sonode, 5843 * we serialize all operations which modify or operate on the 5844 * list of modules on the socket's stream. 5845 */ 5846 mutex_enter(&sti->sti_plumb_lock); 5847 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 5848 mutex_exit(&sti->sti_plumb_lock); 5849 return (error); 5850 5851 default: 5852 if (so->so_version != SOV_STREAM) 5853 break; 5854 5855 /* 5856 * The imaginary "sockmod" has been popped; act as a stream. 5857 */ 5858 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5859 } 5860 5861 ASSERT(so->so_version != SOV_STREAM); 5862 5863 /* 5864 * Process socket-specific ioctls. 5865 */ 5866 switch (cmd) { 5867 case FIONBIO: { 5868 int32_t value; 5869 5870 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5871 (mode & (int)FKIOCTL))) 5872 return (EFAULT); 5873 5874 mutex_enter(&so->so_lock); 5875 if (value) { 5876 so->so_state |= SS_NDELAY; 5877 } else { 5878 so->so_state &= ~SS_NDELAY; 5879 } 5880 mutex_exit(&so->so_lock); 5881 return (0); 5882 } 5883 5884 case FIOASYNC: { 5885 int32_t value; 5886 5887 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5888 (mode & (int)FKIOCTL))) 5889 return (EFAULT); 5890 5891 mutex_enter(&so->so_lock); 5892 /* 5893 * SS_ASYNC flag not already set correctly? 5894 * (!value != !(so->so_state & SS_ASYNC)) 5895 * but some engineers find that too hard to read. 5896 */ 5897 if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 5898 value != 0 && (so->so_state & SS_ASYNC) == 0) 5899 error = so_flip_async(so, vp, mode, cr); 5900 mutex_exit(&so->so_lock); 5901 return (error); 5902 } 5903 5904 case SIOCSPGRP: 5905 case FIOSETOWN: { 5906 pid_t pgrp; 5907 5908 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 5909 (mode & (int)FKIOCTL))) 5910 return (EFAULT); 5911 5912 mutex_enter(&so->so_lock); 5913 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 5914 /* Any change? */ 5915 if (pgrp != so->so_pgrp) 5916 error = so_set_siggrp(so, vp, pgrp, mode, cr); 5917 mutex_exit(&so->so_lock); 5918 return (error); 5919 } 5920 case SIOCGPGRP: 5921 case FIOGETOWN: 5922 if (so_copyout(&so->so_pgrp, (void *)arg, 5923 sizeof (pid_t), (mode & (int)FKIOCTL))) 5924 return (EFAULT); 5925 return (0); 5926 5927 case SIOCATMARK: { 5928 int retval; 5929 uint_t so_state; 5930 5931 /* 5932 * strwaitmark has a finite timeout after which it 5933 * returns -1 if the mark state is undetermined. 5934 * In order to avoid any race between the mark state 5935 * in sockfs and the mark state in the stream head this 5936 * routine loops until the mark state can be determined 5937 * (or the urgent data indication has been removed by some 5938 * other thread). 5939 */ 5940 do { 5941 mutex_enter(&so->so_lock); 5942 so_state = so->so_state; 5943 mutex_exit(&so->so_lock); 5944 if (so_state & SS_RCVATMARK) { 5945 retval = 1; 5946 } else if (!(so_state & SS_OOBPEND)) { 5947 /* 5948 * No SIGURG has been generated -- there is no 5949 * pending or present urgent data. Thus can't 5950 * possibly be at the mark. 5951 */ 5952 retval = 0; 5953 } else { 5954 /* 5955 * Have the stream head wait until there is 5956 * either some messages on the read queue, or 5957 * STRATMARK or STRNOTATMARK gets set. The 5958 * STRNOTATMARK flag is used so that the 5959 * transport can send up a MSGNOTMARKNEXT 5960 * M_DATA to indicate that it is not 5961 * at the mark and additional data is not about 5962 * to be send upstream. 5963 * 5964 * If the mark state is undetermined this will 5965 * return -1 and we will loop rechecking the 5966 * socket state. 5967 */ 5968 retval = strwaitmark(vp); 5969 } 5970 } while (retval == -1); 5971 5972 if (so_copyout(&retval, (void *)arg, sizeof (int), 5973 (mode & (int)FKIOCTL))) 5974 return (EFAULT); 5975 return (0); 5976 } 5977 5978 case I_FDINSERT: 5979 case I_SENDFD: 5980 case I_RECVFD: 5981 case I_ATMARK: 5982 case _SIOCSOCKFALLBACK: 5983 /* 5984 * These ioctls do not apply to sockets. I_FDINSERT can be 5985 * used to send M_PROTO messages without modifying the socket 5986 * state. I_SENDFD/RECVFD should not be used for socket file 5987 * descriptor passing since they assume a twisted stream. 5988 * SIOCATMARK must be used instead of I_ATMARK. 5989 * 5990 * _SIOCSOCKFALLBACK from an application should never be 5991 * processed. It is only generated by socktpi_open() or 5992 * in response to I_POP or I_PUSH. 5993 */ 5994 #ifdef DEBUG 5995 zcmn_err(getzoneid(), CE_WARN, 5996 "Unsupported STREAMS ioctl 0x%x on socket. " 5997 "Pid = %d\n", cmd, curproc->p_pid); 5998 #endif /* DEBUG */ 5999 return (EOPNOTSUPP); 6000 6001 case _I_GETPEERCRED: 6002 if ((mode & FKIOCTL) == 0) 6003 return (EINVAL); 6004 6005 mutex_enter(&so->so_lock); 6006 if ((so->so_mode & SM_CONNREQUIRED) == 0) { 6007 error = ENOTSUP; 6008 } else if ((so->so_state & SS_ISCONNECTED) == 0) { 6009 error = ENOTCONN; 6010 } else if (so->so_peercred != NULL) { 6011 k_peercred_t *kp = (k_peercred_t *)arg; 6012 kp->pc_cr = so->so_peercred; 6013 kp->pc_cpid = so->so_cpid; 6014 crhold(so->so_peercred); 6015 } else { 6016 error = EINVAL; 6017 } 6018 mutex_exit(&so->so_lock); 6019 return (error); 6020 6021 default: 6022 /* 6023 * Do the higher-order bits of the ioctl cmd indicate 6024 * that it is an I_* streams ioctl? 6025 */ 6026 if ((cmd & 0xffffff00U) == STR && 6027 so->so_version == SOV_SOCKBSD) { 6028 #ifdef DEBUG 6029 zcmn_err(getzoneid(), CE_WARN, 6030 "Unsupported STREAMS ioctl 0x%x on socket. " 6031 "Pid = %d\n", cmd, curproc->p_pid); 6032 #endif /* DEBUG */ 6033 return (EOPNOTSUPP); 6034 } 6035 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6036 } 6037 } 6038 6039 /* 6040 * Handle plumbing-related ioctls. 6041 */ 6042 static int 6043 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 6044 struct cred *cr, int32_t *rvalp) 6045 { 6046 static const char sockmod_name[] = "sockmod"; 6047 struct sonode *so = VTOSO(vp); 6048 char mname[FMNAMESZ + 1]; 6049 int error; 6050 sotpi_info_t *sti = SOTOTPI(so); 6051 6052 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 6053 6054 if (so->so_version == SOV_SOCKBSD) 6055 return (EOPNOTSUPP); 6056 6057 if (so->so_version == SOV_STREAM) { 6058 /* 6059 * The imaginary "sockmod" has been popped - act as a stream. 6060 * If this is a push of sockmod then change back to a socket. 6061 */ 6062 if (cmd == I_PUSH) { 6063 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6064 (void *)arg, mname, sizeof (mname), NULL); 6065 6066 if (error == 0 && strcmp(mname, sockmod_name) == 0) { 6067 dprintso(so, 0, ("socktpi_ioctl: going to " 6068 "socket version\n")); 6069 so_stream2sock(so); 6070 return (0); 6071 } 6072 } 6073 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6074 } 6075 6076 switch (cmd) { 6077 case I_PUSH: 6078 if (sti->sti_direct) { 6079 mutex_enter(&so->so_lock); 6080 so_lock_single(so); 6081 mutex_exit(&so->so_lock); 6082 6083 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 6084 cr, rvalp); 6085 6086 mutex_enter(&so->so_lock); 6087 if (error == 0) 6088 sti->sti_direct = 0; 6089 so_unlock_single(so, SOLOCKED); 6090 mutex_exit(&so->so_lock); 6091 6092 if (error != 0) 6093 return (error); 6094 } 6095 6096 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6097 if (error == 0) 6098 sti->sti_pushcnt++; 6099 return (error); 6100 6101 case I_POP: 6102 if (sti->sti_pushcnt == 0) { 6103 /* Emulate sockmod being popped */ 6104 dprintso(so, 0, 6105 ("socktpi_ioctl: going to STREAMS version\n")); 6106 return (so_sock2stream(so)); 6107 } 6108 6109 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6110 if (error == 0) 6111 sti->sti_pushcnt--; 6112 return (error); 6113 6114 case I_LIST: { 6115 struct str_mlist *kmlistp, *umlistp; 6116 struct str_list kstrlist; 6117 ssize_t kstrlistsize; 6118 int i, nmods; 6119 6120 STRUCT_DECL(str_list, ustrlist); 6121 STRUCT_INIT(ustrlist, mode); 6122 6123 if (arg == NULL) { 6124 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6125 if (error == 0) 6126 (*rvalp)++; /* Add one for sockmod */ 6127 return (error); 6128 } 6129 6130 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 6131 STRUCT_SIZE(ustrlist), mode & FKIOCTL); 6132 if (error != 0) 6133 return (error); 6134 6135 nmods = STRUCT_FGET(ustrlist, sl_nmods); 6136 if (nmods <= 0) 6137 return (EINVAL); 6138 /* 6139 * Ceiling nmods at nstrpush to prevent someone from 6140 * maliciously consuming lots of kernel memory. 6141 */ 6142 nmods = MIN(nmods, nstrpush); 6143 6144 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 6145 kstrlist.sl_nmods = nmods; 6146 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 6147 6148 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 6149 cr, rvalp); 6150 if (error != 0) 6151 goto done; 6152 6153 /* 6154 * Considering the module list as a 0-based array of sl_nmods 6155 * modules, sockmod should conceptually exist at slot 6156 * sti_pushcnt. Insert sockmod at this location by sliding all 6157 * of the module names after so_pushcnt over by one. We know 6158 * that there will be room to do this since we allocated 6159 * sl_modlist with an additional slot. 6160 */ 6161 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--) 6162 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1]; 6163 6164 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name); 6165 kstrlist.sl_nmods++; 6166 6167 /* 6168 * Copy all of the entries out to ustrlist. 6169 */ 6170 kmlistp = kstrlist.sl_modlist; 6171 umlistp = STRUCT_FGETP(ustrlist, sl_modlist); 6172 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) { 6173 error = so_copyout(kmlistp++, umlistp++, 6174 sizeof (struct str_mlist), mode & FKIOCTL); 6175 if (error != 0) 6176 goto done; 6177 } 6178 6179 error = so_copyout(&i, (void *)arg, sizeof (int32_t), 6180 mode & FKIOCTL); 6181 if (error == 0) 6182 *rvalp = 0; 6183 done: 6184 kmem_free(kstrlist.sl_modlist, kstrlistsize); 6185 return (error); 6186 } 6187 case I_LOOK: 6188 if (sti->sti_pushcnt == 0) { 6189 return (so_copyout(sockmod_name, (void *)arg, 6190 sizeof (sockmod_name), mode & FKIOCTL)); 6191 } 6192 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6193 6194 case I_FIND: 6195 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6196 if (error && error != EINVAL) 6197 return (error); 6198 6199 /* if not found and string was sockmod return 1 */ 6200 if (*rvalp == 0 || error == EINVAL) { 6201 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6202 (void *)arg, mname, sizeof (mname), NULL); 6203 if (error == ENAMETOOLONG) 6204 error = EINVAL; 6205 6206 if (error == 0 && strcmp(mname, sockmod_name) == 0) 6207 *rvalp = 1; 6208 } 6209 return (error); 6210 6211 default: 6212 panic("socktpi_plumbioctl: unknown ioctl %d", cmd); 6213 break; 6214 } 6215 6216 return (0); 6217 } 6218 6219 /* 6220 * Wrapper around the streams poll routine that implements socket poll 6221 * semantics. 6222 * The sockfs never calls pollwakeup itself - the stream head take care 6223 * of all pollwakeups. Since sockfs never holds so_lock when calling the 6224 * stream head there can never be a deadlock due to holding so_lock across 6225 * pollwakeup and acquiring so_lock in this routine. 6226 * 6227 * However, since the performance of VOP_POLL is critical we avoid 6228 * acquiring so_lock here. This is based on two assumptions: 6229 * - The poll implementation holds locks to serialize the VOP_POLL call 6230 * and a pollwakeup for the same pollhead. This ensures that should 6231 * e.g. so_state change during a socktpi_poll call the pollwakeup 6232 * (which strsock_* and strrput conspire to issue) is issued after 6233 * the state change. Thus the pollwakeup will block until VOP_POLL has 6234 * returned and then wake up poll and have it call VOP_POLL again. 6235 * - The reading of so_state without holding so_lock does not result in 6236 * stale data that is older than the latest state change that has dropped 6237 * so_lock. This is ensured by the mutex_exit issuing the appropriate 6238 * memory barrier to force the data into the coherency domain. 6239 */ 6240 static int 6241 sotpi_poll( 6242 struct sonode *so, 6243 short events, 6244 int anyyet, 6245 short *reventsp, 6246 struct pollhead **phpp) 6247 { 6248 short origevents = events; 6249 struct vnode *vp = SOTOV(so); 6250 int error; 6251 int so_state = so->so_state; /* snapshot */ 6252 sotpi_info_t *sti = SOTOTPI(so); 6253 6254 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n", 6255 (void *)vp, pr_state(so_state, so->so_mode), so->so_error)); 6256 6257 ASSERT(vp->v_type == VSOCK); 6258 ASSERT(vp->v_stream != NULL); 6259 6260 if (so->so_version == SOV_STREAM) { 6261 /* The imaginary "sockmod" has been popped - act as a stream */ 6262 return (strpoll(vp->v_stream, events, anyyet, 6263 reventsp, phpp)); 6264 } 6265 6266 if (!(so_state & SS_ISCONNECTED) && 6267 (so->so_mode & SM_CONNREQUIRED)) { 6268 /* Not connected yet - turn off write side events */ 6269 events &= ~(POLLOUT|POLLWRBAND); 6270 } 6271 /* 6272 * Check for errors without calling strpoll if the caller wants them. 6273 * In sockets the errors are represented as input/output events 6274 * and there is no need to ask the stream head for this information. 6275 */ 6276 if (so->so_error != 0 && 6277 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) { 6278 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; 6279 return (0); 6280 } 6281 /* 6282 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages. 6283 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA 6284 * will not trigger a POLLIN event with POLLRDDATA set. 6285 * The handling of urgent data (causing POLLRDBAND) is done by 6286 * inspecting SS_OOBPEND below. 6287 */ 6288 events |= POLLRDDATA; 6289 6290 /* 6291 * After shutdown(output) a stream head write error is set. 6292 * However, we should not return output events. 6293 */ 6294 events |= POLLNOERR; 6295 error = strpoll(vp->v_stream, events, anyyet, 6296 reventsp, phpp); 6297 if (error) 6298 return (error); 6299 6300 ASSERT(!(*reventsp & POLLERR)); 6301 6302 /* 6303 * Notes on T_CONN_IND handling for sockets. 6304 * 6305 * If strpoll() returned without events, SR_POLLIN is guaranteed 6306 * to be set, ensuring any subsequent strrput() runs pollwakeup(). 6307 * 6308 * Since the so_lock is not held, soqueueconnind() may have run 6309 * and a T_CONN_IND may be waiting. We now check for any queued 6310 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events 6311 * to ensure poll returns. 6312 * 6313 * However: 6314 * If the T_CONN_IND hasn't arrived by the time strpoll() returns, 6315 * when strrput() does run for an arriving M_PROTO with T_CONN_IND 6316 * the following actions will occur; taken together they ensure the 6317 * syscall will return. 6318 * 6319 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if 6320 * the accept() was run on a non-blocking socket sowaitconnind() 6321 * may have already returned EWOULDBLOCK, so not be waiting to 6322 * process the message. Additionally socktpi_poll() has probably 6323 * proceeded past the sti_conn_ind_head check below. 6324 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake 6325 * this thread, however that could occur before poll_common() 6326 * has entered cv_wait. 6327 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock. 6328 * 6329 * Before proceeding to cv_wait() in poll_common() for an event, 6330 * poll_common() atomically checks for T_POLLWAKE under the pc_lock, 6331 * and if set, re-calls strpoll() to ensure the late arriving 6332 * T_CONN_IND is recognized, and pollsys() returns. 6333 */ 6334 6335 if (sti->sti_conn_ind_head != NULL) 6336 *reventsp |= (POLLIN|POLLRDNORM) & events; 6337 6338 if (so->so_state & SS_OOBPEND) 6339 *reventsp |= POLLRDBAND & events; 6340 6341 if (sti->sti_nl7c_rcv_mp != NULL) { 6342 *reventsp |= (POLLIN|POLLRDNORM) & events; 6343 } 6344 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 6345 ((POLLIN|POLLRDNORM) & *reventsp)) { 6346 sti->sti_nl7c_flags |= NL7C_POLLIN; 6347 } 6348 6349 return (0); 6350 } 6351 6352 /*ARGSUSED*/ 6353 static int 6354 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6355 { 6356 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6357 int error = 0; 6358 6359 error = sonode_constructor(buf, cdrarg, kmflags); 6360 if (error != 0) 6361 return (error); 6362 6363 error = i_sotpi_info_constructor(&st->st_info); 6364 if (error != 0) 6365 sonode_destructor(buf, cdrarg); 6366 6367 st->st_sonode.so_priv = &st->st_info; 6368 6369 return (error); 6370 } 6371 6372 /*ARGSUSED1*/ 6373 static void 6374 socktpi_destructor(void *buf, void *cdrarg) 6375 { 6376 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6377 6378 ASSERT(st->st_sonode.so_priv == &st->st_info); 6379 st->st_sonode.so_priv = NULL; 6380 6381 i_sotpi_info_destructor(&st->st_info); 6382 sonode_destructor(buf, cdrarg); 6383 } 6384 6385 static int 6386 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 6387 { 6388 int retval; 6389 6390 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6391 struct sonode *so = (struct sonode *)buf; 6392 sotpi_info_t *sti = SOTOTPI(so); 6393 6394 mutex_enter(&socklist.sl_lock); 6395 6396 sti->sti_next_so = socklist.sl_list; 6397 sti->sti_prev_so = NULL; 6398 if (sti->sti_next_so != NULL) 6399 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6400 socklist.sl_list = so; 6401 6402 mutex_exit(&socklist.sl_lock); 6403 6404 } 6405 return (retval); 6406 } 6407 6408 static void 6409 socktpi_unix_destructor(void *buf, void *cdrarg) 6410 { 6411 struct sonode *so = (struct sonode *)buf; 6412 sotpi_info_t *sti = SOTOTPI(so); 6413 6414 mutex_enter(&socklist.sl_lock); 6415 6416 if (sti->sti_next_so != NULL) 6417 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6418 if (sti->sti_prev_so != NULL) 6419 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6420 else 6421 socklist.sl_list = sti->sti_next_so; 6422 6423 mutex_exit(&socklist.sl_lock); 6424 6425 socktpi_destructor(buf, cdrarg); 6426 } 6427 6428 int 6429 socktpi_init(void) 6430 { 6431 /* 6432 * Create sonode caches. We create a special one for AF_UNIX so 6433 * that we can track them for netstat(1m). 6434 */ 6435 socktpi_cache = kmem_cache_create("socktpi_cache", 6436 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6437 socktpi_destructor, NULL, NULL, NULL, 0); 6438 6439 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6440 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6441 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6442 6443 return (0); 6444 } 6445 6446 /* 6447 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6448 * 6449 * Caller must still update state and mode using sotpi_update_state(). 6450 */ 6451 int 6452 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 6453 boolean_t *direct, queue_t **qp, struct cred *cr) 6454 { 6455 sotpi_info_t *sti; 6456 struct sockparams *origsp = so->so_sockparams; 6457 sock_lower_handle_t handle = so->so_proto_handle; 6458 struct stdata *stp; 6459 struct vnode *vp; 6460 queue_t *q; 6461 int error = 0; 6462 6463 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6464 SS_FALLBACK_PENDING); 6465 ASSERT(SOCK_IS_NONSTR(so)); 6466 6467 *qp = NULL; 6468 *direct = B_FALSE; 6469 so->so_sockparams = newsp; 6470 /* 6471 * Allocate and initalize fields required by TPI. 6472 */ 6473 (void) sotpi_info_create(so, KM_SLEEP); 6474 sotpi_info_init(so); 6475 6476 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) { 6477 sotpi_info_fini(so); 6478 sotpi_info_destroy(so); 6479 return (error); 6480 } 6481 ASSERT(handle == so->so_proto_handle); 6482 sti = SOTOTPI(so); 6483 if (sti->sti_direct != 0) 6484 *direct = B_TRUE; 6485 6486 /* 6487 * When it comes to urgent data we have two cases to deal with; 6488 * (1) The oob byte has already arrived, or (2) the protocol has 6489 * notified that oob data is pending, but it has not yet arrived. 6490 * 6491 * For (1) all we need to do is send a T_EXDATA_IND to indicate were 6492 * in the byte stream the oob byte is. For (2) we have to send a 6493 * SIGURG (M_PCSIG), followed by a zero-length mblk indicating whether 6494 * the oob byte will be the next byte from the protocol. 6495 * 6496 * So in the worst case we need two mblks, one for the signal, another 6497 * for mark indication. In that case we use the exdata_mp for the sig. 6498 */ 6499 sti->sti_exdata_mp = allocb_wait(sizeof (struct T_exdata_ind), BPRI_MED, 6500 STR_NOSIG, NULL); 6501 sti->sti_urgmark_mp = allocb_wait(0, BPRI_MED, STR_NOSIG, NULL); 6502 6503 /* 6504 * Keep the original sp around so we can properly dispose of the 6505 * sonode when the socket is being closed. 6506 */ 6507 sti->sti_orig_sp = origsp; 6508 6509 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 6510 so_alloc_addr(so, so->so_max_addr_len); 6511 6512 /* 6513 * If the application has done a SIOCSPGRP, make sure the 6514 * STREAM head is aware. This needs to take place before 6515 * the protocol start sending up messages. Otherwise we 6516 * might miss to generate SIGPOLL. 6517 * 6518 * It is possible that the application will receive duplicate 6519 * signals if some were already generated for either data or 6520 * connection indications. 6521 */ 6522 if (so->so_pgrp != 0) { 6523 if (so_set_events(so, so->so_vnode, cr) != 0) 6524 so->so_pgrp = 0; 6525 } 6526 6527 /* 6528 * Determine which queue to use. 6529 */ 6530 vp = SOTOV(so); 6531 stp = vp->v_stream; 6532 ASSERT(stp != NULL); 6533 q = stp->sd_wrq->q_next; 6534 6535 /* 6536 * Skip any modules that may have been auto pushed when the device 6537 * was opened 6538 */ 6539 while (q->q_next != NULL) 6540 q = q->q_next; 6541 *qp = _RD(q); 6542 6543 /* This is now a STREAMS sockets */ 6544 so->so_not_str = B_FALSE; 6545 6546 return (error); 6547 } 6548 6549 /* 6550 * Revert a TPI sonode. It is only allowed to revert the sonode during 6551 * the fallback process. 6552 */ 6553 void 6554 sotpi_revert_sonode(struct sonode *so, struct cred *cr) 6555 { 6556 vnode_t *vp = SOTOV(so); 6557 6558 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6559 SS_FALLBACK_PENDING); 6560 ASSERT(!SOCK_IS_NONSTR(so)); 6561 ASSERT(vp->v_stream != NULL); 6562 6563 if (SOTOTPI(so)->sti_exdata_mp != NULL) { 6564 freeb(SOTOTPI(so)->sti_exdata_mp); 6565 SOTOTPI(so)->sti_exdata_mp = NULL; 6566 } 6567 6568 if (SOTOTPI(so)->sti_urgmark_mp != NULL) { 6569 freeb(SOTOTPI(so)->sti_urgmark_mp); 6570 SOTOTPI(so)->sti_urgmark_mp = NULL; 6571 } 6572 6573 strclean(vp); 6574 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr); 6575 6576 /* 6577 * Restore the original sockparams. The caller is responsible for 6578 * dropping the ref to the new sp. 6579 */ 6580 so->so_sockparams = SOTOTPI(so)->sti_orig_sp; 6581 6582 sotpi_info_fini(so); 6583 sotpi_info_destroy(so); 6584 6585 /* This is no longer a STREAMS sockets */ 6586 so->so_not_str = B_TRUE; 6587 } 6588 6589 void 6590 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 6591 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 6592 socklen_t faddrlen, short opts) 6593 { 6594 sotpi_info_t *sti = SOTOTPI(so); 6595 6596 so_proc_tcapability_ack(so, tcap); 6597 6598 so->so_options |= opts; 6599 6600 /* 6601 * Determine whether the foreign and local address are valid 6602 */ 6603 if (laddrlen != 0) { 6604 ASSERT(laddrlen <= sti->sti_laddr_maxlen); 6605 sti->sti_laddr_len = laddrlen; 6606 bcopy(laddr, sti->sti_laddr_sa, laddrlen); 6607 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 6608 } 6609 6610 if (faddrlen != 0) { 6611 ASSERT(faddrlen <= sti->sti_faddr_maxlen); 6612 sti->sti_faddr_len = faddrlen; 6613 bcopy(faddr, sti->sti_faddr_sa, faddrlen); 6614 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 6615 } 6616 6617 } 6618 6619 /* 6620 * Allocate enough space to cache the local and foreign addresses. 6621 */ 6622 void 6623 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6624 { 6625 sotpi_info_t *sti = SOTOTPI(so); 6626 6627 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6628 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6629 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6630 P2ROUNDUP(maxlen, KMEM_ALIGN); 6631 so->so_max_addr_len = sti->sti_laddr_maxlen; 6632 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6633 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6634 + sti->sti_laddr_maxlen); 6635 6636 if (so->so_family == AF_UNIX) { 6637 /* 6638 * Initialize AF_UNIX related fields. 6639 */ 6640 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6641 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6642 } 6643 } 6644 6645 6646 sotpi_info_t * 6647 sotpi_sototpi(struct sonode *so) 6648 { 6649 sotpi_info_t *sti; 6650 6651 ASSERT(so != NULL); 6652 6653 sti = (sotpi_info_t *)so->so_priv; 6654 6655 ASSERT(sti != NULL); 6656 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6657 6658 return (sti); 6659 } 6660 6661 static int 6662 i_sotpi_info_constructor(sotpi_info_t *sti) 6663 { 6664 sti->sti_magic = SOTPI_INFO_MAGIC; 6665 sti->sti_ack_mp = NULL; 6666 sti->sti_discon_ind_mp = NULL; 6667 sti->sti_ux_bound_vp = NULL; 6668 sti->sti_unbind_mp = NULL; 6669 6670 sti->sti_conn_ind_head = NULL; 6671 sti->sti_conn_ind_tail = NULL; 6672 6673 sti->sti_laddr_sa = NULL; 6674 sti->sti_faddr_sa = NULL; 6675 6676 sti->sti_nl7c_flags = 0; 6677 sti->sti_nl7c_uri = NULL; 6678 sti->sti_nl7c_rcv_mp = NULL; 6679 6680 sti->sti_exdata_mp = NULL; 6681 sti->sti_urgmark_mp = NULL; 6682 6683 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6684 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6685 6686 return (0); 6687 } 6688 6689 static void 6690 i_sotpi_info_destructor(sotpi_info_t *sti) 6691 { 6692 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6693 ASSERT(sti->sti_ack_mp == NULL); 6694 ASSERT(sti->sti_discon_ind_mp == NULL); 6695 ASSERT(sti->sti_ux_bound_vp == NULL); 6696 ASSERT(sti->sti_unbind_mp == NULL); 6697 6698 ASSERT(sti->sti_conn_ind_head == NULL); 6699 ASSERT(sti->sti_conn_ind_tail == NULL); 6700 6701 ASSERT(sti->sti_laddr_sa == NULL); 6702 ASSERT(sti->sti_faddr_sa == NULL); 6703 6704 ASSERT(sti->sti_nl7c_flags == 0); 6705 ASSERT(sti->sti_nl7c_uri == NULL); 6706 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6707 6708 ASSERT(sti->sti_exdata_mp == NULL); 6709 ASSERT(sti->sti_urgmark_mp == NULL); 6710 6711 mutex_destroy(&sti->sti_plumb_lock); 6712 cv_destroy(&sti->sti_ack_cv); 6713 } 6714 6715 /* 6716 * Creates and attaches TPI information to the given sonode 6717 */ 6718 static boolean_t 6719 sotpi_info_create(struct sonode *so, int kmflags) 6720 { 6721 sotpi_info_t *sti; 6722 6723 ASSERT(so->so_priv == NULL); 6724 6725 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6726 return (B_FALSE); 6727 6728 if (i_sotpi_info_constructor(sti) != 0) { 6729 kmem_free(sti, sizeof (*sti)); 6730 return (B_FALSE); 6731 } 6732 6733 so->so_priv = (void *)sti; 6734 return (B_TRUE); 6735 } 6736 6737 /* 6738 * Initializes the TPI information. 6739 */ 6740 static void 6741 sotpi_info_init(struct sonode *so) 6742 { 6743 struct vnode *vp = SOTOV(so); 6744 sotpi_info_t *sti = SOTOTPI(so); 6745 time_t now; 6746 6747 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6748 vp->v_rdev = sti->sti_dev; 6749 6750 sti->sti_orig_sp = NULL; 6751 6752 sti->sti_pushcnt = 0; 6753 6754 now = gethrestime_sec(); 6755 sti->sti_atime = now; 6756 sti->sti_mtime = now; 6757 sti->sti_ctime = now; 6758 6759 sti->sti_eaddr_mp = NULL; 6760 sti->sti_delayed_error = 0; 6761 6762 sti->sti_provinfo = NULL; 6763 6764 sti->sti_oobcnt = 0; 6765 sti->sti_oobsigcnt = 0; 6766 6767 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6768 6769 sti->sti_laddr_sa = 0; 6770 sti->sti_faddr_sa = 0; 6771 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6772 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6773 6774 sti->sti_laddr_valid = 0; 6775 sti->sti_faddr_valid = 0; 6776 sti->sti_faddr_noxlate = 0; 6777 6778 sti->sti_direct = 0; 6779 6780 ASSERT(sti->sti_ack_mp == NULL); 6781 ASSERT(sti->sti_ux_bound_vp == NULL); 6782 ASSERT(sti->sti_unbind_mp == NULL); 6783 6784 ASSERT(sti->sti_conn_ind_head == NULL); 6785 ASSERT(sti->sti_conn_ind_tail == NULL); 6786 6787 /* Initialize the kernel SSL proxy fields */ 6788 sti->sti_kssl_type = KSSL_NO_PROXY; 6789 sti->sti_kssl_ent = NULL; 6790 sti->sti_kssl_ctx = NULL; 6791 } 6792 6793 /* 6794 * Given a sonode, grab the TPI info and free any data. 6795 */ 6796 static void 6797 sotpi_info_fini(struct sonode *so) 6798 { 6799 sotpi_info_t *sti = SOTOTPI(so); 6800 mblk_t *mp; 6801 6802 ASSERT(sti->sti_discon_ind_mp == NULL); 6803 6804 if ((mp = sti->sti_conn_ind_head) != NULL) { 6805 mblk_t *mp1; 6806 6807 while (mp) { 6808 mp1 = mp->b_next; 6809 mp->b_next = NULL; 6810 freemsg(mp); 6811 mp = mp1; 6812 } 6813 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6814 } 6815 6816 /* 6817 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6818 * indirect them. It also uses so_count as a validity test. 6819 */ 6820 mutex_enter(&so->so_lock); 6821 6822 if (sti->sti_laddr_sa) { 6823 ASSERT((caddr_t)sti->sti_faddr_sa == 6824 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6825 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6826 sti->sti_laddr_valid = 0; 6827 sti->sti_faddr_valid = 0; 6828 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6829 sti->sti_laddr_sa = NULL; 6830 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6831 sti->sti_faddr_sa = NULL; 6832 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6833 } 6834 6835 mutex_exit(&so->so_lock); 6836 6837 if ((mp = sti->sti_eaddr_mp) != NULL) { 6838 freemsg(mp); 6839 sti->sti_eaddr_mp = NULL; 6840 sti->sti_delayed_error = 0; 6841 } 6842 6843 if ((mp = sti->sti_ack_mp) != NULL) { 6844 freemsg(mp); 6845 sti->sti_ack_mp = NULL; 6846 } 6847 6848 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 6849 sti->sti_nl7c_rcv_mp = NULL; 6850 freemsg(mp); 6851 } 6852 sti->sti_nl7c_rcv_rval = 0; 6853 if (sti->sti_nl7c_uri != NULL) { 6854 nl7c_urifree(so); 6855 /* urifree() cleared nl7c_uri */ 6856 } 6857 if (sti->sti_nl7c_flags) { 6858 sti->sti_nl7c_flags = 0; 6859 } 6860 6861 ASSERT(sti->sti_ux_bound_vp == NULL); 6862 if ((mp = sti->sti_unbind_mp) != NULL) { 6863 freemsg(mp); 6864 sti->sti_unbind_mp = NULL; 6865 } 6866 } 6867 6868 /* 6869 * Destroys the TPI information attached to a sonode. 6870 */ 6871 static void 6872 sotpi_info_destroy(struct sonode *so) 6873 { 6874 sotpi_info_t *sti = SOTOTPI(so); 6875 6876 i_sotpi_info_destructor(sti); 6877 kmem_free(sti, sizeof (*sti)); 6878 6879 so->so_priv = NULL; 6880 } 6881 6882 /* 6883 * Create the global sotpi socket module entry. It will never be freed. 6884 */ 6885 smod_info_t * 6886 sotpi_smod_create(void) 6887 { 6888 smod_info_t *smodp; 6889 6890 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 6891 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 6892 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 6893 /* 6894 * Initialize the smod_refcnt to 1 so it will never be freed. 6895 */ 6896 smodp->smod_refcnt = 1; 6897 smodp->smod_uc_version = SOCK_UC_VERSION; 6898 smodp->smod_dc_version = SOCK_DC_VERSION; 6899 smodp->smod_sock_create_func = &sotpi_create; 6900 smodp->smod_sock_destroy_func = &sotpi_destroy; 6901 return (smodp); 6902 } 6903