1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2015, Joyent, Inc. 25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
26 * Copyright 2022 Garrett D'Amore 27 */ 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/kmem_impl.h> 38 #include <sys/sysmacros.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/debug.h> 42 #include <sys/errno.h> 43 #include <sys/time.h> 44 #include <sys/file.h> 45 #include <sys/open.h> 46 #include <sys/user.h> 47 #include <sys/termios.h> 48 #include <sys/stream.h> 49 #include <sys/strsubr.h> 50 #include <sys/strsun.h> 51 #include <sys/suntpi.h> 52 #include <sys/ddi.h> 53 #include <sys/esunddi.h> 54 #include <sys/flock.h> 55 #include <sys/modctl.h> 56 #include <sys/vtrace.h> 57 #include <sys/cmn_err.h> 58 #include <sys/pathname.h> 59 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <sys/un.h> 65 #include <sys/strsun.h> 66 67 #include <sys/tiuser.h> 68 #define _SUN_TPI_VERSION 2 69 #include <sys/tihdr.h> 70 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 71 72 #include <c2/audit.h> 73 74 #include <inet/common.h> 75 #include <inet/ip.h> 76 #include <inet/ip6.h> 77 #include <inet/tcp.h> 78 #include <inet/udp_impl.h> 79 80 #include <sys/zone.h> 81 82 #include <fs/sockfs/sockcommon.h> 83 #include <fs/sockfs/socktpi.h> 84 #include <fs/sockfs/socktpi_impl.h> 85 86 /* 87 * Possible failures when memory can't be allocated. 
The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *	(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *	(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *	TCP can handle an O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by an
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected-to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing an O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect.
This is a performance optimization. 140 * 141 * soaccept_tpi_tcp: 142 * TCP can handle a T_CONN_REQ without the acceptor being bound. 143 * This is a performance optimization that has been picked up in XTI. 144 * 145 * soaccept_tpi_multioptions: 146 * When inheriting SOL_SOCKET options from the listener to the accepting 147 * socket send them as a single message for AF_INET{,6}. 148 */ 149 int solisten_tpi_tcp = 0; 150 int soconnect_tpi_udp = 0; 151 int soconnect_tpi_tcp = 0; 152 int soaccept_tpi_tcp = 0; 153 int soaccept_tpi_multioptions = 1; 154 #else /* SOCK_TEST */ 155 #define soconnect_tpi_tcp 0 156 #define soconnect_tpi_udp 0 157 #define solisten_tpi_tcp 0 158 #define soaccept_tpi_tcp 0 159 #define soaccept_tpi_multioptions 1 160 #endif /* SOCK_TEST */ 161 162 #ifdef SOCK_TEST 163 extern int do_useracc; 164 extern clock_t sock_test_timelimit; 165 #endif /* SOCK_TEST */ 166 167 extern uint32_t ucredsize; 168 169 /* 170 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 171 * applications working. Turn on this flag to disable these checks. 172 */ 173 int xnet_skip_checks = 0; 174 int xnet_check_print = 0; 175 int xnet_truncate_print = 0; 176 177 static void sotpi_destroy(struct sonode *); 178 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int, 179 int, int *, cred_t *cr); 180 181 static boolean_t sotpi_info_create(struct sonode *, int); 182 static void sotpi_info_init(struct sonode *); 183 static void sotpi_info_fini(struct sonode *); 184 static void sotpi_info_destroy(struct sonode *); 185 186 /* 187 * Do direct function call to the transport layer below; this would 188 * also allow the transport to utilize read-side synchronous stream 189 * interface if necessary. This is a /etc/system tunable that must 190 * not be modified on a running system. By default this is enabled 191 * for performance reasons and may be disabled for debugging purposes. 
192 */ 193 boolean_t socktpi_direct = B_TRUE; 194 195 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 196 197 extern void sigintr(k_sigset_t *, int); 198 extern void sigunintr(k_sigset_t *); 199 200 static int sotpi_unbind(struct sonode *, int); 201 202 /* TPI sockfs sonode operations */ 203 int sotpi_init(struct sonode *, struct sonode *, struct cred *, 204 int); 205 static int sotpi_accept(struct sonode *, int, struct cred *, 206 struct sonode **); 207 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 208 int, struct cred *); 209 static int sotpi_listen(struct sonode *, int, struct cred *); 210 static int sotpi_connect(struct sonode *, struct sockaddr *, 211 socklen_t, int, int, struct cred *); 212 extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 213 struct uio *, struct cred *); 214 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 215 struct uio *, struct cred *); 216 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 217 struct cred *, mblk_t **); 218 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 219 struct uio *, void *, t_uscalar_t, int); 220 static int sodgram_direct(struct sonode *, struct sockaddr *, 221 socklen_t, struct uio *, int); 222 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 223 socklen_t *, boolean_t, struct cred *); 224 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 225 socklen_t *, struct cred *); 226 static int sotpi_shutdown(struct sonode *, int, struct cred *); 227 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 228 socklen_t *, int, struct cred *); 229 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 230 socklen_t, struct cred *); 231 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 232 int32_t *); 233 static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int, 234 struct cred *, int32_t *); 235 static int sotpi_poll(struct sonode *, short, 
int, short *, 236 struct pollhead **); 237 static int sotpi_close(struct sonode *, int, struct cred *); 238 239 static int i_sotpi_info_constructor(sotpi_info_t *); 240 static void i_sotpi_info_destructor(sotpi_info_t *); 241 242 sonodeops_t sotpi_sonodeops = { 243 sotpi_init, /* sop_init */ 244 sotpi_accept, /* sop_accept */ 245 sotpi_bind, /* sop_bind */ 246 sotpi_listen, /* sop_listen */ 247 sotpi_connect, /* sop_connect */ 248 sotpi_recvmsg, /* sop_recvmsg */ 249 sotpi_sendmsg, /* sop_sendmsg */ 250 sotpi_sendmblk, /* sop_sendmblk */ 251 sotpi_getpeername, /* sop_getpeername */ 252 sotpi_getsockname, /* sop_getsockname */ 253 sotpi_shutdown, /* sop_shutdown */ 254 sotpi_getsockopt, /* sop_getsockopt */ 255 sotpi_setsockopt, /* sop_setsockopt */ 256 sotpi_ioctl, /* sop_ioctl */ 257 sotpi_poll, /* sop_poll */ 258 sotpi_close, /* sop_close */ 259 }; 260 261 /* 262 * Return a TPI socket vnode. 263 * 264 * Note that sockets assume that the driver will clone (either itself 265 * or by using the clone driver) i.e. a socket() call will always 266 * result in a new vnode being created. 267 */ 268 269 /* 270 * Common create code for socket and accept. If tso is set the values 271 * from that node is used instead of issuing a T_INFO_REQ. 272 */ 273 274 /* ARGSUSED */ 275 static struct sonode * 276 sotpi_create(struct sockparams *sp, int family, int type, int protocol, 277 int version, int sflags, int *errorp, cred_t *cr) 278 { 279 struct sonode *so; 280 kmem_cache_t *cp; 281 int sfamily = family; 282 283 ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 284 285 if (family == AF_NCA) { 286 /* 287 * The request is for an NCA socket so for NL7C use the 288 * INET domain instead and mark NL7C_AF_NCA below. 289 */ 290 family = AF_INET; 291 /* 292 * NL7C is not supported in the non-global zone, 293 * we enforce this restriction here. 
294 */ 295 if (getzoneid() != GLOBAL_ZONEID) { 296 *errorp = ENOTSUP; 297 return (NULL); 298 } 299 } 300 301 /* 302 * to be compatible with old tpi socket implementation ignore 303 * sleep flag (sflags) passed in 304 */ 305 cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 306 so = kmem_cache_alloc(cp, KM_SLEEP); 307 if (so == NULL) { 308 *errorp = ENOMEM; 309 return (NULL); 310 } 311 312 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 313 sotpi_info_init(so); 314 315 if (version == SOV_DEFAULT) 316 version = so_default_version; 317 318 so->so_version = (short)version; 319 *errorp = 0; 320 321 return (so); 322 } 323 324 static void 325 sotpi_destroy(struct sonode *so) 326 { 327 kmem_cache_t *cp; 328 struct sockparams *origsp; 329 330 /* 331 * If there is a new dealloc function (ie. smod_destroy_func), 332 * then it should check the correctness of the ops. 333 */ 334 335 ASSERT(so->so_ops == &sotpi_sonodeops); 336 337 origsp = SOTOTPI(so)->sti_orig_sp; 338 339 sotpi_info_fini(so); 340 341 if (so->so_state & SS_FALLBACK_COMP) { 342 /* 343 * A fallback happend, which means that a sotpi_info_t struct 344 * was allocated (as opposed to being allocated from the TPI 345 * sonode cache. Therefore we explicitly free the struct 346 * here. 347 */ 348 sotpi_info_destroy(so); 349 ASSERT(origsp != NULL); 350 351 origsp->sp_smod_info->smod_sock_destroy_func(so); 352 SOCKPARAMS_DEC_REF(origsp); 353 } else { 354 sonode_fini(so); 355 cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache : 356 socktpi_cache; 357 kmem_cache_free(cp, so); 358 } 359 } 360 361 /* ARGSUSED1 */ 362 int 363 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 364 { 365 major_t maj; 366 dev_t newdev; 367 struct vnode *vp; 368 int error = 0; 369 struct stdata *stp; 370 371 sotpi_info_t *sti = SOTOTPI(so); 372 373 dprint(1, ("sotpi_init()\n")); 374 375 /* 376 * over write the sleep flag passed in but that is ok 377 * as tpi socket does not honor sleep flag. 
378 */ 379 flags |= FREAD|FWRITE; 380 381 /* 382 * Record in so_flag that it is a clone. 383 */ 384 if (getmajor(sti->sti_dev) == clone_major) 385 so->so_flag |= SOCLONE; 386 387 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 388 (so->so_family == AF_INET || so->so_family == AF_INET6) && 389 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 390 so->so_protocol == IPPROTO_IP)) { 391 /* Tell tcp or udp that it's talking to sockets */ 392 flags |= SO_SOCKSTR; 393 394 /* 395 * Here we indicate to socktpi_open() our attempt to 396 * make direct calls between sockfs and transport. 397 * The final decision is left to socktpi_open(). 398 */ 399 sti->sti_direct = 1; 400 401 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 402 if (so->so_type == SOCK_STREAM && tso != NULL) { 403 if (SOTOTPI(tso)->sti_direct) { 404 /* 405 * Inherit sti_direct from listener and pass 406 * SO_ACCEPTOR open flag to tcp, indicating 407 * that this is an accept fast-path instance. 408 */ 409 flags |= SO_ACCEPTOR; 410 } else { 411 /* 412 * sti_direct is not set on listener, meaning 413 * that the listener has been converted from 414 * a socket to a stream. Ensure that the 415 * acceptor inherits these settings. 416 */ 417 sti->sti_direct = 0; 418 flags &= ~SO_SOCKSTR; 419 } 420 } 421 } 422 423 /* 424 * Tell local transport that it is talking to sockets. 425 */ 426 if (so->so_family == AF_UNIX) { 427 flags |= SO_SOCKSTR; 428 } 429 430 vp = SOTOV(so); 431 newdev = vp->v_rdev; 432 maj = getmajor(newdev); 433 ASSERT(STREAMSTAB(maj)); 434 435 error = stropen(vp, &newdev, flags, cr); 436 437 stp = vp->v_stream; 438 if (error == 0) { 439 if (so->so_flag & SOCLONE) 440 ASSERT(newdev != vp->v_rdev); 441 mutex_enter(&so->so_lock); 442 sti->sti_dev = newdev; 443 vp->v_rdev = newdev; 444 mutex_exit(&so->so_lock); 445 446 if (stp->sd_flag & STRISTTY) { 447 /* 448 * this is a post SVR4 tty driver - a socket can not 449 * be a controlling terminal. Fail the open. 
450 */ 451 (void) sotpi_close(so, flags, cr); 452 return (ENOTTY); /* XXX */ 453 } 454 455 ASSERT(stp->sd_wrq != NULL); 456 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 457 458 /* 459 * If caller is interested in doing direct function call 460 * interface to/from transport module, probe the module 461 * directly beneath the streamhead to see if it qualifies. 462 * 463 * We turn off the direct interface when qualifications fail. 464 * In the acceptor case, we simply turn off the sti_direct 465 * flag on the socket. We do the fallback after the accept 466 * has completed, before the new socket is returned to the 467 * application. 468 */ 469 if (sti->sti_direct) { 470 queue_t *tq = stp->sd_wrq->q_next; 471 472 /* 473 * sti_direct is currently supported and tested 474 * only for tcp/udp; this is the main reason to 475 * have the following assertions. 476 */ 477 ASSERT(so->so_family == AF_INET || 478 so->so_family == AF_INET6); 479 ASSERT(so->so_protocol == IPPROTO_UDP || 480 so->so_protocol == IPPROTO_TCP || 481 so->so_protocol == IPPROTO_IP); 482 ASSERT(so->so_type == SOCK_DGRAM || 483 so->so_type == SOCK_STREAM); 484 485 /* 486 * Abort direct call interface if the module directly 487 * underneath the stream head is not defined with the 488 * _D_DIRECT flag. This could happen in the tcp or 489 * udp case, when some other module is autopushed 490 * above it, or for some reasons the expected module 491 * isn't purely D_MP (which is the main requirement). 492 */ 493 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 494 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 495 int rval; 496 497 /* Continue on without direct calls */ 498 sti->sti_direct = 0; 499 500 /* 501 * Cannot issue ioctl on fallback socket since 502 * there is no conn associated with the queue. 503 * The fallback downcall will notify the proto 504 * of the change. 
505 */ 506 if (!(flags & SO_ACCEPTOR) && 507 !(flags & SO_FALLBACK)) { 508 if ((error = strioctl(vp, 509 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 510 cr, &rval)) != 0) { 511 (void) sotpi_close(so, flags, 512 cr); 513 return (error); 514 } 515 } 516 } 517 } 518 519 if (flags & SO_FALLBACK) { 520 /* 521 * The stream created does not have a conn. 522 * do stream set up after conn has been assigned 523 */ 524 return (error); 525 } 526 if (error = so_strinit(so, tso)) { 527 (void) sotpi_close(so, flags, cr); 528 return (error); 529 } 530 531 /* Enable sendfile() on AF_UNIX streams */ 532 if (so->so_family == AF_UNIX && so->so_type == SOCK_STREAM) { 533 mutex_enter(&so->so_lock); 534 so->so_mode |= SM_SENDFILESUPP; 535 mutex_exit(&so->so_lock); 536 } 537 538 /* Wildcard */ 539 if (so->so_protocol != so->so_sockparams->sp_protocol) { 540 int protocol = so->so_protocol; 541 /* 542 * Issue SO_PROTOTYPE setsockopt. 543 */ 544 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 545 &protocol, (t_uscalar_t)sizeof (protocol), cr); 546 if (error != 0) { 547 (void) sotpi_close(so, flags, cr); 548 /* 549 * Setsockopt often fails with ENOPROTOOPT but 550 * socket() should fail with 551 * EPROTONOSUPPORT/EPROTOTYPE. 552 */ 553 return (EPROTONOSUPPORT); 554 } 555 } 556 557 } else { 558 /* 559 * While the same socket can not be reopened (unlike specfs) 560 * the stream head sets STREOPENFAIL when the autopush fails. 561 */ 562 if ((stp != NULL) && 563 (stp->sd_flag & STREOPENFAIL)) { 564 /* 565 * Open failed part way through. 566 */ 567 mutex_enter(&stp->sd_lock); 568 stp->sd_flag &= ~STREOPENFAIL; 569 mutex_exit(&stp->sd_lock); 570 (void) sotpi_close(so, flags, cr); 571 return (error); 572 /*NOTREACHED*/ 573 } 574 ASSERT(stp == NULL); 575 } 576 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 577 "sockfs open:maj %d vp %p so %p error %d", 578 maj, vp, so, error); 579 return (error); 580 } 581 582 /* 583 * Bind the socket to an unspecified address in sockfs only. 
584 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 585 * required in all cases. 586 */ 587 static void 588 so_automatic_bind(struct sonode *so) 589 { 590 sotpi_info_t *sti = SOTOTPI(so); 591 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 592 593 ASSERT(MUTEX_HELD(&so->so_lock)); 594 ASSERT(!(so->so_state & SS_ISBOUND)); 595 ASSERT(sti->sti_unbind_mp); 596 597 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 598 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 599 sti->sti_laddr_sa->sa_family = so->so_family; 600 so->so_state |= SS_ISBOUND; 601 } 602 603 604 /* 605 * bind the socket. 606 * 607 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 608 * are passed in we allow rebinding. Note that for backwards compatibility 609 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 610 * Thus the rebinding code is currently not executed. 611 * 612 * The constraints for rebinding are: 613 * - it is a SOCK_DGRAM, or 614 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 615 * and no listen() has been done. 616 * This rebinding code was added based on some language in the XNET book 617 * about not returning EINVAL it the protocol allows rebinding. However, 618 * this language is not present in the Posix socket draft. Thus maybe the 619 * rebinding logic should be deleted from the source. 620 * 621 * A null "name" can be used to unbind the socket if: 622 * - it is a SOCK_DGRAM, or 623 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 624 * and no listen() has been done. 
625 */ 626 /* ARGSUSED */ 627 static int 628 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 629 socklen_t namelen, int backlog, int flags, struct cred *cr) 630 { 631 struct T_bind_req bind_req; 632 struct T_bind_ack *bind_ack; 633 int error = 0; 634 mblk_t *mp; 635 void *addr; 636 t_uscalar_t addrlen; 637 int unbind_on_err = 1; 638 boolean_t clear_acceptconn_on_err = B_FALSE; 639 boolean_t restore_backlog_on_err = B_FALSE; 640 int save_so_backlog; 641 t_scalar_t PRIM_type = O_T_BIND_REQ; 642 boolean_t tcp_udp_xport; 643 sotpi_info_t *sti = SOTOTPI(so); 644 645 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 646 (void *)so, (void *)name, namelen, backlog, flags, 647 pr_state(so->so_state, so->so_mode))); 648 649 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 650 651 if (!(flags & _SOBIND_LOCK_HELD)) { 652 mutex_enter(&so->so_lock); 653 so_lock_single(so); /* Set SOLOCKED */ 654 } else { 655 ASSERT(MUTEX_HELD(&so->so_lock)); 656 ASSERT(so->so_flag & SOLOCKED); 657 } 658 659 /* 660 * Make sure that there is a preallocated unbind_req message 661 * before binding. This message allocated when the socket is 662 * created but it might be have been consumed. 663 */ 664 if (sti->sti_unbind_mp == NULL) { 665 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 666 /* NOTE: holding so_lock while sleeping */ 667 sti->sti_unbind_mp = 668 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 669 cr); 670 } 671 672 if (flags & _SOBIND_REBIND) { 673 /* 674 * Called from solisten after doing an sotpi_unbind() or 675 * potentially without the unbind (latter for AF_INET{,6}). 
676 */ 677 ASSERT(name == NULL && namelen == 0); 678 679 if (so->so_family == AF_UNIX) { 680 ASSERT(sti->sti_ux_bound_vp); 681 addr = &sti->sti_ux_laddr; 682 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 683 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 684 "addr 0x%p, vp %p\n", 685 addrlen, 686 (void *)((struct so_ux_addr *)addr)->soua_vp, 687 (void *)sti->sti_ux_bound_vp)); 688 } else { 689 addr = sti->sti_laddr_sa; 690 addrlen = (t_uscalar_t)sti->sti_laddr_len; 691 } 692 } else if (flags & _SOBIND_UNSPEC) { 693 ASSERT(name == NULL && namelen == 0); 694 695 /* 696 * The caller checked SS_ISBOUND but not necessarily 697 * under so_lock 698 */ 699 if (so->so_state & SS_ISBOUND) { 700 /* No error */ 701 goto done; 702 } 703 704 /* Set an initial local address */ 705 switch (so->so_family) { 706 case AF_UNIX: 707 /* 708 * Use an address with same size as struct sockaddr 709 * just like BSD. 710 */ 711 sti->sti_laddr_len = 712 (socklen_t)sizeof (struct sockaddr); 713 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 714 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 715 sti->sti_laddr_sa->sa_family = so->so_family; 716 717 /* 718 * Pass down an address with the implicit bind 719 * magic number and the rest all zeros. 720 * The transport will return a unique address. 721 */ 722 sti->sti_ux_laddr.soua_vp = NULL; 723 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 724 addr = &sti->sti_ux_laddr; 725 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 726 break; 727 728 case AF_INET: 729 case AF_INET6: 730 /* 731 * An unspecified bind in TPI has a NULL address. 732 * Set the address in sockfs to have the sa_family. 733 */ 734 sti->sti_laddr_len = (so->so_family == AF_INET) ? 
735 (socklen_t)sizeof (sin_t) : 736 (socklen_t)sizeof (sin6_t); 737 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 738 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 739 sti->sti_laddr_sa->sa_family = so->so_family; 740 addr = NULL; 741 addrlen = 0; 742 break; 743 744 default: 745 /* 746 * An unspecified bind in TPI has a NULL address. 747 * Set the address in sockfs to be zero length. 748 * 749 * Can not assume there is a sa_family for all 750 * protocol families. For example, AF_X25 does not 751 * have a family field. 752 */ 753 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 754 sti->sti_laddr_len = 0; /* XXX correct? */ 755 addr = NULL; 756 addrlen = 0; 757 break; 758 } 759 760 } else { 761 if (so->so_state & SS_ISBOUND) { 762 /* 763 * If it is ok to rebind the socket, first unbind 764 * with the transport. A rebind to the NULL address 765 * is interpreted as an unbind. 766 * Note that a bind to NULL in BSD does unbind the 767 * socket but it fails with EINVAL. 768 * Note that regular sockets set SOV_SOCKBSD i.e. 769 * _SOBIND_SOCKBSD gets set here hence no type of 770 * socket does currently allow rebinding. 771 * 772 * If the name is NULL just do an unbind. 
773 */ 774 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 775 name != NULL) { 776 error = EINVAL; 777 unbind_on_err = 0; 778 eprintsoline(so, error); 779 goto done; 780 } 781 if ((so->so_mode & SM_CONNREQUIRED) && 782 (so->so_state & SS_CANTREBIND)) { 783 error = EINVAL; 784 unbind_on_err = 0; 785 eprintsoline(so, error); 786 goto done; 787 } 788 error = sotpi_unbind(so, 0); 789 if (error) { 790 eprintsoline(so, error); 791 goto done; 792 } 793 ASSERT(!(so->so_state & SS_ISBOUND)); 794 if (name == NULL) { 795 so->so_state &= 796 ~(SS_ISCONNECTED|SS_ISCONNECTING); 797 goto done; 798 } 799 } 800 801 /* X/Open requires this check */ 802 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 803 if (xnet_check_print) { 804 printf("sockfs: X/Open bind state check " 805 "caused EINVAL\n"); 806 } 807 error = EINVAL; 808 goto done; 809 } 810 811 switch (so->so_family) { 812 case AF_UNIX: 813 /* 814 * All AF_UNIX addresses are nul terminated 815 * when copied (copyin_name) in so the minimum 816 * length is 3 bytes. 817 */ 818 if (name == NULL || 819 (ssize_t)namelen <= sizeof (short) + 1) { 820 error = EISDIR; 821 eprintsoline(so, error); 822 goto done; 823 } 824 /* 825 * Verify so_family matches the bound family. 826 * BSD does not check this for AF_UNIX resulting 827 * in funny mknods. 828 */ 829 if (name->sa_family != so->so_family) { 830 error = EAFNOSUPPORT; 831 goto done; 832 } 833 break; 834 case AF_INET: 835 if (name == NULL) { 836 error = EINVAL; 837 eprintsoline(so, error); 838 goto done; 839 } 840 if ((size_t)namelen != sizeof (sin_t)) { 841 error = name->sa_family != so->so_family ? 842 EAFNOSUPPORT : EINVAL; 843 eprintsoline(so, error); 844 goto done; 845 } 846 if ((flags & _SOBIND_XPG4_2) && 847 (name->sa_family != so->so_family)) { 848 /* 849 * This check has to be made for X/Open 850 * sockets however application failures have 851 * been observed when it is applied to 852 * all sockets. 
853 */ 854 error = EAFNOSUPPORT; 855 eprintsoline(so, error); 856 goto done; 857 } 858 /* 859 * Force a zero sa_family to match so_family. 860 * 861 * Some programs like inetd(8) don't set the 862 * family field. Other programs leave 863 * sin_family set to garbage - SunOS 4.X does 864 * not check the family field on a bind. 865 * We use the family field that 866 * was passed in to the socket() call. 867 */ 868 name->sa_family = so->so_family; 869 break; 870 871 case AF_INET6: { 872 #ifdef DEBUG 873 sin6_t *sin6 = (sin6_t *)name; 874 #endif /* DEBUG */ 875 876 if (name == NULL) { 877 error = EINVAL; 878 eprintsoline(so, error); 879 goto done; 880 } 881 if ((size_t)namelen != sizeof (sin6_t)) { 882 error = name->sa_family != so->so_family ? 883 EAFNOSUPPORT : EINVAL; 884 eprintsoline(so, error); 885 goto done; 886 } 887 if (name->sa_family != so->so_family) { 888 /* 889 * With IPv6 we require the family to match 890 * unlike in IPv4. 891 */ 892 error = EAFNOSUPPORT; 893 eprintsoline(so, error); 894 goto done; 895 } 896 #ifdef DEBUG 897 /* 898 * Verify that apps don't forget to clear 899 * sin6_scope_id etc 900 */ 901 if (sin6->sin6_scope_id != 0 && 902 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 903 zcmn_err(getzoneid(), CE_WARN, 904 "bind with uninitialized sin6_scope_id " 905 "(%d) on socket. Pid = %d\n", 906 (int)sin6->sin6_scope_id, 907 (int)curproc->p_pid); 908 } 909 if (sin6->__sin6_src_id != 0) { 910 zcmn_err(getzoneid(), CE_WARN, 911 "bind with uninitialized __sin6_src_id " 912 "(%d) on socket. Pid = %d\n", 913 (int)sin6->__sin6_src_id, 914 (int)curproc->p_pid); 915 } 916 #endif /* DEBUG */ 917 break; 918 } 919 default: 920 /* 921 * Don't do any length or sa_family check to allow 922 * non-sockaddr style addresses. 
923 */ 924 if (name == NULL) { 925 error = EINVAL; 926 eprintsoline(so, error); 927 goto done; 928 } 929 break; 930 } 931 932 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 933 error = ENAMETOOLONG; 934 eprintsoline(so, error); 935 goto done; 936 } 937 /* 938 * Save local address. 939 */ 940 sti->sti_laddr_len = (socklen_t)namelen; 941 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 942 bcopy(name, sti->sti_laddr_sa, namelen); 943 944 addr = sti->sti_laddr_sa; 945 addrlen = (t_uscalar_t)sti->sti_laddr_len; 946 switch (so->so_family) { 947 case AF_INET6: 948 case AF_INET: 949 break; 950 case AF_UNIX: { 951 struct sockaddr_un *soun = 952 (struct sockaddr_un *)sti->sti_laddr_sa; 953 struct vnode *vp, *rvp; 954 struct vattr vattr; 955 956 ASSERT(sti->sti_ux_bound_vp == NULL); 957 /* 958 * Create vnode for the specified path name. 959 * Keep vnode held with a reference in sti_ux_bound_vp. 960 * Use the vnode pointer as the address used in the 961 * bind with the transport. 962 * 963 * Use the same mode as in BSD. In particular this does 964 * not observe the umask. 965 */ 966 /* MAXPATHLEN + soun_family + nul termination */ 967 if (sti->sti_laddr_len > 968 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 969 error = ENAMETOOLONG; 970 eprintsoline(so, error); 971 goto done; 972 } 973 vattr.va_type = VSOCK; 974 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 975 vattr.va_mask = AT_TYPE|AT_MODE; 976 /* NOTE: holding so_lock */ 977 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 978 EXCL, 0, &vp, CRMKNOD, 0, 0); 979 if (error) { 980 if (error == EEXIST) 981 error = EADDRINUSE; 982 eprintsoline(so, error); 983 goto done; 984 } 985 /* 986 * Establish pointer from the underlying filesystem 987 * vnode to the socket node. 988 * sti_ux_bound_vp and v_stream->sd_vnode form the 989 * cross-linkage between the underlying filesystem 990 * node and the socket node. 
991 */ 992 993 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 994 VN_HOLD(rvp); 995 VN_RELE(vp); 996 vp = rvp; 997 } 998 999 ASSERT(SOTOV(so)->v_stream); 1000 mutex_enter(&vp->v_lock); 1001 vp->v_stream = SOTOV(so)->v_stream; 1002 sti->sti_ux_bound_vp = vp; 1003 mutex_exit(&vp->v_lock); 1004 1005 /* 1006 * Use the vnode pointer value as a unique address 1007 * (together with the magic number to avoid conflicts 1008 * with implicit binds) in the transport provider. 1009 */ 1010 sti->sti_ux_laddr.soua_vp = 1011 (void *)sti->sti_ux_bound_vp; 1012 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1013 addr = &sti->sti_ux_laddr; 1014 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1015 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1016 addrlen, 1017 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1018 break; 1019 } 1020 } /* end switch (so->so_family) */ 1021 } 1022 1023 /* 1024 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1025 * the transport can start passing up T_CONN_IND messages 1026 * as soon as it receives the bind req and strsock_proto() 1027 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1028 */ 1029 if (flags & _SOBIND_LISTEN) { 1030 if ((so->so_state & SS_ACCEPTCONN) == 0) 1031 clear_acceptconn_on_err = B_TRUE; 1032 save_so_backlog = so->so_backlog; 1033 restore_backlog_on_err = B_TRUE; 1034 so->so_state |= SS_ACCEPTCONN; 1035 so->so_backlog = backlog; 1036 } 1037 1038 /* 1039 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1040 * for other transports we will send in a O_T_BIND_REQ. 
1041 */ 1042 if (tcp_udp_xport && 1043 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1044 PRIM_type = T_BIND_REQ; 1045 1046 bind_req.PRIM_type = PRIM_type; 1047 bind_req.ADDR_length = addrlen; 1048 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1049 bind_req.CONIND_number = backlog; 1050 /* NOTE: holding so_lock while sleeping */ 1051 mp = soallocproto2(&bind_req, sizeof (bind_req), 1052 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1053 sti->sti_laddr_valid = 0; 1054 1055 /* Done using sti_laddr_sa - can drop the lock */ 1056 mutex_exit(&so->so_lock); 1057 1058 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1059 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1060 if (error) { 1061 eprintsoline(so, error); 1062 mutex_enter(&so->so_lock); 1063 goto done; 1064 } 1065 1066 mutex_enter(&so->so_lock); 1067 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1068 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1069 if (error) { 1070 eprintsoline(so, error); 1071 goto done; 1072 } 1073 ASSERT(mp); 1074 /* 1075 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1076 * strsock_proto while the lock was dropped above, the bind 1077 * is allowed to complete. 1078 */ 1079 1080 /* Mark as bound. This will be undone if we detect errors below. */ 1081 if (flags & _SOBIND_NOXLATE) { 1082 ASSERT(so->so_family == AF_UNIX); 1083 sti->sti_faddr_noxlate = 1; 1084 } 1085 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1086 so->so_state |= SS_ISBOUND; 1087 ASSERT(sti->sti_unbind_mp); 1088 1089 /* note that we've already set SS_ACCEPTCONN above */ 1090 1091 /* 1092 * Recompute addrlen - an unspecied bind sent down an 1093 * address of length zero but we expect the appropriate length 1094 * in return. 1095 */ 1096 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 
1097 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1098 1099 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1100 /* 1101 * The alignment restriction is really too strict but 1102 * we want enough alignment to inspect the fields of 1103 * a sockaddr_in. 1104 */ 1105 addr = sogetoff(mp, bind_ack->ADDR_offset, 1106 bind_ack->ADDR_length, 1107 __TPI_ALIGN_SIZE); 1108 if (addr == NULL) { 1109 freemsg(mp); 1110 error = EPROTO; 1111 eprintsoline(so, error); 1112 goto done; 1113 } 1114 if (!(flags & _SOBIND_UNSPEC)) { 1115 /* 1116 * Verify that the transport didn't return something we 1117 * did not want e.g. an address other than what we asked for. 1118 * 1119 * NOTE: These checks would go away if/when we switch to 1120 * using the new TPI (in which the transport would fail 1121 * the request instead of assigning a different address). 1122 * 1123 * NOTE2: For protocols that we don't know (i.e. any 1124 * other than AF_INET6, AF_INET and AF_UNIX), we 1125 * cannot know if the transport should be expected to 1126 * return the same address as that requested. 1127 * 1128 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1129 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 1130 * 1131 * For example, in the case of netatalk it may be 1132 * inappropriate for the transport to return the 1133 * requested address (as it may have allocated a local 1134 * port number in behaviour similar to that of an 1135 * AF_INET bind request with a port number of zero). 1136 * 1137 * Given the definition of O_T_BIND_REQ, where the 1138 * transport may bind to an address other than the 1139 * requested address, it's not possible to determine 1140 * whether a returned address that differs from the 1141 * requested address is a reason to fail (because the 1142 * requested address was not available) or succeed 1143 * (because the transport allocated an appropriate 1144 * address and/or port). 
1145 * 1146 * sockfs currently requires that the transport return 1147 * the requested address in the T_BIND_ACK, unless 1148 * there is code here to allow for any discrepancy. 1149 * Such code exists for AF_INET and AF_INET6. 1150 * 1151 * Netatalk chooses to return the requested address 1152 * rather than the (correct) allocated address. This 1153 * means that netatalk violates the TPI specification 1154 * (and would not function correctly if used from a 1155 * TLI application), but it does mean that it works 1156 * with sockfs. 1157 * 1158 * As noted above, using the newer XTI bind primitive 1159 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1160 * allow sockfs to be more sure about whether or not 1161 * the bind request had succeeded (as transports are 1162 * not permitted to bind to a different address than 1163 * that requested - they must return failure). 1164 * Unfortunately, support for T_BIND_REQ may not be 1165 * present in all transport implementations (netatalk, 1166 * for example, doesn't have it), making the 1167 * transition difficult. 1168 */ 1169 if (bind_ack->ADDR_length != addrlen) { 1170 /* Assumes that the requested address was in use */ 1171 freemsg(mp); 1172 error = EADDRINUSE; 1173 eprintsoline(so, error); 1174 goto done; 1175 } 1176 1177 switch (so->so_family) { 1178 case AF_INET6: 1179 case AF_INET: { 1180 sin_t *rname, *aname; 1181 1182 rname = (sin_t *)addr; 1183 aname = (sin_t *)sti->sti_laddr_sa; 1184 1185 /* 1186 * Take advantage of the alignment 1187 * of sin_port and sin6_port which fall 1188 * in the same place in their data structures. 1189 * Just use sin_port for either address family. 1190 * 1191 * This may become a problem if (heaven forbid) 1192 * there's a separate ipv6port_reserved... :-P 1193 * 1194 * Binding to port 0 has the semantics of letting 1195 * the transport bind to any port. 
1196 * 1197 * If the transport is TCP or UDP since we had sent 1198 * a T_BIND_REQ we would not get a port other than 1199 * what we asked for. 1200 */ 1201 if (tcp_udp_xport) { 1202 /* 1203 * Pick up the new port number if we bound to 1204 * port 0. 1205 */ 1206 if (aname->sin_port == 0) 1207 aname->sin_port = rname->sin_port; 1208 sti->sti_laddr_valid = 1; 1209 break; 1210 } 1211 if (aname->sin_port != 0 && 1212 aname->sin_port != rname->sin_port) { 1213 freemsg(mp); 1214 error = EADDRINUSE; 1215 eprintsoline(so, error); 1216 goto done; 1217 } 1218 /* 1219 * Pick up the new port number if we bound to port 0. 1220 */ 1221 aname->sin_port = rname->sin_port; 1222 1223 /* 1224 * Unfortunately, addresses aren't _quite_ the same. 1225 */ 1226 if (so->so_family == AF_INET) { 1227 if (aname->sin_addr.s_addr != 1228 rname->sin_addr.s_addr) { 1229 freemsg(mp); 1230 error = EADDRNOTAVAIL; 1231 eprintsoline(so, error); 1232 goto done; 1233 } 1234 } else { 1235 sin6_t *rname6 = (sin6_t *)rname; 1236 sin6_t *aname6 = (sin6_t *)aname; 1237 1238 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1239 &rname6->sin6_addr)) { 1240 freemsg(mp); 1241 error = EADDRNOTAVAIL; 1242 eprintsoline(so, error); 1243 goto done; 1244 } 1245 } 1246 break; 1247 } 1248 case AF_UNIX: 1249 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1250 freemsg(mp); 1251 error = EADDRINUSE; 1252 eprintsoline(so, error); 1253 eprintso(so, 1254 ("addrlen %d, addr 0x%x, vp %p\n", 1255 addrlen, *((int *)addr), 1256 (void *)sti->sti_ux_bound_vp)); 1257 goto done; 1258 } 1259 sti->sti_laddr_valid = 1; 1260 break; 1261 default: 1262 /* 1263 * NOTE: This assumes that addresses can be 1264 * byte-compared for equivalence. 
1265 */ 1266 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1267 freemsg(mp); 1268 error = EADDRINUSE; 1269 eprintsoline(so, error); 1270 goto done; 1271 } 1272 /* 1273 * Don't mark sti_laddr_valid, as we cannot be 1274 * sure that the returned address is the real 1275 * bound address when talking to an unknown 1276 * transport. 1277 */ 1278 break; 1279 } 1280 } else { 1281 /* 1282 * Save for returned address for getsockname. 1283 * Needed for unspecific bind unless transport supports 1284 * the TI_GETMYNAME ioctl. 1285 * Do this for AF_INET{,6} even though they do, as 1286 * caching info here is much better performance than 1287 * a TPI/STREAMS trip to the transport for getsockname. 1288 * Any which can't for some reason _must_ _not_ set 1289 * sti_laddr_valid here for the caching version of 1290 * getsockname to not break; 1291 */ 1292 switch (so->so_family) { 1293 case AF_UNIX: 1294 /* 1295 * Record the address bound with the transport 1296 * for use by socketpair. 1297 */ 1298 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1299 sti->sti_laddr_valid = 1; 1300 break; 1301 case AF_INET: 1302 case AF_INET6: 1303 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1304 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1305 sti->sti_laddr_valid = 1; 1306 break; 1307 default: 1308 /* 1309 * Don't mark sti_laddr_valid, as we cannot be 1310 * sure that the returned address is the real 1311 * bound address when talking to an unknown 1312 * transport. 
1313 */ 1314 break; 1315 } 1316 } 1317 1318 freemsg(mp); 1319 1320 done: 1321 if (error) { 1322 /* reset state & backlog to values held on entry */ 1323 if (clear_acceptconn_on_err == B_TRUE) 1324 so->so_state &= ~SS_ACCEPTCONN; 1325 if (restore_backlog_on_err == B_TRUE) 1326 so->so_backlog = save_so_backlog; 1327 1328 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1329 int err; 1330 1331 err = sotpi_unbind(so, 0); 1332 /* LINTED - statement has no consequent: if */ 1333 if (err) { 1334 eprintsoline(so, error); 1335 } else { 1336 ASSERT(!(so->so_state & SS_ISBOUND)); 1337 } 1338 } 1339 } 1340 if (!(flags & _SOBIND_LOCK_HELD)) { 1341 so_unlock_single(so, SOLOCKED); 1342 mutex_exit(&so->so_lock); 1343 } else { 1344 ASSERT(MUTEX_HELD(&so->so_lock)); 1345 ASSERT(so->so_flag & SOLOCKED); 1346 } 1347 return (error); 1348 } 1349 1350 /* bind the socket */ 1351 static int 1352 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1353 int flags, struct cred *cr) 1354 { 1355 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1356 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1357 1358 flags &= ~_SOBIND_SOCKETPAIR; 1359 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1360 } 1361 1362 /* 1363 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1364 * address, or when listen needs to unbind and bind. 1365 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1366 * so that a sobind can pick them up. 
1367 */ 1368 static int 1369 sotpi_unbind(struct sonode *so, int flags) 1370 { 1371 struct T_unbind_req unbind_req; 1372 int error = 0; 1373 mblk_t *mp; 1374 sotpi_info_t *sti = SOTOTPI(so); 1375 1376 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1377 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1378 1379 ASSERT(MUTEX_HELD(&so->so_lock)); 1380 ASSERT(so->so_flag & SOLOCKED); 1381 1382 if (!(so->so_state & SS_ISBOUND)) { 1383 error = EINVAL; 1384 eprintsoline(so, error); 1385 goto done; 1386 } 1387 1388 mutex_exit(&so->so_lock); 1389 1390 /* 1391 * Flush the read and write side (except stream head read queue) 1392 * and send down T_UNBIND_REQ. 1393 */ 1394 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1395 1396 unbind_req.PRIM_type = T_UNBIND_REQ; 1397 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1398 0, _ALLOC_SLEEP, CRED()); 1399 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1400 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1401 mutex_enter(&so->so_lock); 1402 if (error) { 1403 eprintsoline(so, error); 1404 goto done; 1405 } 1406 1407 error = sowaitokack(so, T_UNBIND_REQ); 1408 if (error) { 1409 eprintsoline(so, error); 1410 goto done; 1411 } 1412 1413 /* 1414 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1415 * strsock_proto while the lock was dropped above, the unbind 1416 * is allowed to complete. 1417 */ 1418 if (!(flags & _SOUNBIND_REBIND)) { 1419 /* 1420 * Clear out bound address. 1421 */ 1422 vnode_t *vp; 1423 1424 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1425 sti->sti_ux_bound_vp = NULL; 1426 vn_rele_stream(vp); 1427 } 1428 /* Clear out address */ 1429 sti->sti_laddr_len = 0; 1430 } 1431 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1432 sti->sti_laddr_valid = 0; 1433 1434 done: 1435 1436 /* If the caller held the lock don't release it here */ 1437 ASSERT(MUTEX_HELD(&so->so_lock)); 1438 ASSERT(so->so_flag & SOLOCKED); 1439 1440 return (error); 1441 } 1442 1443 /* 1444 * listen on the socket. 
1445 * For TPI conforming transports this has to first unbind with the transport 1446 * and then bind again using the new backlog. 1447 */ 1448 /* ARGSUSED */ 1449 int 1450 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1451 { 1452 int error = 0; 1453 sotpi_info_t *sti = SOTOTPI(so); 1454 1455 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1456 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1457 1458 if (sti->sti_serv_type == T_CLTS) 1459 return (EOPNOTSUPP); 1460 1461 /* 1462 * If the socket is ready to accept connections already, then 1463 * return without doing anything. This avoids a problem where 1464 * a second listen() call fails if a connection is pending and 1465 * leaves the socket unbound. Only when we are not unbinding 1466 * with the transport can we safely increase the backlog. 1467 */ 1468 if (so->so_state & SS_ACCEPTCONN && 1469 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1470 /*CONSTCOND*/ 1471 !solisten_tpi_tcp)) 1472 return (0); 1473 1474 if (so->so_state & SS_ISCONNECTED) 1475 return (EINVAL); 1476 1477 mutex_enter(&so->so_lock); 1478 so_lock_single(so); /* Set SOLOCKED */ 1479 1480 /* 1481 * If the listen doesn't change the backlog we do nothing. 1482 * This avoids an EPROTO error from the transport. 1483 */ 1484 if ((so->so_state & SS_ACCEPTCONN) && 1485 so->so_backlog == backlog) 1486 goto done; 1487 1488 if (!(so->so_state & SS_ISBOUND)) { 1489 /* 1490 * Must have been explicitly bound in the UNIX domain. 1491 */ 1492 if (so->so_family == AF_UNIX) { 1493 error = EINVAL; 1494 goto done; 1495 } 1496 error = sotpi_bindlisten(so, NULL, 0, backlog, 1497 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1498 } else if (backlog > 0) { 1499 /* 1500 * AF_INET{,6} hack to avoid losing the port. 1501 * Assumes that all AF_INET{,6} transports can handle a 1502 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1503 * has already bound thus it is possible to avoid the unbind. 
1504 */ 1505 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1506 /*CONSTCOND*/ 1507 !solisten_tpi_tcp)) { 1508 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1509 if (error) 1510 goto done; 1511 } 1512 error = sotpi_bindlisten(so, NULL, 0, backlog, 1513 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1514 } else { 1515 so->so_state |= SS_ACCEPTCONN; 1516 so->so_backlog = backlog; 1517 } 1518 if (error) 1519 goto done; 1520 ASSERT(so->so_state & SS_ACCEPTCONN); 1521 done: 1522 so_unlock_single(so, SOLOCKED); 1523 mutex_exit(&so->so_lock); 1524 return (error); 1525 } 1526 1527 /* 1528 * Disconnect either a specified seqno or all (-1). 1529 * The former is used on listening sockets only. 1530 * 1531 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1532 * the current use of sodisconnect(seqno == -1) is only for shutdown 1533 * so there is no point (and potentially incorrect) to unbind. 1534 */ 1535 static int 1536 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1537 { 1538 struct T_discon_req discon_req; 1539 int error = 0; 1540 mblk_t *mp; 1541 1542 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1543 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1544 1545 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1546 mutex_enter(&so->so_lock); 1547 so_lock_single(so); /* Set SOLOCKED */ 1548 } else { 1549 ASSERT(MUTEX_HELD(&so->so_lock)); 1550 ASSERT(so->so_flag & SOLOCKED); 1551 } 1552 1553 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1554 error = EINVAL; 1555 eprintsoline(so, error); 1556 goto done; 1557 } 1558 1559 mutex_exit(&so->so_lock); 1560 /* 1561 * Flush the write side (unless this is a listener) 1562 * and then send down a T_DISCON_REQ. 1563 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1564 * and other messages.) 
1565 */ 1566 if (!(so->so_state & SS_ACCEPTCONN)) 1567 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1568 1569 discon_req.PRIM_type = T_DISCON_REQ; 1570 discon_req.SEQ_number = seqno; 1571 mp = soallocproto1(&discon_req, sizeof (discon_req), 1572 0, _ALLOC_SLEEP, CRED()); 1573 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1574 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1575 mutex_enter(&so->so_lock); 1576 if (error) { 1577 eprintsoline(so, error); 1578 goto done; 1579 } 1580 1581 error = sowaitokack(so, T_DISCON_REQ); 1582 if (error) { 1583 eprintsoline(so, error); 1584 goto done; 1585 } 1586 /* 1587 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1588 * strsock_proto while the lock was dropped above, the disconnect 1589 * is allowed to complete. However, it is not possible to 1590 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1591 */ 1592 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1593 SOTOTPI(so)->sti_laddr_valid = 0; 1594 SOTOTPI(so)->sti_faddr_valid = 0; 1595 done: 1596 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1597 so_unlock_single(so, SOLOCKED); 1598 mutex_exit(&so->so_lock); 1599 } else { 1600 /* If the caller held the lock don't release it here */ 1601 ASSERT(MUTEX_HELD(&so->so_lock)); 1602 ASSERT(so->so_flag & SOLOCKED); 1603 } 1604 return (error); 1605 } 1606 1607 /* ARGSUSED */ 1608 int 1609 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1610 struct sonode **nsop) 1611 { 1612 struct T_conn_ind *conn_ind; 1613 struct T_conn_res *conn_res; 1614 int error = 0; 1615 mblk_t *mp, *ack_mp; 1616 struct sonode *nso; 1617 vnode_t *nvp; 1618 void *src; 1619 t_uscalar_t srclen; 1620 void *opt; 1621 t_uscalar_t optlen; 1622 t_scalar_t PRIM_type; 1623 t_scalar_t SEQ_number; 1624 size_t sinlen; 1625 sotpi_info_t *sti = SOTOTPI(so); 1626 sotpi_info_t *nsti; 1627 1628 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1629 (void *)so, fflag, (void *)nsop, 1630 pr_state(so->so_state, so->so_mode))); 1631 1632 /* 1633 * 
Defer single-threading the accepting socket until 1634 * the T_CONN_IND has been received and parsed and the 1635 * new sonode has been opened. 1636 */ 1637 1638 /* Check that we are not already connected */ 1639 if ((so->so_state & SS_ACCEPTCONN) == 0) 1640 goto conn_bad; 1641 again: 1642 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1643 goto e_bad; 1644 1645 ASSERT(mp != NULL); 1646 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1647 1648 /* 1649 * Save SEQ_number for error paths. 1650 */ 1651 SEQ_number = conn_ind->SEQ_number; 1652 1653 srclen = conn_ind->SRC_length; 1654 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1655 if (src == NULL) { 1656 error = EPROTO; 1657 freemsg(mp); 1658 eprintsoline(so, error); 1659 goto disconnect_unlocked; 1660 } 1661 optlen = conn_ind->OPT_length; 1662 switch (so->so_family) { 1663 case AF_INET: 1664 case AF_INET6: 1665 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1666 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1667 &opt, conn_ind->OPT_length); 1668 } else { 1669 /* 1670 * The transport (in this case TCP) hasn't sent up 1671 * a pointer to an instance for the accept fast-path. 1672 * Disable fast-path completely because the call to 1673 * sotpi_create() below would otherwise create an 1674 * incomplete TCP instance, which would lead to 1675 * problems when sockfs sends a normal T_CONN_RES 1676 * message down the new stream. 1677 */ 1678 if (sti->sti_direct) { 1679 int rval; 1680 /* 1681 * For consistency we inform tcp to disable 1682 * direct interface on the listener, though 1683 * we can certainly live without doing this 1684 * because no data will ever travel upstream 1685 * on the listening socket. 
1686 */ 1687 sti->sti_direct = 0; 1688 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1689 0, 0, K_TO_K, cr, &rval); 1690 } 1691 opt = NULL; 1692 optlen = 0; 1693 } 1694 break; 1695 case AF_UNIX: 1696 default: 1697 if (optlen != 0) { 1698 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1699 __TPI_ALIGN_SIZE); 1700 if (opt == NULL) { 1701 error = EPROTO; 1702 freemsg(mp); 1703 eprintsoline(so, error); 1704 goto disconnect_unlocked; 1705 } 1706 } 1707 if (so->so_family == AF_UNIX) { 1708 if (!sti->sti_faddr_noxlate) { 1709 src = NULL; 1710 srclen = 0; 1711 } 1712 /* Extract src address from options */ 1713 if (optlen != 0) 1714 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1715 } 1716 break; 1717 } 1718 1719 /* 1720 * Create the new socket. 1721 */ 1722 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1723 if (nso == NULL) { 1724 ASSERT(error != 0); 1725 /* 1726 * Accept can not fail with ENOBUFS. sotpi_create 1727 * sleeps waiting for memory until a signal is caught 1728 * so return EINTR. 1729 */ 1730 freemsg(mp); 1731 if (error == ENOBUFS) 1732 error = EINTR; 1733 goto e_disc_unl; 1734 } 1735 nvp = SOTOV(nso); 1736 nsti = SOTOTPI(nso); 1737 1738 #ifdef DEBUG 1739 /* 1740 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1741 * it's inherited early to allow debugging of the accept code itself. 1742 */ 1743 nso->so_options |= so->so_options & SO_DEBUG; 1744 #endif /* DEBUG */ 1745 1746 /* 1747 * Save the SRC address from the T_CONN_IND 1748 * for getpeername to work on AF_UNIX and on transports that do not 1749 * support TI_GETPEERNAME. 1750 * 1751 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1752 * copyin_name(). 
1753 */ 1754 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1755 error = EINVAL; 1756 freemsg(mp); 1757 eprintsoline(so, error); 1758 goto disconnect_vp_unlocked; 1759 } 1760 nsti->sti_faddr_len = (socklen_t)srclen; 1761 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1762 bcopy(src, nsti->sti_faddr_sa, srclen); 1763 nsti->sti_faddr_valid = 1; 1764 1765 /* 1766 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1767 */ 1768 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1769 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1770 cred_t *cr; 1771 pid_t cpid; 1772 1773 cr = msg_getcred(mp, &cpid); 1774 if (cr != NULL) { 1775 crhold(cr); 1776 nso->so_peercred = cr; 1777 nso->so_cpid = cpid; 1778 } 1779 freemsg(mp); 1780 1781 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1782 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1783 if (mp == NULL) { 1784 /* 1785 * Accept can not fail with ENOBUFS. 1786 * A signal was caught so return EINTR. 1787 */ 1788 error = EINTR; 1789 eprintsoline(so, error); 1790 goto disconnect_vp_unlocked; 1791 } 1792 conn_res = (struct T_conn_res *)mp->b_rptr; 1793 } else { 1794 /* 1795 * For efficency reasons we use msg_extractcred; no crhold 1796 * needed since db_credp is cleared (i.e., we move the cred 1797 * from the message to so_peercred. 1798 */ 1799 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1800 1801 mp->b_rptr = DB_BASE(mp); 1802 conn_res = (struct T_conn_res *)mp->b_rptr; 1803 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1804 1805 mblk_setcred(mp, cr, curproc->p_pid); 1806 } 1807 1808 /* 1809 * New socket must be bound at least in sockfs and, except for AF_INET, 1810 * (or AF_INET6) it also has to be bound in the transport provider. 1811 * We set the local address in the sonode from the T_OK_ACK of the 1812 * T_CONN_RES. For this reason the address we bind to here isn't 1813 * important. 
1814 */ 1815 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1816 /*CONSTCOND*/ 1817 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1818 /* 1819 * Optimization for AF_INET{,6} transports 1820 * that can handle a T_CONN_RES without being bound. 1821 */ 1822 mutex_enter(&nso->so_lock); 1823 so_automatic_bind(nso); 1824 mutex_exit(&nso->so_lock); 1825 } else { 1826 /* Perform NULL bind with the transport provider. */ 1827 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1828 cr)) != 0) { 1829 ASSERT(error != ENOBUFS); 1830 freemsg(mp); 1831 eprintsoline(nso, error); 1832 goto disconnect_vp_unlocked; 1833 } 1834 } 1835 1836 /* 1837 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1838 * so that any data arriving on the new socket will cause the 1839 * appropriate signals to be delivered for the new socket. 1840 * 1841 * No other thread (except strsock_proto and strsock_misc) 1842 * can access the new socket thus we relax the locking. 1843 */ 1844 nso->so_pgrp = so->so_pgrp; 1845 nso->so_state |= so->so_state & SS_ASYNC; 1846 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1847 1848 if (nso->so_pgrp != 0) { 1849 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1850 eprintsoline(nso, error); 1851 error = 0; 1852 nso->so_pgrp = 0; 1853 } 1854 } 1855 1856 /* 1857 * Make note of the socket level options. TCP and IP level options 1858 * are already inherited. We could do all this after accept is 1859 * successful but doing it here simplifies code and no harm done 1860 * for error case. 
1861 */ 1862 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1863 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1864 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1865 nso->so_sndbuf = so->so_sndbuf; 1866 nso->so_rcvbuf = so->so_rcvbuf; 1867 if (nso->so_options & SO_LINGER) 1868 nso->so_linger = so->so_linger; 1869 1870 /* 1871 * Note that the following sti_direct code path should be 1872 * removed once we are confident that the direct sockets 1873 * do not result in any degradation. 1874 */ 1875 if (sti->sti_direct) { 1876 1877 ASSERT(opt != NULL); 1878 1879 conn_res->OPT_length = optlen; 1880 conn_res->OPT_offset = MBLKL(mp); 1881 bcopy(&opt, mp->b_wptr, optlen); 1882 mp->b_wptr += optlen; 1883 conn_res->PRIM_type = T_CONN_RES; 1884 conn_res->ACCEPTOR_id = 0; 1885 PRIM_type = T_CONN_RES; 1886 1887 /* Send down the T_CONN_RES on acceptor STREAM */ 1888 error = kstrputmsg(SOTOV(nso), mp, NULL, 1889 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1890 if (error) { 1891 mutex_enter(&so->so_lock); 1892 so_lock_single(so); 1893 eprintsoline(so, error); 1894 goto disconnect_vp; 1895 } 1896 mutex_enter(&nso->so_lock); 1897 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1898 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1899 if (error) { 1900 mutex_exit(&nso->so_lock); 1901 mutex_enter(&so->so_lock); 1902 so_lock_single(so); 1903 eprintsoline(so, error); 1904 goto disconnect_vp; 1905 } 1906 if (nso->so_family == AF_INET) { 1907 sin_t *sin; 1908 1909 sin = (sin_t *)(ack_mp->b_rptr + 1910 sizeof (struct T_ok_ack)); 1911 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 1912 nsti->sti_laddr_len = sizeof (sin_t); 1913 } else { 1914 sin6_t *sin6; 1915 1916 sin6 = (sin6_t *)(ack_mp->b_rptr + 1917 sizeof (struct T_ok_ack)); 1918 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 1919 nsti->sti_laddr_len = sizeof (sin6_t); 1920 } 1921 freemsg(ack_mp); 1922 1923 nso->so_state |= SS_ISCONNECTED; 1924 nso->so_proto_handle = (sock_lower_handle_t)opt; 1925 
nsti->sti_laddr_valid = 1; 1926 1927 mutex_exit(&nso->so_lock); 1928 1929 /* 1930 * It's possible, through the use of autopush for example, 1931 * that the acceptor stream may not support sti_direct 1932 * semantics. If the new socket does not support sti_direct 1933 * we issue a _SIOCSOCKFALLBACK to inform the transport 1934 * as we would in the I_PUSH case. 1935 */ 1936 if (nsti->sti_direct == 0) { 1937 int rval; 1938 1939 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 1940 0, 0, K_TO_K, cr, &rval)) != 0) { 1941 mutex_enter(&so->so_lock); 1942 so_lock_single(so); 1943 eprintsoline(so, error); 1944 goto disconnect_vp; 1945 } 1946 } 1947 1948 /* 1949 * Pass out new socket. 1950 */ 1951 if (nsop != NULL) 1952 *nsop = nso; 1953 1954 return (0); 1955 } 1956 1957 /* 1958 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1959 * which don't support the FireEngine accept fast-path. It is also 1960 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1961 * again. Neither sockfs nor TCP attempt to find out if some other 1962 * random module has been inserted in between (in which case we 1963 * should follow TLI accept behaviour). We blindly assume the worst 1964 * case and revert back to old behaviour i.e. TCP will not send us 1965 * any option (eager) and the accept should happen on the listener 1966 * queue. Any queued T_conn_ind have already got their options removed 1967 * by so_sock2_stream() when "sockmod" was I_POP'd. 1968 */ 1969 /* 1970 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1971 */ 1972 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1973 #ifdef _ILP32 1974 queue_t *q; 1975 1976 /* 1977 * Find read queue in driver 1978 * Can safely do this since we "own" nso/nvp. 
1979 */ 1980 q = strvp2wq(nvp)->q_next; 1981 while (SAMESTR(q)) 1982 q = q->q_next; 1983 q = RD(q); 1984 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1985 #else 1986 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1987 #endif /* _ILP32 */ 1988 conn_res->PRIM_type = O_T_CONN_RES; 1989 PRIM_type = O_T_CONN_RES; 1990 } else { 1991 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 1992 conn_res->PRIM_type = T_CONN_RES; 1993 PRIM_type = T_CONN_RES; 1994 } 1995 conn_res->SEQ_number = SEQ_number; 1996 conn_res->OPT_length = 0; 1997 conn_res->OPT_offset = 0; 1998 1999 mutex_enter(&so->so_lock); 2000 so_lock_single(so); /* Set SOLOCKED */ 2001 mutex_exit(&so->so_lock); 2002 2003 error = kstrputmsg(SOTOV(so), mp, NULL, 2004 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2005 mutex_enter(&so->so_lock); 2006 if (error) { 2007 eprintsoline(so, error); 2008 goto disconnect_vp; 2009 } 2010 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2011 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2012 if (error) { 2013 eprintsoline(so, error); 2014 goto disconnect_vp; 2015 } 2016 mutex_exit(&so->so_lock); 2017 /* 2018 * If there is a sin/sin6 appended onto the T_OK_ACK use 2019 * that to set the local address. If this is not present 2020 * then we zero out the address and don't set the 2021 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2022 * the pathname from the listening socket. 2023 * In the case where this is TCP or an AF_UNIX socket the 2024 * client side may have queued data or a T_ORDREL in the 2025 * transport. Having now sent the T_CONN_RES we may receive 2026 * those queued messages at any time. Hold the acceptor 2027 * so_lock until its state and laddr are finalized. 2028 */ 2029 mutex_enter(&nso->so_lock); 2030 sinlen = (nso->so_family == AF_INET) ? 
sizeof (sin_t) : sizeof (sin6_t); 2031 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 2032 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2033 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2034 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2035 nsti->sti_laddr_len = sinlen; 2036 nsti->sti_laddr_valid = 1; 2037 } else if (nso->so_family == AF_UNIX) { 2038 ASSERT(so->so_family == AF_UNIX); 2039 nsti->sti_laddr_len = sti->sti_laddr_len; 2040 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2041 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2042 nsti->sti_laddr_len); 2043 nsti->sti_laddr_valid = 1; 2044 } else { 2045 nsti->sti_laddr_len = sti->sti_laddr_len; 2046 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2047 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2048 nsti->sti_laddr_sa->sa_family = nso->so_family; 2049 } 2050 nso->so_state |= SS_ISCONNECTED; 2051 mutex_exit(&nso->so_lock); 2052 2053 freemsg(ack_mp); 2054 2055 mutex_enter(&so->so_lock); 2056 so_unlock_single(so, SOLOCKED); 2057 mutex_exit(&so->so_lock); 2058 2059 /* 2060 * Pass out new socket. 
2061 */ 2062 if (nsop != NULL) 2063 *nsop = nso; 2064 2065 return (0); 2066 2067 2068 eproto_disc_unl: 2069 error = EPROTO; 2070 e_disc_unl: 2071 eprintsoline(so, error); 2072 goto disconnect_unlocked; 2073 2074 pr_disc_vp_unl: 2075 eprintsoline(so, error); 2076 disconnect_vp_unlocked: 2077 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2078 VN_RELE(nvp); 2079 disconnect_unlocked: 2080 (void) sodisconnect(so, SEQ_number, 0); 2081 return (error); 2082 2083 pr_disc_vp: 2084 eprintsoline(so, error); 2085 disconnect_vp: 2086 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2087 so_unlock_single(so, SOLOCKED); 2088 mutex_exit(&so->so_lock); 2089 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2090 VN_RELE(nvp); 2091 return (error); 2092 2093 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2094 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2095 ? EOPNOTSUPP : EINVAL; 2096 e_bad: 2097 eprintsoline(so, error); 2098 return (error); 2099 } 2100 2101 /* 2102 * connect a socket. 2103 * 2104 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2105 * unconnect (by specifying a null address). 2106 */ 2107 int 2108 sotpi_connect(struct sonode *so, 2109 struct sockaddr *name, 2110 socklen_t namelen, 2111 int fflag, 2112 int flags, 2113 struct cred *cr) 2114 { 2115 struct T_conn_req conn_req; 2116 int error = 0; 2117 mblk_t *mp; 2118 void *src; 2119 socklen_t srclen; 2120 void *addr; 2121 socklen_t addrlen; 2122 boolean_t need_unlock; 2123 sotpi_info_t *sti = SOTOTPI(so); 2124 2125 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2126 (void *)so, (void *)name, namelen, fflag, flags, 2127 pr_state(so->so_state, so->so_mode))); 2128 2129 /* 2130 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2131 * avoid sleeping for memory with SOLOCKED held. 2132 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2133 * + sizeof (struct T_opthdr). 
2134 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2135 * exceed sti_faddr_maxlen). 2136 */ 2137 mp = soallocproto(sizeof (struct T_conn_req) + 2138 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 2139 cr); 2140 if (mp == NULL) { 2141 /* 2142 * Connect can not fail with ENOBUFS. A signal was 2143 * caught so return EINTR. 2144 */ 2145 error = EINTR; 2146 eprintsoline(so, error); 2147 return (error); 2148 } 2149 2150 mutex_enter(&so->so_lock); 2151 /* 2152 * Make sure there is a preallocated T_unbind_req message 2153 * before any binding. This message is allocated when the 2154 * socket is created. Since another thread can consume 2155 * so_unbind_mp by the time we return from so_lock_single(), 2156 * we should check the availability of so_unbind_mp after 2157 * we return from so_lock_single(). 2158 */ 2159 2160 so_lock_single(so); /* Set SOLOCKED */ 2161 need_unlock = B_TRUE; 2162 2163 if (sti->sti_unbind_mp == NULL) { 2164 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2165 /* NOTE: holding so_lock while sleeping */ 2166 sti->sti_unbind_mp = 2167 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2168 if (sti->sti_unbind_mp == NULL) { 2169 error = EINTR; 2170 goto done; 2171 } 2172 } 2173 2174 /* 2175 * Can't have done a listen before connecting. 2176 */ 2177 if (so->so_state & SS_ACCEPTCONN) { 2178 error = EOPNOTSUPP; 2179 goto done; 2180 } 2181 2182 /* 2183 * Must be bound with the transport 2184 */ 2185 if (!(so->so_state & SS_ISBOUND)) { 2186 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2187 /*CONSTCOND*/ 2188 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2189 /* 2190 * Optimization for AF_INET{,6} transports 2191 * that can handle a T_CONN_REQ without being bound. 
2192 */ 2193 so_automatic_bind(so); 2194 } else { 2195 error = sotpi_bind(so, NULL, 0, 2196 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2197 if (error) 2198 goto done; 2199 } 2200 ASSERT(so->so_state & SS_ISBOUND); 2201 flags |= _SOCONNECT_DID_BIND; 2202 } 2203 2204 /* 2205 * Handle a connect to a name parameter of type AF_UNSPEC like a 2206 * connect to a null address. This is the portable method to 2207 * unconnect a socket. 2208 */ 2209 if ((namelen >= sizeof (sa_family_t)) && 2210 (name->sa_family == AF_UNSPEC)) { 2211 name = NULL; 2212 namelen = 0; 2213 } 2214 2215 /* 2216 * Check that we are not already connected. 2217 * A connection-oriented socket cannot be reconnected. 2218 * A connected connection-less socket can be 2219 * - connected to a different address by a subsequent connect 2220 * - "unconnected" by a connect to the NULL address 2221 */ 2222 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2223 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2224 if (so->so_mode & SM_CONNREQUIRED) { 2225 /* Connection-oriented socket */ 2226 error = so->so_state & SS_ISCONNECTED ? 2227 EISCONN : EALREADY; 2228 goto done; 2229 } 2230 /* Connection-less socket */ 2231 if (name == NULL) { 2232 /* 2233 * Remove the connected state and clear SO_DGRAM_ERRIND 2234 * since it was set when the socket was connected. 2235 * If this is UDP also send down a T_DISCON_REQ. 2236 */ 2237 int val; 2238 2239 if ((so->so_family == AF_INET || 2240 so->so_family == AF_INET6) && 2241 (so->so_type == SOCK_DGRAM || 2242 so->so_type == SOCK_RAW) && 2243 /*CONSTCOND*/ 2244 !soconnect_tpi_udp) { 2245 /* XXX What about implicitly unbinding here? 
*/ 2246 error = sodisconnect(so, -1, 2247 _SODISCONNECT_LOCK_HELD); 2248 } else { 2249 so->so_state &= 2250 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2251 sti->sti_faddr_valid = 0; 2252 sti->sti_faddr_len = 0; 2253 } 2254 2255 /* Remove SOLOCKED since setsockopt will grab it */ 2256 so_unlock_single(so, SOLOCKED); 2257 mutex_exit(&so->so_lock); 2258 2259 val = 0; 2260 (void) sotpi_setsockopt(so, SOL_SOCKET, 2261 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2262 cr); 2263 2264 mutex_enter(&so->so_lock); 2265 so_lock_single(so); /* Set SOLOCKED */ 2266 goto done; 2267 } 2268 } 2269 ASSERT(so->so_state & SS_ISBOUND); 2270 2271 if (name == NULL || namelen == 0) { 2272 error = EINVAL; 2273 goto done; 2274 } 2275 /* 2276 * Mark the socket if sti_faddr_sa represents the transport level 2277 * address. 2278 */ 2279 if (flags & _SOCONNECT_NOXLATE) { 2280 struct sockaddr_ux *soaddr_ux; 2281 2282 ASSERT(so->so_family == AF_UNIX); 2283 if (namelen != sizeof (struct sockaddr_ux)) { 2284 error = EINVAL; 2285 goto done; 2286 } 2287 soaddr_ux = (struct sockaddr_ux *)name; 2288 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2289 namelen = sizeof (soaddr_ux->sou_addr); 2290 sti->sti_faddr_noxlate = 1; 2291 } 2292 2293 /* 2294 * Length and family checks. 2295 */ 2296 error = so_addr_verify(so, name, namelen); 2297 if (error) 2298 goto bad; 2299 2300 /* 2301 * Save foreign address. Needed for AF_UNIX as well as 2302 * transport providers that do not support TI_GETPEERNAME. 2303 * Also used for cached foreign address for TCP and UDP. 2304 */ 2305 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2306 error = EINVAL; 2307 goto done; 2308 } 2309 sti->sti_faddr_len = (socklen_t)namelen; 2310 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2311 bcopy(name, sti->sti_faddr_sa, namelen); 2312 sti->sti_faddr_valid = 1; 2313 2314 if (so->so_family == AF_UNIX) { 2315 if (sti->sti_faddr_noxlate) { 2316 /* 2317 * sti_faddr is a transport-level address, so 2318 * don't pass it as an option. 
Do save it in 2319 * sti_ux_faddr, used for connected DG send. 2320 */ 2321 src = NULL; 2322 srclen = 0; 2323 addr = sti->sti_faddr_sa; 2324 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2325 bcopy(addr, &sti->sti_ux_faddr, 2326 sizeof (sti->sti_ux_faddr)); 2327 } else { 2328 /* 2329 * Pass the sockaddr_un source address as an option 2330 * and translate the remote address. 2331 * Holding so_lock thus sti_laddr_sa can not change. 2332 */ 2333 src = sti->sti_laddr_sa; 2334 srclen = (t_uscalar_t)sti->sti_laddr_len; 2335 dprintso(so, 1, 2336 ("sotpi_connect UNIX: srclen %d, src %p\n", 2337 srclen, src)); 2338 /* 2339 * Translate the destination address into our 2340 * internal form, and save it in sti_ux_faddr. 2341 * After this call, addr==&sti->sti_ux_taddr, 2342 * and we copy that to sti->sti_ux_faddr so 2343 * we save the connected peer address. 2344 */ 2345 error = so_ux_addr_xlate(so, 2346 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2347 (flags & _SOCONNECT_XPG4_2), 2348 &addr, &addrlen); 2349 if (error) 2350 goto bad; 2351 bcopy(&sti->sti_ux_taddr, &sti->sti_ux_faddr, 2352 sizeof (sti->sti_ux_faddr)); 2353 } 2354 } else { 2355 addr = sti->sti_faddr_sa; 2356 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2357 src = NULL; 2358 srclen = 0; 2359 } 2360 /* 2361 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2362 * option which asks the transport provider to send T_UDERR_IND 2363 * messages. These T_UDERR_IND messages are used to return connected 2364 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2365 * 2366 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2367 * we send down a T_CONN_REQ. This is needed to let the 2368 * transport assign a local address that is consistent with 2369 * the remote address. Applications depend on a getsockname() 2370 * after a connect() to retrieve the "source" IP address for 2371 * the connected socket. 
Invalidate the cached local address 2372 * to force getsockname() to enquire of the transport. 2373 */ 2374 if (!(so->so_mode & SM_CONNREQUIRED)) { 2375 /* 2376 * Datagram socket. 2377 */ 2378 int32_t val; 2379 2380 so_unlock_single(so, SOLOCKED); 2381 mutex_exit(&so->so_lock); 2382 2383 val = 1; 2384 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2385 &val, (t_uscalar_t)sizeof (val), cr); 2386 2387 mutex_enter(&so->so_lock); 2388 so_lock_single(so); /* Set SOLOCKED */ 2389 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2390 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2391 soconnect_tpi_udp) { 2392 soisconnected(so); 2393 goto done; 2394 } 2395 /* 2396 * Send down T_CONN_REQ etc. 2397 * Clear fflag to avoid returning EWOULDBLOCK. 2398 */ 2399 fflag = 0; 2400 ASSERT(so->so_family != AF_UNIX); 2401 sti->sti_laddr_valid = 0; 2402 } else if (sti->sti_laddr_len != 0) { 2403 /* 2404 * If the local address or port was "any" then it may be 2405 * changed by the transport as a result of the 2406 * connect. Invalidate the cached version if we have one. 2407 */ 2408 switch (so->so_family) { 2409 case AF_INET: 2410 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2411 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2412 INADDR_ANY || 2413 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2414 sti->sti_laddr_valid = 0; 2415 break; 2416 2417 case AF_INET6: 2418 ASSERT(sti->sti_laddr_len == 2419 (socklen_t)sizeof (sin6_t)); 2420 if (IN6_IS_ADDR_UNSPECIFIED( 2421 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2422 IN6_IS_ADDR_V4MAPPED_ANY( 2423 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2424 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2425 sti->sti_laddr_valid = 0; 2426 break; 2427 2428 default: 2429 break; 2430 } 2431 } 2432 2433 /* 2434 * Check for failure of an earlier call 2435 */ 2436 if (so->so_error != 0) 2437 goto so_bad; 2438 2439 /* 2440 * Send down T_CONN_REQ. Message was allocated above. 
2441 */ 2442 conn_req.PRIM_type = T_CONN_REQ; 2443 conn_req.DEST_length = addrlen; 2444 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2445 if (srclen == 0) { 2446 conn_req.OPT_length = 0; 2447 conn_req.OPT_offset = 0; 2448 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2449 soappendmsg(mp, addr, addrlen); 2450 } else { 2451 /* 2452 * There is a AF_UNIX sockaddr_un to include as a source 2453 * address option. 2454 */ 2455 struct T_opthdr toh; 2456 2457 toh.level = SOL_SOCKET; 2458 toh.name = SO_SRCADDR; 2459 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2460 toh.status = 0; 2461 conn_req.OPT_length = 2462 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2463 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2464 _TPI_ALIGN_TOPT(addrlen)); 2465 2466 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2467 soappendmsg(mp, addr, addrlen); 2468 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2469 soappendmsg(mp, &toh, sizeof (toh)); 2470 soappendmsg(mp, src, srclen); 2471 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2472 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2473 } 2474 /* 2475 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2476 * in order to have the right state when the T_CONN_CON shows up. 
2477 */ 2478 soisconnecting(so); 2479 mutex_exit(&so->so_lock); 2480 2481 if (AU_AUDITING()) 2482 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2483 2484 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2485 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2486 mp = NULL; 2487 mutex_enter(&so->so_lock); 2488 if (error != 0) 2489 goto bad; 2490 2491 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2492 goto bad; 2493 2494 /* Allow other threads to access the socket */ 2495 so_unlock_single(so, SOLOCKED); 2496 need_unlock = B_FALSE; 2497 2498 /* 2499 * Wait until we get a T_CONN_CON or an error 2500 */ 2501 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2502 so_lock_single(so); /* Set SOLOCKED */ 2503 need_unlock = B_TRUE; 2504 } 2505 2506 done: 2507 freemsg(mp); 2508 switch (error) { 2509 case EINPROGRESS: 2510 case EALREADY: 2511 case EISCONN: 2512 case EINTR: 2513 /* Non-fatal errors */ 2514 sti->sti_laddr_valid = 0; 2515 /* FALLTHRU */ 2516 case 0: 2517 break; 2518 default: 2519 ASSERT(need_unlock); 2520 /* 2521 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2522 * and invalidate local-address cache 2523 */ 2524 so->so_state &= ~SS_ISCONNECTING; 2525 sti->sti_laddr_valid = 0; 2526 /* A discon_ind might have already unbound us */ 2527 if ((flags & _SOCONNECT_DID_BIND) && 2528 (so->so_state & SS_ISBOUND)) { 2529 int err; 2530 2531 err = sotpi_unbind(so, 0); 2532 /* LINTED - statement has no conseq */ 2533 if (err) { 2534 eprintsoline(so, err); 2535 } 2536 } 2537 break; 2538 } 2539 if (need_unlock) 2540 so_unlock_single(so, SOLOCKED); 2541 mutex_exit(&so->so_lock); 2542 return (error); 2543 2544 so_bad: error = sogeterr(so, B_TRUE); 2545 bad: eprintsoline(so, error); 2546 goto done; 2547 } 2548 2549 /* ARGSUSED */ 2550 int 2551 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2552 { 2553 struct T_ordrel_req ordrel_req; 2554 mblk_t *mp; 2555 uint_t old_state, state_change; 2556 int error = 0; 2557 sotpi_info_t *sti = SOTOTPI(so); 2558 2559 
dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2560 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2561 2562 mutex_enter(&so->so_lock); 2563 so_lock_single(so); /* Set SOLOCKED */ 2564 2565 /* 2566 * SunOS 4.X has no check for datagram sockets. 2567 * 5.X checks that it is connected (ENOTCONN) 2568 * X/Open requires that we check the connected state. 2569 */ 2570 if (!(so->so_state & SS_ISCONNECTED)) { 2571 if (!xnet_skip_checks) { 2572 error = ENOTCONN; 2573 if (xnet_check_print) { 2574 printf("sockfs: X/Open shutdown check " 2575 "caused ENOTCONN\n"); 2576 } 2577 } 2578 goto done; 2579 } 2580 /* 2581 * Record the current state and then perform any state changes. 2582 * Then use the difference between the old and new states to 2583 * determine which messages need to be sent. 2584 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2585 * duplicate calls to shutdown(). 2586 */ 2587 old_state = so->so_state; 2588 2589 switch (how) { 2590 case 0: 2591 socantrcvmore(so); 2592 break; 2593 case 1: 2594 socantsendmore(so); 2595 break; 2596 case 2: 2597 socantsendmore(so); 2598 socantrcvmore(so); 2599 break; 2600 default: 2601 error = EINVAL; 2602 goto done; 2603 } 2604 2605 /* 2606 * Assumes that the SS_CANT* flags are never cleared in the above code. 2607 */ 2608 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2609 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2610 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2611 2612 switch (state_change) { 2613 case 0: 2614 dprintso(so, 1, 2615 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2616 so->so_state)); 2617 goto done; 2618 2619 case SS_CANTRCVMORE: 2620 mutex_exit(&so->so_lock); 2621 strseteof(SOTOV(so), 1); 2622 /* 2623 * strseteof takes care of read side wakeups, 2624 * pollwakeups, and signals. 2625 */ 2626 /* 2627 * Get the read lock before flushing data to avoid problems 2628 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2629 */ 2630 mutex_enter(&so->so_lock); 2631 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2632 mutex_exit(&so->so_lock); 2633 2634 /* Flush read side queue */ 2635 strflushrq(SOTOV(so), FLUSHALL); 2636 2637 mutex_enter(&so->so_lock); 2638 so_unlock_read(so); /* Clear SOREADLOCKED */ 2639 break; 2640 2641 case SS_CANTSENDMORE: 2642 mutex_exit(&so->so_lock); 2643 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2644 mutex_enter(&so->so_lock); 2645 break; 2646 2647 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2648 mutex_exit(&so->so_lock); 2649 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2650 strseteof(SOTOV(so), 1); 2651 /* 2652 * strseteof takes care of read side wakeups, 2653 * pollwakeups, and signals. 2654 */ 2655 /* 2656 * Get the read lock before flushing data to avoid problems 2657 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2658 */ 2659 mutex_enter(&so->so_lock); 2660 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2661 mutex_exit(&so->so_lock); 2662 2663 /* Flush read side queue */ 2664 strflushrq(SOTOV(so), FLUSHALL); 2665 2666 mutex_enter(&so->so_lock); 2667 so_unlock_read(so); /* Clear SOREADLOCKED */ 2668 break; 2669 } 2670 2671 ASSERT(MUTEX_HELD(&so->so_lock)); 2672 2673 /* 2674 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2675 * was set due to this call and the new state has both of them set: 2676 * Send the AF_UNIX close indication 2677 * For T_COTS send a discon_ind 2678 * 2679 * If cantsend was set due to this call: 2680 * For T_COTSORD send an ordrel_ind 2681 * 2682 * Note that for T_CLTS there is no message sent here. 2683 */ 2684 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2685 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2686 /* 2687 * For SunOS 4.X compatibility we tell the other end 2688 * that we are unable to receive at this point. 
2689 */ 2690 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2691 so_unix_close(so); 2692 2693 if (sti->sti_serv_type == T_COTS) 2694 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2695 } 2696 if ((state_change & SS_CANTSENDMORE) && 2697 (sti->sti_serv_type == T_COTS_ORD)) { 2698 /* Send an orderly release */ 2699 ordrel_req.PRIM_type = T_ORDREL_REQ; 2700 2701 mutex_exit(&so->so_lock); 2702 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2703 0, _ALLOC_SLEEP, cr); 2704 /* 2705 * Send down the T_ORDREL_REQ even if there is flow control. 2706 * This prevents shutdown from blocking. 2707 * Note that there is no T_OK_ACK for ordrel_req. 2708 */ 2709 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2710 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2711 mutex_enter(&so->so_lock); 2712 if (error) { 2713 eprintsoline(so, error); 2714 goto done; 2715 } 2716 } 2717 2718 done: 2719 so_unlock_single(so, SOLOCKED); 2720 mutex_exit(&so->so_lock); 2721 return (error); 2722 } 2723 2724 /* 2725 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2726 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2727 * that we have closed. 2728 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2729 * T_UNITDATA_REQ containing the same option. 2730 * 2731 * For SOCK_DGRAM half-connections (somebody connected to this end 2732 * but this end is not connect) we don't know where to send any 2733 * SO_UNIX_CLOSE. 2734 * 2735 * We have to ignore stream head errors just in case there has been 2736 * a shutdown(output). 2737 * Ignore any flow control to try to get the message more quickly to the peer. 2738 * While locally ignoring flow control solves the problem when there 2739 * is only the loopback transport on the stream it would not provide 2740 * the correct AF_UNIX socket semantics when one or more modules have 2741 * been pushed. 
2742 */ 2743 void 2744 so_unix_close(struct sonode *so) 2745 { 2746 struct T_opthdr toh; 2747 mblk_t *mp; 2748 sotpi_info_t *sti = SOTOTPI(so); 2749 2750 ASSERT(MUTEX_HELD(&so->so_lock)); 2751 2752 ASSERT(so->so_family == AF_UNIX); 2753 2754 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2755 (SS_ISCONNECTED|SS_ISBOUND)) 2756 return; 2757 2758 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2759 (void *)so, pr_state(so->so_state, so->so_mode))); 2760 2761 toh.level = SOL_SOCKET; 2762 toh.name = SO_UNIX_CLOSE; 2763 2764 /* zero length + header */ 2765 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2766 toh.status = 0; 2767 2768 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2769 struct T_optdata_req tdr; 2770 2771 tdr.PRIM_type = T_OPTDATA_REQ; 2772 tdr.DATA_flag = 0; 2773 2774 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2775 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2776 2777 /* NOTE: holding so_lock while sleeping */ 2778 mp = soallocproto2(&tdr, sizeof (tdr), 2779 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2780 } else { 2781 struct T_unitdata_req tudr; 2782 void *addr; 2783 socklen_t addrlen; 2784 void *src; 2785 socklen_t srclen; 2786 struct T_opthdr toh2; 2787 t_scalar_t size; 2788 2789 /* 2790 * We know this is an AF_UNIX connected DGRAM socket. 2791 * We therefore already have the destination address 2792 * in the internal form needed for this send. This is 2793 * similar to the sosend_dgram call later in this file 2794 * when there's no user-specified destination address. 2795 */ 2796 if (sti->sti_faddr_noxlate) { 2797 /* 2798 * Already have a transport internal address. Do not 2799 * pass any (transport internal) source address. 2800 */ 2801 addr = sti->sti_faddr_sa; 2802 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2803 src = NULL; 2804 srclen = 0; 2805 } else { 2806 /* 2807 * Pass the sockaddr_un source address as an option 2808 * and translate the remote address. 2809 * Holding so_lock thus sti_laddr_sa can not change. 
2810 */ 2811 src = sti->sti_laddr_sa; 2812 srclen = (socklen_t)sti->sti_laddr_len; 2813 dprintso(so, 1, 2814 ("so_ux_close: srclen %d, src %p\n", 2815 srclen, src)); 2816 /* 2817 * Use the destination address saved in connect. 2818 */ 2819 addr = &sti->sti_ux_faddr; 2820 addrlen = sizeof (sti->sti_ux_faddr); 2821 } 2822 tudr.PRIM_type = T_UNITDATA_REQ; 2823 tudr.DEST_length = addrlen; 2824 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2825 if (srclen == 0) { 2826 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2827 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2828 _TPI_ALIGN_TOPT(addrlen)); 2829 2830 size = tudr.OPT_offset + tudr.OPT_length; 2831 /* NOTE: holding so_lock while sleeping */ 2832 mp = soallocproto2(&tudr, sizeof (tudr), 2833 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2834 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2835 soappendmsg(mp, &toh, sizeof (toh)); 2836 } else { 2837 /* 2838 * There is a AF_UNIX sockaddr_un to include as a 2839 * source address option. 2840 */ 2841 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2842 _TPI_ALIGN_TOPT(srclen)); 2843 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2844 _TPI_ALIGN_TOPT(addrlen)); 2845 2846 toh2.level = SOL_SOCKET; 2847 toh2.name = SO_SRCADDR; 2848 toh2.len = (t_uscalar_t)(srclen + 2849 sizeof (struct T_opthdr)); 2850 toh2.status = 0; 2851 2852 size = tudr.OPT_offset + tudr.OPT_length; 2853 2854 /* NOTE: holding so_lock while sleeping */ 2855 mp = soallocproto2(&tudr, sizeof (tudr), 2856 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2857 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2858 soappendmsg(mp, &toh, sizeof (toh)); 2859 soappendmsg(mp, &toh2, sizeof (toh2)); 2860 soappendmsg(mp, src, srclen); 2861 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2862 } 2863 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2864 } 2865 mutex_exit(&so->so_lock); 2866 (void) kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2867 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2868 mutex_enter(&so->so_lock); 2869 } 
2870 2871 /* 2872 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2873 * In addition, the caller typically verifies that there is some 2874 * potential state to clear by checking 2875 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2876 * before calling this routine. 2877 * Note that such a check can be made without holding so_lock since 2878 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2879 * decrements sti_oobsigcnt. 2880 * 2881 * When data is read *after* the point that all pending 2882 * oob data has been consumed the oob indication is cleared. 2883 * 2884 * This logic keeps select/poll returning POLLRDBAND and 2885 * SIOCATMARK returning true until we have read past 2886 * the mark. 2887 */ 2888 static void 2889 sorecv_update_oobstate(struct sonode *so) 2890 { 2891 sotpi_info_t *sti = SOTOTPI(so); 2892 2893 mutex_enter(&so->so_lock); 2894 ASSERT(so_verify_oobstate(so)); 2895 dprintso(so, 1, 2896 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2897 sti->sti_oobsigcnt, 2898 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 2899 if (sti->sti_oobsigcnt == 0) { 2900 /* No more pending oob indications */ 2901 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2902 freemsg(so->so_oobmsg); 2903 so->so_oobmsg = NULL; 2904 } 2905 ASSERT(so_verify_oobstate(so)); 2906 mutex_exit(&so->so_lock); 2907 } 2908 2909 /* 2910 * Receive the next message on the queue. 2911 * If msg_controllen is non-zero when called the caller is interested in 2912 * any received control info (options). 2913 * If msg_namelen is non-zero when called the caller is interested in 2914 * any received source address. 2915 * The routine returns with msg_control and msg_name pointing to 2916 * kmem_alloc'ed memory which the caller has to free. 
2917 */ 2918 /* ARGSUSED */ 2919 int 2920 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 2921 struct cred *cr) 2922 { 2923 union T_primitives *tpr; 2924 mblk_t *mp; 2925 uchar_t pri; 2926 int pflag, opflag; 2927 void *control; 2928 t_uscalar_t controllen; 2929 t_uscalar_t namelen; 2930 int so_state = so->so_state; /* Snapshot */ 2931 ssize_t saved_resid; 2932 rval_t rval; 2933 int flags; 2934 clock_t timout; 2935 int error = 0; 2936 sotpi_info_t *sti = SOTOTPI(so); 2937 2938 flags = msg->msg_flags; 2939 msg->msg_flags = 0; 2940 2941 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 2942 (void *)so, (void *)msg, flags, 2943 pr_state(so->so_state, so->so_mode), so->so_error)); 2944 2945 if (so->so_version == SOV_STREAM) { 2946 so_update_attrs(so, SOACC); 2947 /* The imaginary "sockmod" has been popped - act as a stream */ 2948 return (strread(SOTOV(so), uiop, cr)); 2949 } 2950 2951 /* 2952 * If we are not connected because we have never been connected 2953 * we return ENOTCONN. If we have been connected (but are no longer 2954 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 2955 * the EOF. 2956 * 2957 * An alternative would be to post an ENOTCONN error in stream head 2958 * (read+write) and clear it when we're connected. However, that error 2959 * would cause incorrect poll/select behavior! 2960 */ 2961 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 2962 (so->so_mode & SM_CONNREQUIRED)) { 2963 return (ENOTCONN); 2964 } 2965 2966 /* 2967 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 2968 * after checking that the read queue is empty) and returns zero. 2969 * This implementation will sleep (in kstrgetmsg) even if uio_resid 2970 * is zero. 
2971 */ 2972 2973 if (flags & MSG_OOB) { 2974 /* Check that the transport supports OOB */ 2975 if (!(so->so_mode & SM_EXDATA)) 2976 return (EOPNOTSUPP); 2977 so_update_attrs(so, SOACC); 2978 return (sorecvoob(so, msg, uiop, flags, 2979 (so->so_options & SO_OOBINLINE))); 2980 } 2981 2982 so_update_attrs(so, SOACC); 2983 2984 /* 2985 * Set msg_controllen and msg_namelen to zero here to make it 2986 * simpler in the cases that no control or name is returned. 2987 */ 2988 controllen = msg->msg_controllen; 2989 namelen = msg->msg_namelen; 2990 msg->msg_controllen = 0; 2991 msg->msg_namelen = 0; 2992 2993 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 2994 namelen, controllen)); 2995 2996 mutex_enter(&so->so_lock); 2997 /* 2998 * Only one reader is allowed at any given time. This is needed 2999 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3000 * 3001 * This is slightly different that BSD behavior in that it fails with 3002 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3003 * is single-threaded using sblock(), which is dropped while waiting 3004 * for data to appear. The difference shows up e.g. if one 3005 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3006 * does use nonblocking io and different threads are reading each 3007 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3008 * in this case as long as the read queue doesn't get empty. 3009 * In this implementation the thread using nonblocking io can 3010 * get an EWOULDBLOCK error due to the blocking thread executing 3011 * e.g. in the uiomove in kstrgetmsg. 3012 * This difference is not believed to be significant. 3013 */ 3014 /* Set SOREADLOCKED */ 3015 error = so_lock_read_intr(so, 3016 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? 
FNONBLOCK : 0)); 3017 mutex_exit(&so->so_lock); 3018 if (error) 3019 return (error); 3020 3021 /* 3022 * Tell kstrgetmsg to not inspect the stream head errors until all 3023 * queued data has been consumed. 3024 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3025 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3026 * 3027 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3028 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3029 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3030 */ 3031 pflag = MSG_ANY | MSG_DELAYERROR; 3032 if (flags & MSG_PEEK) { 3033 pflag |= MSG_IPEEK; 3034 flags &= ~MSG_WAITALL; 3035 } 3036 if (so->so_mode & SM_ATOMIC) 3037 pflag |= MSG_DISCARDTAIL; 3038 3039 if (flags & MSG_DONTWAIT) 3040 timout = 0; 3041 else if (so->so_rcvtimeo != 0) 3042 timout = TICK_TO_MSEC(so->so_rcvtimeo); 3043 else 3044 timout = -1; 3045 opflag = pflag; 3046 retry: 3047 saved_resid = uiop->uio_resid; 3048 pri = 0; 3049 mp = NULL; 3050 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3051 timout, &rval); 3052 if (error != 0) { 3053 /* kstrgetmsg returns ETIME when timeout expires */ 3054 if (error == ETIME) 3055 error = EWOULDBLOCK; 3056 goto out; 3057 } 3058 /* 3059 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3060 * For non-datagrams MOREDATA is used to set MSG_EOR. 3061 */ 3062 ASSERT(!(rval.r_val1 & MORECTL)); 3063 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3064 msg->msg_flags |= MSG_TRUNC; 3065 3066 if (mp == NULL) { 3067 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3068 /* 3069 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3070 * The draft Posix socket spec states that the mark should 3071 * not be cleared when peeking. We follow the latter. 
3072 */ 3073 if ((so->so_state & 3074 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3075 (uiop->uio_resid != saved_resid) && 3076 !(flags & MSG_PEEK)) { 3077 sorecv_update_oobstate(so); 3078 } 3079 3080 mutex_enter(&so->so_lock); 3081 /* Set MSG_EOR based on MOREDATA */ 3082 if (!(rval.r_val1 & MOREDATA)) { 3083 if (so->so_state & SS_SAVEDEOR) { 3084 msg->msg_flags |= MSG_EOR; 3085 so->so_state &= ~SS_SAVEDEOR; 3086 } 3087 } 3088 /* 3089 * If some data was received (i.e. not EOF) and the 3090 * read/recv* has not been satisfied wait for some more. 3091 */ 3092 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3093 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3094 mutex_exit(&so->so_lock); 3095 pflag = opflag | MSG_NOMARK; 3096 goto retry; 3097 } 3098 goto out_locked; 3099 } 3100 3101 /* strsock_proto has already verified length and alignment */ 3102 tpr = (union T_primitives *)mp->b_rptr; 3103 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3104 3105 switch (tpr->type) { 3106 case T_DATA_IND: { 3107 if ((so->so_state & 3108 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3109 (uiop->uio_resid != saved_resid) && 3110 !(flags & MSG_PEEK)) { 3111 sorecv_update_oobstate(so); 3112 } 3113 3114 /* 3115 * Set msg_flags to MSG_EOR based on 3116 * MORE_flag and MOREDATA. 3117 */ 3118 mutex_enter(&so->so_lock); 3119 so->so_state &= ~SS_SAVEDEOR; 3120 if (!(tpr->data_ind.MORE_flag & 1)) { 3121 if (!(rval.r_val1 & MOREDATA)) 3122 msg->msg_flags |= MSG_EOR; 3123 else 3124 so->so_state |= SS_SAVEDEOR; 3125 } 3126 freemsg(mp); 3127 /* 3128 * If some data was received (i.e. not EOF) and the 3129 * read/recv* has not been satisfied wait for some more. 
3130 */ 3131 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3132 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3133 mutex_exit(&so->so_lock); 3134 pflag = opflag | MSG_NOMARK; 3135 goto retry; 3136 } 3137 goto out_locked; 3138 } 3139 case T_UNITDATA_IND: { 3140 void *addr; 3141 t_uscalar_t addrlen; 3142 void *abuf; 3143 t_uscalar_t optlen; 3144 void *opt; 3145 3146 if ((so->so_state & 3147 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3148 (uiop->uio_resid != saved_resid) && 3149 !(flags & MSG_PEEK)) { 3150 sorecv_update_oobstate(so); 3151 } 3152 3153 if (namelen != 0) { 3154 /* Caller wants source address */ 3155 addrlen = tpr->unitdata_ind.SRC_length; 3156 addr = sogetoff(mp, 3157 tpr->unitdata_ind.SRC_offset, 3158 addrlen, 1); 3159 if (addr == NULL) { 3160 freemsg(mp); 3161 error = EPROTO; 3162 eprintsoline(so, error); 3163 goto out; 3164 } 3165 if (so->so_family == AF_UNIX) { 3166 /* 3167 * Can not use the transport level address. 3168 * If there is a SO_SRCADDR option carrying 3169 * the socket level address it will be 3170 * extracted below. 3171 */ 3172 addr = NULL; 3173 addrlen = 0; 3174 } 3175 } 3176 optlen = tpr->unitdata_ind.OPT_length; 3177 if (optlen != 0) { 3178 t_uscalar_t ncontrollen; 3179 3180 /* 3181 * Extract any source address option. 3182 * Determine how large cmsg buffer is needed. 3183 */ 3184 opt = sogetoff(mp, 3185 tpr->unitdata_ind.OPT_offset, 3186 optlen, __TPI_ALIGN_SIZE); 3187 3188 if (opt == NULL) { 3189 freemsg(mp); 3190 error = EPROTO; 3191 eprintsoline(so, error); 3192 goto out; 3193 } 3194 if (so->so_family == AF_UNIX) 3195 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3196 ncontrollen = so_cmsglen(mp, opt, optlen, 3197 !(flags & MSG_XPG4_2)); 3198 if (controllen != 0) 3199 controllen = ncontrollen; 3200 else if (ncontrollen != 0) 3201 msg->msg_flags |= MSG_CTRUNC; 3202 } else { 3203 controllen = 0; 3204 } 3205 3206 if (namelen != 0) { 3207 /* 3208 * Return address to caller. 
3209 * Caller handles truncation if length 3210 * exceeds msg_namelen. 3211 * NOTE: AF_UNIX NUL termination is ensured by 3212 * the sender's copyin_name(). 3213 */ 3214 abuf = kmem_alloc(addrlen, KM_SLEEP); 3215 3216 bcopy(addr, abuf, addrlen); 3217 msg->msg_name = abuf; 3218 msg->msg_namelen = addrlen; 3219 } 3220 3221 if (controllen != 0) { 3222 /* 3223 * Return control msg to caller. 3224 * Caller handles truncation if length 3225 * exceeds msg_controllen. 3226 */ 3227 control = kmem_zalloc(controllen, KM_SLEEP); 3228 3229 error = so_opt2cmsg(mp, opt, optlen, 3230 !(flags & MSG_XPG4_2), 3231 control, controllen); 3232 if (error) { 3233 freemsg(mp); 3234 if (msg->msg_namelen != 0) 3235 kmem_free(msg->msg_name, 3236 msg->msg_namelen); 3237 kmem_free(control, controllen); 3238 eprintsoline(so, error); 3239 goto out; 3240 } 3241 msg->msg_control = control; 3242 msg->msg_controllen = controllen; 3243 } 3244 3245 freemsg(mp); 3246 goto out; 3247 } 3248 case T_OPTDATA_IND: { 3249 struct T_optdata_req *tdr; 3250 void *opt; 3251 t_uscalar_t optlen; 3252 3253 if ((so->so_state & 3254 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3255 (uiop->uio_resid != saved_resid) && 3256 !(flags & MSG_PEEK)) { 3257 sorecv_update_oobstate(so); 3258 } 3259 3260 tdr = (struct T_optdata_req *)mp->b_rptr; 3261 optlen = tdr->OPT_length; 3262 if (optlen != 0) { 3263 t_uscalar_t ncontrollen; 3264 /* 3265 * Determine how large cmsg buffer is needed. 3266 */ 3267 opt = sogetoff(mp, 3268 tpr->optdata_ind.OPT_offset, 3269 optlen, __TPI_ALIGN_SIZE); 3270 3271 if (opt == NULL) { 3272 freemsg(mp); 3273 error = EPROTO; 3274 eprintsoline(so, error); 3275 goto out; 3276 } 3277 3278 ncontrollen = so_cmsglen(mp, opt, optlen, 3279 !(flags & MSG_XPG4_2)); 3280 if (controllen != 0) 3281 controllen = ncontrollen; 3282 else if (ncontrollen != 0) 3283 msg->msg_flags |= MSG_CTRUNC; 3284 } else { 3285 controllen = 0; 3286 } 3287 3288 if (controllen != 0) { 3289 /* 3290 * Return control msg to caller. 
3291 * Caller handles truncation if length 3292 * exceeds msg_controllen. 3293 */ 3294 control = kmem_zalloc(controllen, KM_SLEEP); 3295 3296 error = so_opt2cmsg(mp, opt, optlen, 3297 !(flags & MSG_XPG4_2), 3298 control, controllen); 3299 if (error) { 3300 freemsg(mp); 3301 kmem_free(control, controllen); 3302 eprintsoline(so, error); 3303 goto out; 3304 } 3305 msg->msg_control = control; 3306 msg->msg_controllen = controllen; 3307 } 3308 3309 /* 3310 * Set msg_flags to MSG_EOR based on 3311 * DATA_flag and MOREDATA. 3312 */ 3313 mutex_enter(&so->so_lock); 3314 so->so_state &= ~SS_SAVEDEOR; 3315 if (!(tpr->data_ind.MORE_flag & 1)) { 3316 if (!(rval.r_val1 & MOREDATA)) 3317 msg->msg_flags |= MSG_EOR; 3318 else 3319 so->so_state |= SS_SAVEDEOR; 3320 } 3321 freemsg(mp); 3322 /* 3323 * If some data was received (i.e. not EOF) and the 3324 * read/recv* has not been satisfied wait for some more. 3325 * Not possible to wait if control info was received. 3326 */ 3327 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3328 controllen == 0 && 3329 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3330 mutex_exit(&so->so_lock); 3331 pflag = opflag | MSG_NOMARK; 3332 goto retry; 3333 } 3334 goto out_locked; 3335 } 3336 case T_EXDATA_IND: { 3337 dprintso(so, 1, 3338 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3339 "state %s\n", 3340 sti->sti_oobsigcnt, sti->sti_oobcnt, 3341 saved_resid - uiop->uio_resid, 3342 pr_state(so->so_state, so->so_mode))); 3343 /* 3344 * kstrgetmsg handles MSGMARK so there is nothing to 3345 * inspect in the T_EXDATA_IND. 3346 * strsock_proto makes the stream head queue the T_EXDATA_IND 3347 * as a separate message with no M_DATA component. Furthermore, 3348 * the stream head does not consolidate M_DATA messages onto 3349 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3350 * remains a message by itself. This is needed since MSGMARK 3351 * marks both the whole message as well as the last byte 3352 * of the message. 
3353 */ 3354 freemsg(mp); 3355 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3356 if (flags & MSG_PEEK) { 3357 /* 3358 * Even though we are peeking we consume the 3359 * T_EXDATA_IND thereby moving the mark information 3360 * to SS_RCVATMARK. Then the oob code below will 3361 * retry the peeking kstrgetmsg. 3362 * Note that the stream head read queue is 3363 * never flushed without holding SOREADLOCKED 3364 * thus the T_EXDATA_IND can not disappear 3365 * underneath us. 3366 */ 3367 dprintso(so, 1, 3368 ("sotpi_recvmsg: consume EXDATA_IND " 3369 "counts %d/%d state %s\n", 3370 sti->sti_oobsigcnt, 3371 sti->sti_oobcnt, 3372 pr_state(so->so_state, so->so_mode))); 3373 3374 pflag = MSG_ANY | MSG_DELAYERROR; 3375 if (so->so_mode & SM_ATOMIC) 3376 pflag |= MSG_DISCARDTAIL; 3377 3378 pri = 0; 3379 mp = NULL; 3380 3381 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3382 &pri, &pflag, (clock_t)-1, &rval); 3383 ASSERT(uiop->uio_resid == saved_resid); 3384 3385 if (error) { 3386 #ifdef SOCK_DEBUG 3387 if (error != EWOULDBLOCK && error != EINTR) { 3388 eprintsoline(so, error); 3389 } 3390 #endif /* SOCK_DEBUG */ 3391 goto out; 3392 } 3393 ASSERT(mp); 3394 tpr = (union T_primitives *)mp->b_rptr; 3395 ASSERT(tpr->type == T_EXDATA_IND); 3396 freemsg(mp); 3397 } /* end "if (flags & MSG_PEEK)" */ 3398 3399 /* 3400 * Decrement the number of queued and pending oob. 3401 * 3402 * SS_RCVATMARK is cleared when we read past a mark. 3403 * SS_HAVEOOBDATA is cleared when we've read past the 3404 * last mark. 3405 * SS_OOBPEND is cleared if we've read past the last 3406 * mark and no (new) SIGURG has been posted. 
3407 */ 3408 mutex_enter(&so->so_lock); 3409 ASSERT(so_verify_oobstate(so)); 3410 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3411 ASSERT(sti->sti_oobsigcnt > 0); 3412 sti->sti_oobsigcnt--; 3413 ASSERT(sti->sti_oobcnt > 0); 3414 sti->sti_oobcnt--; 3415 /* 3416 * Since the T_EXDATA_IND has been removed from the stream 3417 * head, but we have not read data past the mark, 3418 * sockfs needs to track that the socket is still at the mark. 3419 * 3420 * Since no data was received call kstrgetmsg again to wait 3421 * for data. 3422 */ 3423 so->so_state |= SS_RCVATMARK; 3424 mutex_exit(&so->so_lock); 3425 dprintso(so, 1, 3426 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3427 sti->sti_oobsigcnt, sti->sti_oobcnt, 3428 pr_state(so->so_state, so->so_mode))); 3429 pflag = opflag; 3430 goto retry; 3431 } 3432 default: 3433 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3434 (void *)so, tpr->type, (void *)mp); 3435 ASSERT(0); 3436 freemsg(mp); 3437 error = EPROTO; 3438 eprintsoline(so, error); 3439 goto out; 3440 } 3441 /* NOTREACHED */ 3442 out: 3443 mutex_enter(&so->so_lock); 3444 out_locked: 3445 so_unlock_read(so); /* Clear SOREADLOCKED */ 3446 mutex_exit(&so->so_lock); 3447 return (error); 3448 } 3449 3450 /* 3451 * Sending data with options on a datagram socket. 3452 * Assumes caller has verified that SS_ISBOUND etc. are set. 3453 * 3454 * For AF_UNIX the destination address may be already in 3455 * internal form, as indicated by sti->sti_faddr_noxlate 3456 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to 3457 * translate the destination address to internal form. 3458 * 3459 * The source address is passed as an option. If passing 3460 * file descriptors, those are passed as file pointers in 3461 * another option. 
3462 */ 3463 static int 3464 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3465 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3466 { 3467 struct T_unitdata_req tudr; 3468 mblk_t *mp; 3469 int error; 3470 void *addr; 3471 socklen_t addrlen; 3472 void *src; 3473 socklen_t srclen; 3474 ssize_t len; 3475 int size; 3476 struct T_opthdr toh; 3477 struct fdbuf *fdbuf; 3478 t_uscalar_t optlen; 3479 void *fds; 3480 int fdlen; 3481 sotpi_info_t *sti = SOTOTPI(so); 3482 3483 ASSERT(name && namelen); 3484 ASSERT(control && controllen); 3485 3486 len = uiop->uio_resid; 3487 if (len > (ssize_t)sti->sti_tidu_size) { 3488 return (EMSGSIZE); 3489 } 3490 3491 if (sti->sti_faddr_noxlate == 0 && 3492 (flags & MSG_SENDTO_NOXLATE) == 0) { 3493 /* 3494 * Length and family checks. 3495 * Don't verify internal form. 3496 */ 3497 error = so_addr_verify(so, name, namelen); 3498 if (error) { 3499 eprintsoline(so, error); 3500 return (error); 3501 } 3502 } 3503 3504 if (so->so_family == AF_UNIX) { 3505 if (sti->sti_faddr_noxlate) { 3506 /* 3507 * Already have a transport internal address. Do not 3508 * pass any (transport internal) source address. 3509 */ 3510 addr = name; 3511 addrlen = namelen; 3512 src = NULL; 3513 srclen = 0; 3514 } else if (flags & MSG_SENDTO_NOXLATE) { 3515 /* 3516 * Have an internal form dest. address. 3517 * Pass the source address as usual. 3518 */ 3519 addr = name; 3520 addrlen = namelen; 3521 src = sti->sti_laddr_sa; 3522 srclen = (socklen_t)sti->sti_laddr_len; 3523 } else { 3524 /* 3525 * Pass the sockaddr_un source address as an option 3526 * and translate the remote address. 3527 * 3528 * Note that this code does not prevent sti_laddr_sa 3529 * from changing while it is being used. Thus 3530 * if an unbind+bind occurs concurrently with this 3531 * send the peer might see a partially new and a 3532 * partially old "from" address. 
3533 */ 3534 src = sti->sti_laddr_sa; 3535 srclen = (socklen_t)sti->sti_laddr_len; 3536 dprintso(so, 1, 3537 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3538 srclen, src)); 3539 /* 3540 * The sendmsg caller specified a destination 3541 * address, which we must translate into our 3542 * internal form. addr = &sti->sti_ux_taddr 3543 */ 3544 error = so_ux_addr_xlate(so, name, namelen, 3545 (flags & MSG_XPG4_2), 3546 &addr, &addrlen); 3547 if (error) { 3548 eprintsoline(so, error); 3549 return (error); 3550 } 3551 } 3552 } else { 3553 addr = name; 3554 addrlen = namelen; 3555 src = NULL; 3556 srclen = 0; 3557 } 3558 optlen = so_optlen(control, controllen, 3559 !(flags & MSG_XPG4_2)); 3560 tudr.PRIM_type = T_UNITDATA_REQ; 3561 tudr.DEST_length = addrlen; 3562 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3563 if (srclen != 0) 3564 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3565 _TPI_ALIGN_TOPT(srclen)); 3566 else 3567 tudr.OPT_length = optlen; 3568 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3569 _TPI_ALIGN_TOPT(addrlen)); 3570 3571 size = tudr.OPT_offset + tudr.OPT_length; 3572 3573 /* 3574 * File descriptors only when SM_FDPASSING set. 3575 */ 3576 error = so_getfdopt(control, controllen, 3577 !(flags & MSG_XPG4_2), &fds, &fdlen); 3578 if (error) 3579 return (error); 3580 if (fdlen != -1) { 3581 if (!(so->so_mode & SM_FDPASSING)) 3582 return (EOPNOTSUPP); 3583 3584 error = fdbuf_create(fds, fdlen, &fdbuf); 3585 if (error) 3586 return (error); 3587 3588 /* 3589 * Pre-allocate enough additional space for lower level modules 3590 * to append an option (e.g. see tl_unitdata). The following 3591 * is enough extra space for the largest option we might append. 3592 */ 3593 size += sizeof (struct T_opthdr) + ucredsize; 3594 mp = fdbuf_allocmsg(size, fdbuf); 3595 } else { 3596 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3597 if (mp == NULL) { 3598 /* 3599 * Caught a signal waiting for memory. 3600 * Let send* return EINTR. 
3601 */ 3602 return (EINTR); 3603 } 3604 } 3605 soappendmsg(mp, &tudr, sizeof (tudr)); 3606 soappendmsg(mp, addr, addrlen); 3607 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3608 3609 if (fdlen != -1) { 3610 ASSERT(fdbuf != NULL); 3611 toh.level = SOL_SOCKET; 3612 toh.name = SO_FILEP; 3613 toh.len = fdbuf->fd_size + 3614 (t_uscalar_t)sizeof (struct T_opthdr); 3615 toh.status = 0; 3616 soappendmsg(mp, &toh, sizeof (toh)); 3617 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3618 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3619 } 3620 if (srclen != 0) { 3621 /* 3622 * There is a AF_UNIX sockaddr_un to include as a source 3623 * address option. 3624 */ 3625 toh.level = SOL_SOCKET; 3626 toh.name = SO_SRCADDR; 3627 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3628 toh.status = 0; 3629 soappendmsg(mp, &toh, sizeof (toh)); 3630 soappendmsg(mp, src, srclen); 3631 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3632 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3633 } 3634 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3635 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3636 /* 3637 * Normally at most 3 bytes left in the message, but we might have 3638 * allowed for extra space if we're passing fd's through. 3639 */ 3640 ASSERT(MBLKL(mp) <= (ssize_t)size); 3641 3642 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3643 if (AU_AUDITING()) 3644 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3645 3646 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3647 #ifdef SOCK_DEBUG 3648 if (error) { 3649 eprintsoline(so, error); 3650 } 3651 #endif /* SOCK_DEBUG */ 3652 return (error); 3653 } 3654 3655 /* 3656 * Sending data with options on a connected stream socket. 3657 * Assumes caller has verified that SS_ISCONNECTED is set. 
3658 */ 3659 static int 3660 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3661 t_uscalar_t controllen, int flags) 3662 { 3663 struct T_optdata_req tdr; 3664 mblk_t *mp; 3665 int error; 3666 ssize_t iosize; 3667 int size; 3668 struct fdbuf *fdbuf; 3669 t_uscalar_t optlen; 3670 void *fds; 3671 int fdlen; 3672 struct T_opthdr toh; 3673 sotpi_info_t *sti = SOTOTPI(so); 3674 3675 dprintso(so, 1, 3676 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3677 3678 /* 3679 * Has to be bound and connected. However, since no locks are 3680 * held the state could have changed after sotpi_sendmsg checked it 3681 * thus it is not possible to ASSERT on the state. 3682 */ 3683 3684 /* Options on connection-oriented only when SM_OPTDATA set. */ 3685 if (!(so->so_mode & SM_OPTDATA)) 3686 return (EOPNOTSUPP); 3687 3688 do { 3689 /* 3690 * Set the MORE flag if uio_resid does not fit in this 3691 * message or if the caller passed in "more". 3692 * Error for transports with zero tidu_size. 3693 */ 3694 tdr.PRIM_type = T_OPTDATA_REQ; 3695 iosize = sti->sti_tidu_size; 3696 if (iosize <= 0) 3697 return (EMSGSIZE); 3698 if (uiop->uio_resid > iosize) { 3699 tdr.DATA_flag = 1; 3700 } else { 3701 if (more) 3702 tdr.DATA_flag = 1; 3703 else 3704 tdr.DATA_flag = 0; 3705 iosize = uiop->uio_resid; 3706 } 3707 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3708 tdr.DATA_flag, iosize)); 3709 3710 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3711 tdr.OPT_length = optlen; 3712 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3713 3714 size = (int)sizeof (tdr) + optlen; 3715 /* 3716 * File descriptors only when SM_FDPASSING set. 
3717 */ 3718 error = so_getfdopt(control, controllen, 3719 !(flags & MSG_XPG4_2), &fds, &fdlen); 3720 if (error) 3721 return (error); 3722 if (fdlen != -1) { 3723 if (!(so->so_mode & SM_FDPASSING)) 3724 return (EOPNOTSUPP); 3725 3726 error = fdbuf_create(fds, fdlen, &fdbuf); 3727 if (error) 3728 return (error); 3729 3730 /* 3731 * Pre-allocate enough additional space for lower level 3732 * modules to append an option (e.g. see tl_unitdata). 3733 * The following is enough extra space for the largest 3734 * option we might append. 3735 */ 3736 size += sizeof (struct T_opthdr) + ucredsize; 3737 mp = fdbuf_allocmsg(size, fdbuf); 3738 } else { 3739 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3740 if (mp == NULL) { 3741 /* 3742 * Caught a signal waiting for memory. 3743 * Let send* return EINTR. 3744 */ 3745 return (EINTR); 3746 } 3747 } 3748 soappendmsg(mp, &tdr, sizeof (tdr)); 3749 3750 if (fdlen != -1) { 3751 ASSERT(fdbuf != NULL); 3752 toh.level = SOL_SOCKET; 3753 toh.name = SO_FILEP; 3754 toh.len = fdbuf->fd_size + 3755 (t_uscalar_t)sizeof (struct T_opthdr); 3756 toh.status = 0; 3757 soappendmsg(mp, &toh, sizeof (toh)); 3758 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3759 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3760 } 3761 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3762 /* 3763 * Normally at most 3 bytes left in the message, but we might 3764 * have allowed for extra space if we're passing fd's through. 3765 */ 3766 ASSERT(MBLKL(mp) <= (ssize_t)size); 3767 3768 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3769 3770 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3771 0, MSG_BAND, 0); 3772 if (error) { 3773 eprintsoline(so, error); 3774 return (error); 3775 } 3776 control = NULL; 3777 if (uiop->uio_resid > 0) { 3778 /* 3779 * Recheck for fatal errors. Fail write even though 3780 * some data have been written. This is consistent 3781 * with strwrite semantics and BSD sockets semantics. 
3782 */ 3783 if (so->so_state & SS_CANTSENDMORE) { 3784 eprintsoline(so, error); 3785 return (EPIPE); 3786 } 3787 if (so->so_error != 0) { 3788 mutex_enter(&so->so_lock); 3789 error = sogeterr(so, B_TRUE); 3790 mutex_exit(&so->so_lock); 3791 if (error != 0) { 3792 eprintsoline(so, error); 3793 return (error); 3794 } 3795 } 3796 } 3797 } while (uiop->uio_resid > 0); 3798 return (0); 3799 } 3800 3801 /* 3802 * Sending data on a datagram socket. 3803 * Assumes caller has verified that SS_ISBOUND etc. are set. 3804 * 3805 * For AF_UNIX the destination address may be already in 3806 * internal form, as indicated by sti->sti_faddr_noxlate 3807 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to 3808 * translate the destination address to internal form. 3809 * 3810 * The source address is passed as an option. 3811 */ 3812 int 3813 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3814 struct uio *uiop, int flags) 3815 { 3816 struct T_unitdata_req tudr; 3817 mblk_t *mp; 3818 int error; 3819 void *addr; 3820 socklen_t addrlen; 3821 void *src; 3822 socklen_t srclen; 3823 ssize_t len; 3824 sotpi_info_t *sti = SOTOTPI(so); 3825 3826 ASSERT(name != NULL && namelen != 0); 3827 3828 len = uiop->uio_resid; 3829 if (len > sti->sti_tidu_size) { 3830 error = EMSGSIZE; 3831 goto done; 3832 } 3833 3834 if (sti->sti_faddr_noxlate == 0 && 3835 (flags & MSG_SENDTO_NOXLATE) == 0) { 3836 /* 3837 * Length and family checks. 3838 * Don't verify internal form. 3839 */ 3840 error = so_addr_verify(so, name, namelen); 3841 if (error != 0) 3842 goto done; 3843 } 3844 3845 if (sti->sti_direct) /* Never on AF_UNIX */ 3846 return (sodgram_direct(so, name, namelen, uiop, flags)); 3847 3848 if (so->so_family == AF_UNIX) { 3849 if (sti->sti_faddr_noxlate) { 3850 /* 3851 * Already have a transport internal address. Do not 3852 * pass any (transport internal) source address. 
3853 */ 3854 addr = name; 3855 addrlen = namelen; 3856 src = NULL; 3857 srclen = 0; 3858 } else if (flags & MSG_SENDTO_NOXLATE) { 3859 /* 3860 * Have an internal form dest. address. 3861 * Pass the source address as usual. 3862 */ 3863 addr = name; 3864 addrlen = namelen; 3865 src = sti->sti_laddr_sa; 3866 srclen = (socklen_t)sti->sti_laddr_len; 3867 } else { 3868 /* 3869 * Pass the sockaddr_un source address as an option 3870 * and translate the remote address. 3871 * 3872 * Note that this code does not prevent sti_laddr_sa 3873 * from changing while it is being used. Thus 3874 * if an unbind+bind occurs concurrently with this 3875 * send the peer might see a partially new and a 3876 * partially old "from" address. 3877 */ 3878 src = sti->sti_laddr_sa; 3879 srclen = (socklen_t)sti->sti_laddr_len; 3880 dprintso(so, 1, 3881 ("sosend_dgram UNIX: srclen %d, src %p\n", 3882 srclen, src)); 3883 /* 3884 * The sendmsg caller specified a destination 3885 * address, which we must translate into our 3886 * internal form. addr = &sti->sti_ux_taddr 3887 */ 3888 error = so_ux_addr_xlate(so, name, namelen, 3889 (flags & MSG_XPG4_2), 3890 &addr, &addrlen); 3891 if (error) { 3892 eprintsoline(so, error); 3893 goto done; 3894 } 3895 } 3896 } else { 3897 addr = name; 3898 addrlen = namelen; 3899 src = NULL; 3900 srclen = 0; 3901 } 3902 tudr.PRIM_type = T_UNITDATA_REQ; 3903 tudr.DEST_length = addrlen; 3904 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3905 if (srclen == 0) { 3906 tudr.OPT_length = 0; 3907 tudr.OPT_offset = 0; 3908 3909 mp = soallocproto2(&tudr, sizeof (tudr), 3910 addr, addrlen, 0, _ALLOC_INTR, CRED()); 3911 if (mp == NULL) { 3912 /* 3913 * Caught a signal waiting for memory. 3914 * Let send* return EINTR. 3915 */ 3916 error = EINTR; 3917 goto done; 3918 } 3919 } else { 3920 /* 3921 * There is a AF_UNIX sockaddr_un to include as a source 3922 * address option. 
3923 */ 3924 struct T_opthdr toh; 3925 ssize_t size; 3926 3927 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3928 _TPI_ALIGN_TOPT(srclen)); 3929 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3930 _TPI_ALIGN_TOPT(addrlen)); 3931 3932 toh.level = SOL_SOCKET; 3933 toh.name = SO_SRCADDR; 3934 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3935 toh.status = 0; 3936 3937 size = tudr.OPT_offset + tudr.OPT_length; 3938 mp = soallocproto2(&tudr, sizeof (tudr), 3939 addr, addrlen, size, _ALLOC_INTR, CRED()); 3940 if (mp == NULL) { 3941 /* 3942 * Caught a signal waiting for memory. 3943 * Let send* return EINTR. 3944 */ 3945 error = EINTR; 3946 goto done; 3947 } 3948 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3949 soappendmsg(mp, &toh, sizeof (toh)); 3950 soappendmsg(mp, src, srclen); 3951 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3952 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3953 } 3954 3955 if (AU_AUDITING()) 3956 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3957 3958 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3959 done: 3960 #ifdef SOCK_DEBUG 3961 if (error) { 3962 eprintsoline(so, error); 3963 } 3964 #endif /* SOCK_DEBUG */ 3965 return (error); 3966 } 3967 3968 /* 3969 * Sending data on a connected stream socket. 3970 * Assumes caller has verified that SS_ISCONNECTED is set. 3971 */ 3972 int 3973 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 3974 int sflag) 3975 { 3976 struct T_data_req tdr; 3977 mblk_t *mp; 3978 int error; 3979 ssize_t iosize; 3980 sotpi_info_t *sti = SOTOTPI(so); 3981 3982 dprintso(so, 1, 3983 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 3984 (void *)so, uiop->uio_resid, prim, sflag)); 3985 3986 /* 3987 * Has to be bound and connected. However, since no locks are 3988 * held the state could have changed after sotpi_sendmsg checked it 3989 * thus it is not possible to ASSERT on the state. 
3990 */ 3991 3992 do { 3993 /* 3994 * Set the MORE flag if uio_resid does not fit in this 3995 * message or if the caller passed in "more". 3996 * Error for transports with zero tidu_size. 3997 */ 3998 tdr.PRIM_type = prim; 3999 iosize = sti->sti_tidu_size; 4000 if (iosize <= 0) 4001 return (EMSGSIZE); 4002 if (uiop->uio_resid > iosize) { 4003 tdr.MORE_flag = 1; 4004 } else { 4005 if (more) 4006 tdr.MORE_flag = 1; 4007 else 4008 tdr.MORE_flag = 0; 4009 iosize = uiop->uio_resid; 4010 } 4011 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4012 prim, tdr.MORE_flag, iosize)); 4013 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4014 if (mp == NULL) { 4015 /* 4016 * Caught a signal waiting for memory. 4017 * Let send* return EINTR. 4018 */ 4019 return (EINTR); 4020 } 4021 4022 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4023 0, sflag | MSG_BAND, 0); 4024 if (error) { 4025 eprintsoline(so, error); 4026 return (error); 4027 } 4028 if (uiop->uio_resid > 0) { 4029 /* 4030 * Recheck for fatal errors. Fail write even though 4031 * some data have been written. This is consistent 4032 * with strwrite semantics and BSD sockets semantics. 4033 */ 4034 if (so->so_state & SS_CANTSENDMORE) { 4035 eprintsoline(so, error); 4036 return (EPIPE); 4037 } 4038 if (so->so_error != 0) { 4039 mutex_enter(&so->so_lock); 4040 error = sogeterr(so, B_TRUE); 4041 mutex_exit(&so->so_lock); 4042 if (error != 0) { 4043 eprintsoline(so, error); 4044 return (error); 4045 } 4046 } 4047 } 4048 } while (uiop->uio_resid > 0); 4049 return (0); 4050 } 4051 4052 /* 4053 * Check the state for errors and call the appropriate send function. 4054 * 4055 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4056 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4057 * after sending the message. 4058 * 4059 * The caller may optionally specify a destination address, for either 4060 * stream or datagram sockets. 
 * This table summarizes the cases:
 *
 *	Socket type	Dest. given	Connected	Result
 *	-----------	-----------	---------	--------------
 *	Stream		*		Yes		send to conn. addr.
 *	Stream		*		No		error ENOTCONN
 *	Dgram		yes		*		send to given addr.
 *	Dgram		no		yes		send to conn. addr.
 *	Dgram		no		no		error EDESTADDRREQ
 *
 * There are subtleties around the destination address when using
 * AF_UNIX datagram sockets.  When the sendmsg call specifies the
 * destination address, it's in (struct sockaddr_un) form and we
 * need to translate it to our internal form (struct so_ux_addr).
 *
 * When the sendmsg call does not specify a destination address
 * we're using the peer address saved during sotpi_connect, and
 * that address is already in internal form.  In this case, the
 * (internal only) flag MSG_SENDTO_NOXLATE is set in the flags
 * passed to sosend_dgram or sosend_dgramcmsg to indicate that
 * those functions should skip translation to internal form.
 * Avoiding that translation is not only more efficient, but it's
 * also necessary when a process does a connect on an AF_UNIX
 * datagram socket and then drops privileges.  After the process
 * has dropped privileges, it may no longer be able to look up
 * the external name in the filesystem, but it should still be
 * able to send messages on the connected socket by leaving the
 * destination name unspecified.
 *
 * Yet more subtleties arise with sockets connected by socketpair(),
 * which puts internal form addresses in the fields where normally
 * the external form is found, and sets sti_faddr_noxlate=1, which
 * (like flag MSG_SENDTO_NOXLATE) causes the sosend_dgram functions
 * to skip translation of destination addresses to internal form.
 * However, beware that the flag sti_faddr_noxlate=1 also triggers
 * different behaviour almost everywhere AF_UNIX addresses appear.
 *
 * Returns 0 on success or an errno value.  Errors detected here before
 * anything is sent include EPIPE (SS_CANTSENDMORE), a pending socket
 * error, ENOTCONN, EDESTADDRREQ, EISCONN, a delayed datagram error for
 * the given destination, and EOPNOTSUPP (MSG_OOB on a transport without
 * SM_EXDATA, or MSG_OOB together with control data on a
 * connection-oriented socket).
 */
static int
sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	int so_state;
	int so_mode;
	int error;
	struct sockaddr *name;
	t_uscalar_t namelen;
	int dontroute;
	int flags;
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n",
	    (void *)so, (void *)msg, msg->msg_flags,
	    pr_state(so->so_state, so->so_mode), so->so_error));

	if (so->so_version == SOV_STREAM) {
		/* The imaginary "sockmod" has been popped - act as a stream */
		so_update_attrs(so, SOMOD);
		return (strwrite(SOTOV(so), uiop, cr));
	}

	/*
	 * Snapshot the state under so_lock; the checks below use the
	 * snapshot, not the live so_state.
	 */
	mutex_enter(&so->so_lock);
	so_state = so->so_state;

	if (so_state & SS_CANTSENDMORE) {
		mutex_exit(&so->so_lock);
		return (EPIPE);
	}

	if (so->so_error != 0) {
		error = sogeterr(so, B_TRUE);
		if (error != 0) {
			mutex_exit(&so->so_lock);
			return (error);
		}
	}

	name = (struct sockaddr *)msg->msg_name;
	namelen = msg->msg_namelen;
	flags = msg->msg_flags;

	/*
	 * Historically, this function does not validate the flags
	 * passed in, and any errant bits are ignored.  However,
	 * we would not want any such errant flag bits accidentally
	 * being treated as one of the internal-only flags, so
	 * clear the internal-only flag bits.
	 */
	flags &= ~MSG_SENDTO_NOXLATE;

	so_mode = so->so_mode;

	if (name == NULL) {
		if (!(so_state & SS_ISCONNECTED)) {
			mutex_exit(&so->so_lock);
			if (so_mode & SM_CONNREQUIRED)
				return (ENOTCONN);
			else
				return (EDESTADDRREQ);
		}
		/*
		 * This is a connected socket.
		 */
		if (so_mode & SM_CONNREQUIRED) {
			/*
			 * This is a connected STREAM socket,
			 * destination not specified.
			 */
			name = NULL;
			namelen = 0;
		} else {
			/*
			 * Datagram send on connected socket with
			 * the destination name not specified.
			 * Use the peer address from connect.
			 */
			if (so->so_family == AF_UNIX) {
				/*
				 * Use the (internal form) address saved
				 * in sotpi_connect.  See above.
				 */
				name = (void *)&sti->sti_ux_faddr;
				namelen = sizeof (sti->sti_ux_faddr);
				flags |= MSG_SENDTO_NOXLATE;
			} else {
				ASSERT(sti->sti_faddr_sa);
				name = sti->sti_faddr_sa;
				namelen = (t_uscalar_t)sti->sti_faddr_len;
			}
		}
	} else {
		/*
		 * Sendmsg specifies a destination name
		 */
		if (!(so_state & SS_ISCONNECTED) &&
		    (so_mode & SM_CONNREQUIRED)) {
			/* i.e. TCP not connected */
			mutex_exit(&so->so_lock);
			return (ENOTCONN);
		}
		/*
		 * Ignore the address on connection-oriented sockets.
		 * Just like BSD this code does not generate an error for
		 * TCP (a CONNREQUIRED socket) when sending to an address
		 * passed in with sendto/sendmsg. Instead the data is
		 * delivered on the connection as if no address had been
		 * supplied.
		 *
		 * A connected socket that is not CONNREQUIRED (i.e. a
		 * connected datagram socket) is different: naming a
		 * destination fails with EISCONN.
		 */
		if ((so_state & SS_ISCONNECTED) &&
		    !(so_mode & SM_CONNREQUIRED)) {
			mutex_exit(&so->so_lock);
			return (EISCONN);
		}
		if (!(so_state & SS_ISBOUND)) {
			/* Not bound yet: bind with an unspecified address. */
			so_lock_single(so);	/* Set SOLOCKED */
			error = sotpi_bind(so, NULL, 0,
			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
			so_unlock_single(so, SOLOCKED);
			if (error) {
				mutex_exit(&so->so_lock);
				eprintsoline(so, error);
				return (error);
			}
		}
		/*
		 * Handle delayed datagram errors. These are only queued
		 * when the application sets SO_DGRAM_ERRIND.
		 * Return the error if we are sending to the address
		 * that was returned in the last T_UDERROR_IND.
		 * If sending to some other address discard the delayed
		 * error indication.
		 */
		if (sti->sti_delayed_error) {
			struct T_uderror_ind *tudi;
			void *addr;
			t_uscalar_t addrlen;
			boolean_t match = B_FALSE;

			ASSERT(sti->sti_eaddr_mp);
			/* The delayed error is consumed whether or not it matches. */
			error = sti->sti_delayed_error;
			sti->sti_delayed_error = 0;
			tudi =
			    (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr;
			addrlen = tudi->DEST_length;
			addr = sogetoff(sti->sti_eaddr_mp,
			    tudi->DEST_offset, addrlen, 1);
			ASSERT(addr);	/* Checked by strsock_proto */
			switch (so->so_family) {
			case AF_INET: {
				/* Compare just IP address and port */
				sin_t *sin1 = (sin_t *)name;
				sin_t *sin2 = (sin_t *)addr;

				if (addrlen == sizeof (sin_t) &&
				    namelen == addrlen &&
				    sin1->sin_port == sin2->sin_port &&
				    sin1->sin_addr.s_addr ==
				    sin2->sin_addr.s_addr)
					match = B_TRUE;
				break;
			}
			case AF_INET6: {
				/* Compare just IP address and port. Not flow */
				sin6_t *sin1 = (sin6_t *)name;
				sin6_t *sin2 = (sin6_t *)addr;

				if (addrlen == sizeof (sin6_t) &&
				    namelen == addrlen &&
				    sin1->sin6_port == sin2->sin6_port &&
				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
				    &sin2->sin6_addr))
					match = B_TRUE;
				break;
			}
			case AF_UNIX:
			default:
				/* Any other family: compare the whole address. */
				if (namelen == addrlen &&
				    bcmp(name, addr, namelen) == 0)
					match = B_TRUE;
			}
			if (match) {
				freemsg(sti->sti_eaddr_mp);
				sti->sti_eaddr_mp = NULL;
				mutex_exit(&so->so_lock);
#ifdef DEBUG
				dprintso(so, 0,
				    ("sockfs delayed error %d for %s\n",
				    error,
				    pr_addr(so->so_family, name, namelen)));
#endif /* DEBUG */
				return (error);
			}
			freemsg(sti->sti_eaddr_mp);
			sti->sti_eaddr_mp = NULL;
		}
	}
	mutex_exit(&so->so_lock);

	/*
	 * "dontroute" records that SO_DONTROUTE was turned on here and
	 * therefore has to be turned off again at "done".
	 */
	dontroute = 0;
	if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) {
		uint32_t	val;

		val = 1;
		error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
		    &val, (t_uscalar_t)sizeof (val), cr);
		if (error)
			return (error);
		dontroute = 1;
	}

	if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) {
		error = EOPNOTSUPP;
		goto done;
	}
	if (msg->msg_controllen != 0) {
		/* Control data present: use the option-carrying senders. */
		if (!(so_mode & SM_CONNREQUIRED)) {
			so_update_attrs(so, SOMOD);
			error = sosend_dgramcmsg(so, name, namelen, uiop,
			    msg->msg_control, msg->msg_controllen, flags);
		} else {
			if (flags & MSG_OOB) {
				/* Can't generate T_EXDATA_REQ with options */
				error = EOPNOTSUPP;
				goto done;
			}
			so_update_attrs(so, SOMOD);
			error = sosend_svccmsg(so, uiop,
			    !(flags & MSG_EOR),
			    msg->msg_control, msg->msg_controllen,
			    flags);
		}
		goto done;
	}

	so_update_attrs(so, SOMOD);
	if (!(so_mode & SM_CONNREQUIRED)) {
		/*
		 * If there is no SO_DONTROUTE to turn off return immediately
		 * from send_dgram. This can allow tail-call optimizations.
		 */
		if (!dontroute) {
			return (sosend_dgram(so, name, namelen, uiop, flags));
		}
		error = sosend_dgram(so, name, namelen, uiop, flags);
	} else {
		t_scalar_t prim;
		int sflag;

		/* Ignore msg_name in the connected state */
		if (flags & MSG_OOB) {
			prim = T_EXDATA_REQ;
			/*
			 * Send down T_EXDATA_REQ even if there is flow
			 * control for data.
			 */
			sflag = MSG_IGNFLOW;
		} else {
			if (so_mode & SM_BYTESTREAM) {
				/* Byte stream transport - use write */
				dprintso(so, 1, ("sotpi_sendmsg: write\n"));

				/* Send M_DATA messages */
				/*
				 * If there is no SO_DONTROUTE to turn off,
				 * sti_direct is on, and there is no flow
				 * control, we can take the fast path.
				 */
				if (!dontroute && sti->sti_direct != 0 &&
				    canputnext(SOTOV(so)->v_stream->sd_wrq)) {
					return (sostream_direct(so, uiop,
					    NULL, cr));
				}
				error = strwrite(SOTOV(so), uiop, cr);
				goto done;
			}
			prim = T_DATA_REQ;
			sflag = 0;
		}
		/*
		 * If there is no SO_DONTROUTE to turn off return immediately
		 * from sosend_svc. This can allow tail-call optimizations.
		 */
		if (!dontroute)
			return (sosend_svc(so, uiop, prim,
			    !(flags & MSG_EOR), sflag));
		error = sosend_svc(so, uiop, prim,
		    !(flags & MSG_EOR), sflag);
	}
	/* Every path that falls through to here had SO_DONTROUTE set above. */
	ASSERT(dontroute);
done:
	if (dontroute) {
		uint32_t	val;

		/* Best effort: the result of turning it off is ignored. */
		val = 0;
		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
		    &val, (t_uscalar_t)sizeof (val), cr);
	}
	return (error);
}

/*
 * kstrwritemp() has very similar semantics as that of strwrite().
 * The main difference is it obtains mblks from the caller and also
 * does not do any copy as done in strwrite() from user buffers to
 * kernel buffers.
 *
 * Currently, this routine is used by sendfile to send data allocated
 * within the kernel without any copying. This interface does not use the
 * synchronous stream interface as synch. stream interface implies
 * copying.
 *
 * Returns 0 once mp has been passed downstream (via sostream_direct()
 * or putnext()); otherwise an errno value, in which case mp has not been
 * consumed by the putnext() path here.
 */
int
kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode)
{
	struct stdata *stp;
	struct queue *wqp;
	mblk_t *newmp;
	char waitflag;
	int tempmode;
	int error = 0;
	int done = 0;
	struct sonode *so;
	boolean_t direct;

	ASSERT(vp->v_stream);
	stp = vp->v_stream;

	so = VTOSO(vp);
	direct = _SOTOTPI(so)->sti_direct;

	/*
	 * This is the sockfs direct fast path. canputnext() need
	 * not be accurate so we don't grab the sd_lock here. If
	 * we get flow-controlled, we grab sd_lock just before the
	 * do..while loop below to emulate what strwrite() does.
	 */
	wqp = stp->sd_wrq;
	if (canputnext(wqp) && direct &&
	    !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
		return (sostream_direct(so, NULL, mp, CRED()));
	} else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
		/* Fast check of flags before acquiring the lock */
		mutex_enter(&stp->sd_lock);
		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
		mutex_exit(&stp->sd_lock);
		if (error != 0) {
			/*
			 * Report EPIPE instead when the stream is not
			 * multiplexed and has SW_SIGPIPE set.
			 */
			if (!(stp->sd_flag & STPLEX) &&
			    (stp->sd_wput_opt & SW_SIGPIPE)) {
				error = EPIPE;
			}
			return (error);
		}
	}

	waitflag = WRITEWAIT;
	/* With OLDNDELAY set on the stream, FNDELAY is masked off. */
	if (stp->sd_flag & OLDNDELAY)
		tempmode = fmode & ~FNDELAY;
	else
		tempmode = fmode;

	mutex_enter(&stp->sd_lock);
	do {
		if (canputnext(wqp)) {
			mutex_exit(&stp->sd_lock);
			if (stp->sd_wputdatafunc != NULL) {
				newmp = (stp->sd_wputdatafunc)(vp, mp, NULL,
				    NULL, NULL, NULL);
				if (newmp == NULL) {
					/* The caller will free mp */
					return (ECOMM);
				}
				mp = newmp;
			}
			putnext(wqp, mp);
			return (0);
		}
		/* Flow controlled: wait until the queue can be written. */
		error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1,
		    &done);
	} while (error == 0 && !done);

	mutex_exit(&stp->sd_lock);
	/*
	 * EAGAIN tells the application to try again. ENOMEM
	 * is returned only if the memory allocation size
	 * exceeds the physical limits of the system. ENOMEM
	 * can't be true here.
	 */
	if (error == ENOMEM)
		error = EAGAIN;
	return (error);
}

/*
 * Send the mblk chain *mpp on a connected SOCK_STREAM socket via
 * kstrwritemp().  On success *mpp is set to NULL; on failure it is left
 * untouched.
 *
 * Returns EAFNOSUPPORT for families other than AF_INET, AF_INET6 and
 * AF_UNIX, EPIPE if SS_CANTSENDMORE is set, EOPNOTSUPP if the socket is
 * not SOCK_STREAM, ENOTCONN if it is not connected, or the result of
 * kstrwritemp().
 */
/* ARGSUSED */
static int
sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	int error;

	switch (so->so_family) {
	case AF_INET:
	case AF_INET6:
	case AF_UNIX:
		break;
	default:
		return (EAFNOSUPPORT);

	}

	if (so->so_state & SS_CANTSENDMORE)
		return (EPIPE);

	if (so->so_type != SOCK_STREAM)
		return (EOPNOTSUPP);

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);

	error = kstrwritemp(so->so_vnode, *mpp, fflag);
	if (error == 0)
		*mpp = NULL;
	return (error);
}

/*
 * Sending data on a datagram socket.
 * Assumes caller has verified that SS_ISBOUND etc. are set.
 */
/* ARGSUSED */
static int
sodgram_direct(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, struct uio *uiop, int flags)
{
	struct T_unitdata_req tudr;
	mblk_t *mp = NULL;
	int error = 0;
	void *addr;
	socklen_t addrlen;
	ssize_t len;
	struct stdata *stp = SOTOV(so)->v_stream;
	int so_state;
	queue_t *udp_wq;
	boolean_t connected;
	mblk_t *mpdata = NULL;
	sotpi_info_t *sti = SOTOTPI(so);
	uint32_t auditing = AU_AUDITING();

	ASSERT(name != NULL && namelen != 0);
	ASSERT(!(so->so_mode & SM_CONNREQUIRED));
	ASSERT(!(so->so_mode & SM_EXDATA));
	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
	ASSERT(SOTOV(so)->v_type == VSOCK);

	/* Caller checked for proper length */
	len = uiop->uio_resid;
	ASSERT(len <= sti->sti_tidu_size);

	/* Length and family checks have been done by caller */
	ASSERT(name->sa_family == so->so_family);
	ASSERT(so->so_family == AF_INET ||
	    (namelen == (socklen_t)sizeof (struct sockaddr_in6)));
	ASSERT(so->so_family ==
AF_INET6 || 4561 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4562 4563 addr = name; 4564 addrlen = namelen; 4565 4566 if (stp->sd_sidp != NULL && 4567 (error = straccess(stp, JCWRITE)) != 0) 4568 goto done; 4569 4570 so_state = so->so_state; 4571 4572 connected = so_state & SS_ISCONNECTED; 4573 if (!connected) { 4574 tudr.PRIM_type = T_UNITDATA_REQ; 4575 tudr.DEST_length = addrlen; 4576 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4577 tudr.OPT_length = 0; 4578 tudr.OPT_offset = 0; 4579 4580 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4581 _ALLOC_INTR, CRED()); 4582 if (mp == NULL) { 4583 /* 4584 * Caught a signal waiting for memory. 4585 * Let send* return EINTR. 4586 */ 4587 error = EINTR; 4588 goto done; 4589 } 4590 } 4591 4592 /* 4593 * For UDP we don't break up the copyin into smaller pieces 4594 * as in the TCP case. That means if ENOMEM is returned by 4595 * mcopyinuio() then the uio vector has not been modified at 4596 * all and we fallback to either strwrite() or kstrputmsg() 4597 * below. Note also that we never generate priority messages 4598 * from here. 4599 */ 4600 udp_wq = stp->sd_wrq->q_next; 4601 if (canput(udp_wq) && 4602 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4603 ASSERT(DB_TYPE(mpdata) == M_DATA); 4604 ASSERT(uiop->uio_resid == 0); 4605 if (!connected) 4606 linkb(mp, mpdata); 4607 else 4608 mp = mpdata; 4609 if (auditing) 4610 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4611 4612 /* Always returns 0... */ 4613 return (udp_wput(udp_wq, mp)); 4614 } 4615 4616 ASSERT(mpdata == NULL); 4617 if (error != 0 && error != ENOMEM) { 4618 freemsg(mp); 4619 return (error); 4620 } 4621 4622 /* 4623 * For connected, let strwrite() handle the blocking case. 4624 * Otherwise we fall thru and use kstrputmsg(). 
*/
	if (connected)
		return (strwrite(SOTOV(so), uiop, CRED()));

	if (auditing)
		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);

	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
done:
#ifdef SOCK_DEBUG
	if (error != 0) {
		eprintsoline(so, error);
	}
#endif /* SOCK_DEBUG */
	return (error);
}

/*
 * Direct-call send path for byte-stream sockets: data is handed straight
 * to tcp_wput() on the queue below the stream head write queue instead of
 * going through strwrite().
 *
 * Exactly one of the two data sources is used:
 *	uiop == NULL	mp is a ready-made message chain (kstrwritemp()
 *			case); it is passed down as is.
 *	uiop != NULL	the uio is copied in with mcopyinuio() in pieces of
 *			at most sd_qn_maxpsz bytes and each piece is passed
 *			down as soon as it has been built.
 *
 * Whenever the stream head has an error/hangup/plex/delim/old-ndelay
 * condition, mcopyinuio() runs out of memory, or the transport queue
 * becomes flow-controlled, the remaining work is handed to strwrite() or
 * strwrite_common().
 *
 * Returns 0 or an errno value.
 */
int
sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr)
{
	struct stdata *stp = SOTOV(so)->v_stream;
	ssize_t iosize, rmax, maxblk;
	queue_t *tcp_wq = stp->sd_wrq->q_next;
	mblk_t *newmp;
	int error = 0, wflag = 0;

	ASSERT(so->so_mode & SM_BYTESTREAM);
	ASSERT(SOTOV(so)->v_type == VSOCK);

	/* Only consult straccess() when the stream has a session id set. */
	if (stp->sd_sidp != NULL &&
	    (error = straccess(stp, JCWRITE)) != 0)
		return (error);

	if (uiop == NULL) {
		/*
		 * kstrwritemp() should have checked sd_flag and
		 * flow-control before coming here.  If we end up
		 * here it means that we can simply pass down the
		 * data to tcp.
		 */
		ASSERT(mp != NULL);
		if (stp->sd_wputdatafunc != NULL) {
			newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
			    NULL, NULL, NULL);
			if (newmp == NULL) {
				/* The caller will free mp */
				return (ECOMM);
			}
			mp = newmp;
		}
		/* Always returns 0... */
		return (tcp_wput(tcp_wq, mp));
	}

	/* Fallback to strwrite() to do proper error handling */
	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))
		return (strwrite(SOTOV(so), uiop, cr));

	/* rmax bounds the size of each piece copied in from the uio. */
	rmax = stp->sd_qn_maxpsz;
	ASSERT(rmax >= 0 || rmax == INFPSZ);
	if (rmax == 0 || uiop->uio_resid <= 0)
		return (0);

	/* No packet size limit: take everything that is left in one go. */
	if (rmax == INFPSZ)
		rmax = uiop->uio_resid;

	maxblk = stp->sd_maxblk;

	for (;;) {
		iosize = MIN(uiop->uio_resid, rmax);

		mp = mcopyinuio(stp, uiop, iosize, maxblk, &error);
		if (mp == NULL) {
			/*
			 * Fallback to strwrite() for ENOMEM; if this
			 * is our first time in this routine and the uio
			 * vector has not been modified, we will end up
			 * calling strwrite() without any flag set.
			 */
			if (error == ENOMEM)
				goto slow_send;
			else
				return (error);
		}
		ASSERT(uiop->uio_resid >= 0);
		/*
		 * If mp is non-NULL and ENOMEM is set, it means that
		 * mcopyinuio() was able to break down some of the user
		 * data into one or more mblks.  Send the partial data
		 * to tcp and let the rest be handled in strwrite().
		 */
		ASSERT(error == 0 || error == ENOMEM);
		if (stp->sd_wputdatafunc != NULL) {
			newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
			    NULL, NULL, NULL);
			if (newmp == NULL) {
				/*
				 * The caller will free mp
				 *
				 * NOTE(review): on this path mp was allocated
				 * by mcopyinuio() above, not supplied by our
				 * caller - confirm that sd_wputdatafunc frees
				 * it on failure, otherwise it is leaked here.
				 */
				return (ECOMM);
			}
			mp = newmp;
		}
		(void) tcp_wput(tcp_wq, mp);	/* Always returns 0 anyway. */

		/* From here on at least part of the data has been sent. */
		wflag |= NOINTR;

		if (uiop->uio_resid == 0) {	/* No more data; we're done */
			ASSERT(error == 0);
			break;
		} else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag &
		    (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) {
slow_send:
			/*
			 * We were able to send down partial data using
			 * the direct call interface, but are now relying
			 * on strwrite() to handle the non-fastpath cases.
			 * If the socket is blocking we will sleep in
			 * strwaitq() until write is permitted, otherwise,
			 * we will need to return the amount of bytes
			 * written so far back to the app.  This is the
			 * reason why we pass NOINTR flag to strwrite()
			 * for non-blocking socket, because we don't want
			 * to return EAGAIN when portion of the user data
			 * has actually been sent down.
			 */
			return (strwrite_common(SOTOV(so), uiop, cr, wflag));
		}
	}
	return (0);
}

/*
 * Update sti_faddr by asking the transport (unless AF_UNIX).
 *
 * Copies at most *namelen bytes of the peer address into name and stores
 * the address length in *namelen. When accept is set the address recorded
 * in sti_faddr_sa is returned without any state checks. Otherwise the
 * socket must be connected (ENOTCONN) and, unless xnet_skip_checks is set,
 * must not have SS_CANTSENDMORE set (EINVAL).
 */
/* ARGSUSED */
int
sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen,
    boolean_t accept, struct cred *cr)
{
	struct strbuf strbuf;
	int error = 0, res;
	void *addr;
	t_uscalar_t addrlen;
	k_sigset_t smask;
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_getpeername(%p) %s\n",
	    (void *)so, pr_state(so->so_state, so->so_mode)));

	ASSERT(*namelen > 0);
	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	if (accept) {
		bcopy(sti->sti_faddr_sa, name,
		    MIN(*namelen, sti->sti_faddr_len));
		*namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
		goto done;
	}

	if (!(so->so_state & SS_ISCONNECTED)) {
		error = ENOTCONN;
		goto done;
	}
	/* Added this check for X/Open */
	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		error = EINVAL;
		if (xnet_check_print) {
			printf("sockfs: X/Open getpeername check => EINVAL\n");
		}
		goto done;
	}

	/* A previously validated peer address is returned from the cache. */
	if (sti->sti_faddr_valid) {
		bcopy(sti->sti_faddr_sa, name,
		    MIN(*namelen, sti->sti_faddr_len));
		*namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
		goto done;
	}

#ifdef DEBUG
	dprintso(so, 1, ("sotpi_getpeername (local): %s\n",
	    pr_addr(so->so_family, sti->sti_faddr_sa,
	    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */

	if (so->so_family == AF_UNIX) {
		/* Transport has different name space - return local info */
		if (sti->sti_faddr_noxlate)
			*namelen = 0;
		error = 0;
		goto done;
	}

	ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0);

	ASSERT(sti->sti_faddr_sa);
	/* Allocate local buffer to use with ioctl */
	addrlen = (t_uscalar_t)sti->sti_faddr_maxlen;
	/* so_lock is dropped across the sleeping allocation and the ioctl. */
	mutex_exit(&so->so_lock);
	addr = kmem_alloc(addrlen, KM_SLEEP);

	/*
	 * Issue TI_GETPEERNAME with signals masked.
	 * Put the result in sti_faddr_sa so that getpeername works after
	 * a shutdown(output).
	 * If the ioctl fails (e.g. due to a ECONNRESET) the error is
	 * dropped below; it is neither returned nor posted on the socket.
	 */
	strbuf.buf = addr;
	strbuf.maxlen = addrlen;
	strbuf.len = 0;

	sigintr(&smask, 0);
	res = 0;
	ASSERT(cr);
	error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf,
	    0, K_TO_K, cr, &res);
	sigunintr(&smask);

	mutex_enter(&so->so_lock);
	/*
	 * If there is an error, don't fail the getpeername. In that case
	 * neither name nor *namelen is updated here.
	 */
	if (error) {
		/*
		 * Various stream head errors can be returned to the ioctl.
		 * However, it is impossible to determine which ones of
		 * these are really socket level errors that were incorrectly
		 * consumed by the ioctl. Thus this code silently ignores the
		 * error - the code explicitly does not reinstate the error
		 * using soseterror().
		 * Experiments have shown that at least this set of
		 * errors are reported and should not be reinstated on the
		 * socket:
		 *	EINVAL	E.g. if an I_LINK was in effect when
		 *		getpeername was called.
		 *	EPIPE	The ioctl error semantics prefer the write
		 *		side error over the read side error.
		 *	ENOTCONN The transport just got disconnected but
		 *		sockfs had not yet seen the T_DISCON_IND
		 *		when issuing the ioctl.
		 */
		error = 0;
	} else if (res == 0 && strbuf.len > 0 &&
	    (so->so_state & SS_ISCONNECTED)) {
		/*
		 * SS_ISCONNECTED is re-tested because so_lock was dropped
		 * around the ioctl.
		 */
		ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen);
		sti->sti_faddr_len = (socklen_t)strbuf.len;
		bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len);
		sti->sti_faddr_valid = 1;

		bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len));
		*namelen = sti->sti_faddr_len;
	}
	kmem_free(addr, addrlen);
#ifdef DEBUG
	dprintso(so, 1, ("sotpi_getpeername (tp): %s\n",
	    pr_addr(so->so_family, sti->sti_faddr_sa,
	    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
done:
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Update sti_laddr by asking the transport (unless AF_UNIX).
4894 */ 4895 int 4896 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4897 struct cred *cr) 4898 { 4899 struct strbuf strbuf; 4900 int error = 0, res; 4901 void *addr; 4902 t_uscalar_t addrlen; 4903 k_sigset_t smask; 4904 sotpi_info_t *sti = SOTOTPI(so); 4905 4906 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4907 (void *)so, pr_state(so->so_state, so->so_mode))); 4908 4909 ASSERT(*namelen > 0); 4910 mutex_enter(&so->so_lock); 4911 so_lock_single(so); /* Set SOLOCKED */ 4912 4913 #ifdef DEBUG 4914 4915 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4916 pr_addr(so->so_family, sti->sti_laddr_sa, 4917 (t_uscalar_t)sti->sti_laddr_len))); 4918 #endif /* DEBUG */ 4919 if (sti->sti_laddr_valid) { 4920 bcopy(sti->sti_laddr_sa, name, 4921 MIN(*namelen, sti->sti_laddr_len)); 4922 *namelen = sti->sti_laddr_len; 4923 goto done; 4924 } 4925 4926 if (so->so_family == AF_UNIX) { 4927 /* 4928 * Transport has different name space - return local info. If we 4929 * have enough space, let consumers know the family. 4930 */ 4931 if (*namelen >= sizeof (sa_family_t)) { 4932 name->sa_family = AF_UNIX; 4933 *namelen = sizeof (sa_family_t); 4934 } else { 4935 *namelen = 0; 4936 } 4937 error = 0; 4938 goto done; 4939 } 4940 if (!(so->so_state & SS_ISBOUND)) { 4941 /* If not bound, then nothing to return. */ 4942 error = 0; 4943 goto done; 4944 } 4945 4946 /* Allocate local buffer to use with ioctl */ 4947 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 4948 mutex_exit(&so->so_lock); 4949 addr = kmem_alloc(addrlen, KM_SLEEP); 4950 4951 /* 4952 * Issue TI_GETMYNAME with signals masked. 4953 * Put the result in sti_laddr_sa so that getsockname works after 4954 * a shutdown(output). 4955 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4956 * back to the socket. 
4957 */ 4958 strbuf.buf = addr; 4959 strbuf.maxlen = addrlen; 4960 strbuf.len = 0; 4961 4962 sigintr(&smask, 0); 4963 res = 0; 4964 ASSERT(cr); 4965 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4966 0, K_TO_K, cr, &res); 4967 sigunintr(&smask); 4968 4969 mutex_enter(&so->so_lock); 4970 /* 4971 * If there is an error record the error in so_error put don't fail 4972 * the getsockname. Instead fallback on the recorded 4973 * sti->sti_laddr_sa. 4974 */ 4975 if (error) { 4976 /* 4977 * Various stream head errors can be returned to the ioctl. 4978 * However, it is impossible to determine which ones of 4979 * these are really socket level errors that were incorrectly 4980 * consumed by the ioctl. Thus this code silently ignores the 4981 * error - to code explicitly does not reinstate the error 4982 * using soseterror(). 4983 * Experiments have shows that at least this set of 4984 * errors are reported and should not be reinstated on the 4985 * socket: 4986 * EINVAL E.g. if an I_LINK was in effect when 4987 * getsockname was called. 4988 * EPIPE The ioctl error semantics prefer the write 4989 * side error over the read side error. 4990 */ 4991 error = 0; 4992 } else if (res == 0 && strbuf.len > 0 && 4993 (so->so_state & SS_ISBOUND)) { 4994 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 4995 sti->sti_laddr_len = (socklen_t)strbuf.len; 4996 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 4997 sti->sti_laddr_valid = 1; 4998 4999 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5000 *namelen = sti->sti_laddr_len; 5001 } 5002 kmem_free(addr, addrlen); 5003 #ifdef DEBUG 5004 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5005 pr_addr(so->so_family, sti->sti_laddr_sa, 5006 (t_uscalar_t)sti->sti_laddr_len))); 5007 #endif /* DEBUG */ 5008 done: 5009 so_unlock_single(so, SOLOCKED); 5010 mutex_exit(&so->so_lock); 5011 return (error); 5012 } 5013 5014 /* 5015 * Get socket options. 
For SOL_SOCKET options some options are handled 5016 * by the sockfs while others use the value recorded in the sonode as a 5017 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5018 * 5019 * On the return most *optlenp bytes are copied to optval. 5020 */ 5021 /* ARGSUSED */ 5022 int 5023 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5024 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5025 { 5026 struct T_optmgmt_req optmgmt_req; 5027 struct T_optmgmt_ack *optmgmt_ack; 5028 struct opthdr oh; 5029 struct opthdr *opt_res; 5030 mblk_t *mp = NULL; 5031 int error = 0; 5032 void *option = NULL; /* Set if fallback value */ 5033 t_uscalar_t maxlen = *optlenp; 5034 t_uscalar_t len; 5035 uint32_t value; 5036 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5037 struct timeval32 tmo_val32; 5038 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5039 5040 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5041 (void *)so, level, option_name, optval, (void *)optlenp, 5042 pr_state(so->so_state, so->so_mode))); 5043 5044 mutex_enter(&so->so_lock); 5045 so_lock_single(so); /* Set SOLOCKED */ 5046 5047 /* 5048 * Check for SOL_SOCKET options. 5049 * Certain SOL_SOCKET options are returned directly whereas 5050 * others only provide a default (fallback) value should 5051 * the T_SVR4_OPTMGMT_REQ fail. 
5052 */ 5053 if (level == SOL_SOCKET) { 5054 /* Check parameters */ 5055 switch (option_name) { 5056 case SO_TYPE: 5057 case SO_ERROR: 5058 case SO_DEBUG: 5059 case SO_ACCEPTCONN: 5060 case SO_REUSEADDR: 5061 case SO_KEEPALIVE: 5062 case SO_DONTROUTE: 5063 case SO_BROADCAST: 5064 case SO_USELOOPBACK: 5065 case SO_OOBINLINE: 5066 case SO_SNDBUF: 5067 case SO_RCVBUF: 5068 #ifdef notyet 5069 case SO_SNDLOWAT: 5070 case SO_RCVLOWAT: 5071 #endif /* notyet */ 5072 case SO_DOMAIN: 5073 case SO_DGRAM_ERRIND: 5074 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5075 error = EINVAL; 5076 eprintsoline(so, error); 5077 goto done2; 5078 } 5079 break; 5080 case SO_RCVTIMEO: 5081 case SO_SNDTIMEO: 5082 if (get_udatamodel() == DATAMODEL_NONE || 5083 get_udatamodel() == DATAMODEL_NATIVE) { 5084 if (maxlen < sizeof (struct timeval)) { 5085 error = EINVAL; 5086 eprintsoline(so, error); 5087 goto done2; 5088 } 5089 } else { 5090 if (maxlen < sizeof (struct timeval32)) { 5091 error = EINVAL; 5092 eprintsoline(so, error); 5093 goto done2; 5094 } 5095 5096 } 5097 break; 5098 case SO_LINGER: 5099 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5100 error = EINVAL; 5101 eprintsoline(so, error); 5102 goto done2; 5103 } 5104 break; 5105 case SO_SND_BUFINFO: 5106 if (maxlen < (t_uscalar_t) 5107 sizeof (struct so_snd_bufinfo)) { 5108 error = EINVAL; 5109 eprintsoline(so, error); 5110 goto done2; 5111 } 5112 break; 5113 } 5114 5115 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5116 5117 switch (option_name) { 5118 case SO_TYPE: 5119 value = so->so_type; 5120 option = &value; 5121 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5122 5123 case SO_ERROR: 5124 value = sogeterr(so, B_TRUE); 5125 option = &value; 5126 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5127 5128 case SO_ACCEPTCONN: 5129 if (so->so_state & SS_ACCEPTCONN) 5130 value = SO_ACCEPTCONN; 5131 else 5132 value = 0; 5133 #ifdef DEBUG 5134 if (value) { 5135 dprintso(so, 1, 5136 ("sotpi_getsockopt: 0x%x is 
set\n", 5137 option_name)); 5138 } else { 5139 dprintso(so, 1, 5140 ("sotpi_getsockopt: 0x%x not set\n", 5141 option_name)); 5142 } 5143 #endif /* DEBUG */ 5144 option = &value; 5145 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5146 5147 case SO_DEBUG: 5148 case SO_REUSEADDR: 5149 case SO_KEEPALIVE: 5150 case SO_DONTROUTE: 5151 case SO_BROADCAST: 5152 case SO_USELOOPBACK: 5153 case SO_OOBINLINE: 5154 case SO_DGRAM_ERRIND: 5155 value = (so->so_options & option_name); 5156 #ifdef DEBUG 5157 if (value) { 5158 dprintso(so, 1, 5159 ("sotpi_getsockopt: 0x%x is set\n", 5160 option_name)); 5161 } else { 5162 dprintso(so, 1, 5163 ("sotpi_getsockopt: 0x%x not set\n", 5164 option_name)); 5165 } 5166 #endif /* DEBUG */ 5167 option = &value; 5168 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5169 5170 /* 5171 * The following options are only returned by sockfs when the 5172 * T_SVR4_OPTMGMT_REQ fails. 5173 */ 5174 case SO_LINGER: 5175 option = &so->so_linger; 5176 len = (t_uscalar_t)sizeof (struct linger); 5177 break; 5178 case SO_SNDBUF: { 5179 ssize_t lvalue; 5180 5181 /* 5182 * If the option has not been set then get a default 5183 * value from the read queue. This value is 5184 * returned if the transport fails 5185 * the T_SVR4_OPTMGMT_REQ. 5186 */ 5187 lvalue = so->so_sndbuf; 5188 if (lvalue == 0) { 5189 mutex_exit(&so->so_lock); 5190 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5191 QHIWAT, 0, &lvalue); 5192 mutex_enter(&so->so_lock); 5193 dprintso(so, 1, 5194 ("got SO_SNDBUF %ld from q\n", lvalue)); 5195 } 5196 value = (int)lvalue; 5197 option = &value; 5198 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5199 break; 5200 } 5201 case SO_RCVBUF: { 5202 ssize_t lvalue; 5203 5204 /* 5205 * If the option has not been set then get a default 5206 * value from the read queue. This value is 5207 * returned if the transport fails 5208 * the T_SVR4_OPTMGMT_REQ. 
5209 * 5210 * XXX If SO_RCVBUF has been set and this is an 5211 * XPG 4.2 application then do not ask the transport 5212 * since the transport might adjust the value and not 5213 * return exactly what was set by the application. 5214 * For non-XPG 4.2 application we return the value 5215 * that the transport is actually using. 5216 */ 5217 lvalue = so->so_rcvbuf; 5218 if (lvalue == 0) { 5219 mutex_exit(&so->so_lock); 5220 (void) strqget(RD(strvp2wq(SOTOV(so))), 5221 QHIWAT, 0, &lvalue); 5222 mutex_enter(&so->so_lock); 5223 dprintso(so, 1, 5224 ("got SO_RCVBUF %ld from q\n", lvalue)); 5225 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5226 value = (int)lvalue; 5227 option = &value; 5228 goto copyout; /* skip asking transport */ 5229 } 5230 value = (int)lvalue; 5231 option = &value; 5232 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5233 break; 5234 } 5235 case SO_DOMAIN: 5236 value = so->so_family; 5237 option = &value; 5238 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5239 5240 #ifdef notyet 5241 /* 5242 * We do not implement the semantics of these options 5243 * thus we shouldn't implement the options either. 
5244 */ 5245 case SO_SNDLOWAT: 5246 value = so->so_sndlowat; 5247 option = &value; 5248 break; 5249 case SO_RCVLOWAT: 5250 value = so->so_rcvlowat; 5251 option = &value; 5252 break; 5253 #endif /* notyet */ 5254 case SO_SNDTIMEO: 5255 case SO_RCVTIMEO: { 5256 clock_t val; 5257 5258 if (option_name == SO_RCVTIMEO) 5259 val = drv_hztousec(so->so_rcvtimeo); 5260 else 5261 val = drv_hztousec(so->so_sndtimeo); 5262 tmo_val.tv_sec = val / (1000 * 1000); 5263 tmo_val.tv_usec = val % (1000 * 1000); 5264 if (get_udatamodel() == DATAMODEL_NONE || 5265 get_udatamodel() == DATAMODEL_NATIVE) { 5266 option = &tmo_val; 5267 len = sizeof (struct timeval); 5268 } else { 5269 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5270 option = &tmo_val32; 5271 len = sizeof (struct timeval32); 5272 } 5273 break; 5274 } 5275 case SO_SND_BUFINFO: { 5276 snd_bufinfo.sbi_wroff = 5277 (so->so_proto_props).sopp_wroff; 5278 snd_bufinfo.sbi_maxblk = 5279 (so->so_proto_props).sopp_maxblk; 5280 snd_bufinfo.sbi_maxpsz = 5281 (so->so_proto_props).sopp_maxpsz; 5282 snd_bufinfo.sbi_tail = 5283 (so->so_proto_props).sopp_tail; 5284 option = &snd_bufinfo; 5285 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5286 break; 5287 } 5288 } 5289 } 5290 5291 mutex_exit(&so->so_lock); 5292 5293 /* Send request */ 5294 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5295 optmgmt_req.MGMT_flags = T_CHECK; 5296 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5297 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5298 5299 oh.level = level; 5300 oh.name = option_name; 5301 oh.len = maxlen; 5302 5303 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5304 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5305 /* Let option management work in the presence of data flow control */ 5306 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5307 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5308 mp = NULL; 5309 mutex_enter(&so->so_lock); 5310 if (error) { 5311 eprintsoline(so, error); 5312 goto done2; 
5313 } 5314 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5315 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5316 if (error) { 5317 if (option != NULL) { 5318 /* We have a fallback value */ 5319 error = 0; 5320 goto copyout; 5321 } 5322 eprintsoline(so, error); 5323 goto done2; 5324 } 5325 ASSERT(mp); 5326 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5327 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5328 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5329 if (opt_res == NULL) { 5330 if (option != NULL) { 5331 /* We have a fallback value */ 5332 error = 0; 5333 goto copyout; 5334 } 5335 error = EPROTO; 5336 eprintsoline(so, error); 5337 goto done; 5338 } 5339 option = &opt_res[1]; 5340 5341 /* check to ensure that the option is within bounds */ 5342 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5343 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5344 if (option != NULL) { 5345 /* We have a fallback value */ 5346 error = 0; 5347 goto copyout; 5348 } 5349 error = EPROTO; 5350 eprintsoline(so, error); 5351 goto done; 5352 } 5353 5354 len = opt_res->len; 5355 5356 copyout: { 5357 t_uscalar_t size = MIN(len, maxlen); 5358 bcopy(option, optval, size); 5359 bcopy(&size, optlenp, sizeof (size)); 5360 } 5361 done: 5362 freemsg(mp); 5363 done2: 5364 so_unlock_single(so, SOLOCKED); 5365 mutex_exit(&so->so_lock); 5366 5367 return (error); 5368 } 5369 5370 /* 5371 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5372 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5373 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5374 * setsockopt has to work even if the transport does not support the option. 
5375 */ 5376 /* ARGSUSED */ 5377 int 5378 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5379 const void *optval, t_uscalar_t optlen, struct cred *cr) 5380 { 5381 struct T_optmgmt_req optmgmt_req; 5382 struct opthdr oh; 5383 mblk_t *mp; 5384 int error = 0; 5385 boolean_t handled = B_FALSE; 5386 5387 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5388 (void *)so, level, option_name, optval, optlen, 5389 pr_state(so->so_state, so->so_mode))); 5390 5391 /* X/Open requires this check */ 5392 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5393 if (xnet_check_print) 5394 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5395 return (EINVAL); 5396 } 5397 5398 mutex_enter(&so->so_lock); 5399 so_lock_single(so); /* Set SOLOCKED */ 5400 mutex_exit(&so->so_lock); 5401 5402 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5403 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5404 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5405 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5406 5407 oh.level = level; 5408 oh.name = option_name; 5409 oh.len = optlen; 5410 5411 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5412 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5413 /* Let option management work in the presence of data flow control */ 5414 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5415 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5416 mp = NULL; 5417 mutex_enter(&so->so_lock); 5418 if (error) { 5419 eprintsoline(so, error); 5420 goto done2; 5421 } 5422 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5423 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5424 if (error) { 5425 eprintsoline(so, error); 5426 goto done; 5427 } 5428 ASSERT(mp); 5429 /* No need to verify T_optmgmt_ack */ 5430 freemsg(mp); 5431 done: 5432 /* 5433 * Check for SOL_SOCKET options and record their values. 
5434 * If we know about a SOL_SOCKET parameter and the transport 5435 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5436 * EPROTO) we let the setsockopt succeed. 5437 */ 5438 if (level == SOL_SOCKET) { 5439 /* Check parameters */ 5440 switch (option_name) { 5441 case SO_DEBUG: 5442 case SO_REUSEADDR: 5443 case SO_KEEPALIVE: 5444 case SO_DONTROUTE: 5445 case SO_BROADCAST: 5446 case SO_USELOOPBACK: 5447 case SO_OOBINLINE: 5448 case SO_SNDBUF: 5449 case SO_RCVBUF: 5450 #ifdef notyet 5451 case SO_SNDLOWAT: 5452 case SO_RCVLOWAT: 5453 #endif /* notyet */ 5454 case SO_DGRAM_ERRIND: 5455 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5456 error = EINVAL; 5457 eprintsoline(so, error); 5458 goto done2; 5459 } 5460 ASSERT(optval); 5461 handled = B_TRUE; 5462 break; 5463 case SO_SNDTIMEO: 5464 case SO_RCVTIMEO: 5465 if (get_udatamodel() == DATAMODEL_NONE || 5466 get_udatamodel() == DATAMODEL_NATIVE) { 5467 if (optlen != sizeof (struct timeval)) { 5468 error = EINVAL; 5469 eprintsoline(so, error); 5470 goto done2; 5471 } 5472 } else { 5473 if (optlen != sizeof (struct timeval32)) { 5474 error = EINVAL; 5475 eprintsoline(so, error); 5476 goto done2; 5477 } 5478 } 5479 ASSERT(optval); 5480 handled = B_TRUE; 5481 break; 5482 case SO_LINGER: 5483 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5484 error = EINVAL; 5485 eprintsoline(so, error); 5486 goto done2; 5487 } 5488 ASSERT(optval); 5489 handled = B_TRUE; 5490 break; 5491 } 5492 5493 #define intvalue (*(int32_t *)optval) 5494 5495 switch (option_name) { 5496 case SO_TYPE: 5497 case SO_ERROR: 5498 case SO_ACCEPTCONN: 5499 /* Can't be set */ 5500 error = ENOPROTOOPT; 5501 goto done2; 5502 case SO_LINGER: { 5503 struct linger *l = (struct linger *)optval; 5504 5505 so->so_linger.l_linger = l->l_linger; 5506 if (l->l_onoff) { 5507 so->so_linger.l_onoff = SO_LINGER; 5508 so->so_options |= SO_LINGER; 5509 } else { 5510 so->so_linger.l_onoff = 0; 5511 so->so_options &= ~SO_LINGER; 5512 } 5513 break; 5514 } 5515 
5516 case SO_DEBUG: 5517 #ifdef SOCK_TEST 5518 if (intvalue & 2) 5519 sock_test_timelimit = 10 * hz; 5520 else 5521 sock_test_timelimit = 0; 5522 5523 if (intvalue & 4) 5524 do_useracc = 0; 5525 else 5526 do_useracc = 1; 5527 #endif /* SOCK_TEST */ 5528 /* FALLTHRU */ 5529 case SO_REUSEADDR: 5530 case SO_KEEPALIVE: 5531 case SO_DONTROUTE: 5532 case SO_BROADCAST: 5533 case SO_USELOOPBACK: 5534 case SO_OOBINLINE: 5535 case SO_DGRAM_ERRIND: 5536 if (intvalue != 0) { 5537 dprintso(so, 1, 5538 ("socket_setsockopt: setting 0x%x\n", 5539 option_name)); 5540 so->so_options |= option_name; 5541 } else { 5542 dprintso(so, 1, 5543 ("socket_setsockopt: clearing 0x%x\n", 5544 option_name)); 5545 so->so_options &= ~option_name; 5546 } 5547 break; 5548 /* 5549 * The following options are only returned by us when the 5550 * transport layer fails. 5551 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5552 * since the transport might adjust the value and not 5553 * return exactly what was set by the application. 5554 */ 5555 case SO_SNDBUF: 5556 so->so_sndbuf = intvalue; 5557 break; 5558 case SO_RCVBUF: 5559 so->so_rcvbuf = intvalue; 5560 break; 5561 case SO_RCVPSH: 5562 so->so_rcv_timer_interval = intvalue; 5563 break; 5564 #ifdef notyet 5565 /* 5566 * We do not implement the semantics of these options 5567 * thus we shouldn't implement the options either. 
5568 */ 5569 case SO_SNDLOWAT: 5570 so->so_sndlowat = intvalue; 5571 break; 5572 case SO_RCVLOWAT: 5573 so->so_rcvlowat = intvalue; 5574 break; 5575 #endif /* notyet */ 5576 case SO_SNDTIMEO: 5577 case SO_RCVTIMEO: { 5578 struct timeval tl; 5579 clock_t val; 5580 5581 if (get_udatamodel() == DATAMODEL_NONE || 5582 get_udatamodel() == DATAMODEL_NATIVE) 5583 bcopy(&tl, (struct timeval *)optval, 5584 sizeof (struct timeval)); 5585 else 5586 TIMEVAL32_TO_TIMEVAL(&tl, 5587 (struct timeval32 *)optval); 5588 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5589 if (option_name == SO_RCVTIMEO) 5590 so->so_rcvtimeo = drv_usectohz(val); 5591 else 5592 so->so_sndtimeo = drv_usectohz(val); 5593 break; 5594 } 5595 } 5596 #undef intvalue 5597 5598 if (error) { 5599 if ((error == ENOPROTOOPT || error == EPROTO || 5600 error == EINVAL) && handled) { 5601 dprintso(so, 1, 5602 ("setsockopt: ignoring error %d for 0x%x\n", 5603 error, option_name)); 5604 error = 0; 5605 } 5606 } 5607 } 5608 done2: 5609 so_unlock_single(so, SOLOCKED); 5610 mutex_exit(&so->so_lock); 5611 return (error); 5612 } 5613 5614 /* 5615 * sotpi_close() is called when the last open reference goes away. 
 *
 * If the vnode still has a stream it is closed with strclose() (so_lock is
 * dropped around that call) and the strclose() result becomes the return
 * value; otherwise 0 is returned.  Any pending T_DISCON_IND is flushed and,
 * for SOCLONE sockets, the hold on the device driver is released.
 */
/* ARGSUSED */
int
sotpi_close(struct sonode *so, int flag, struct cred *cr)
{
	struct vnode *vp = SOTOV(so);
	dev_t dev;
	int error = 0;
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_close(%p, %x) %s\n",
	    (void *)vp, flag, pr_state(so->so_state, so->so_mode)));

	dev = sti->sti_dev;

	ASSERT(STREAMSTAB(getmajor(dev)));

	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	ASSERT(so_verify_oobstate(so));

	if (vp->v_stream != NULL) {
		vnode_t *ux_vp;

		if (so->so_family == AF_UNIX) {
			/* Could avoid this when CANTSENDMORE for !dgram */
			so_unix_close(so);
		}

		/* so_lock is not held across vn_rele_stream()/strclose(). */
		mutex_exit(&so->so_lock);
		/*
		 * Disassemble the linkage from the AF_UNIX underlying file
		 * system vnode to this socket (by atomically clearing
		 * v_stream in vn_rele_stream) before strclose clears sd_vnode
		 * and frees the stream head.
		 */
		if ((ux_vp = sti->sti_ux_bound_vp) != NULL) {
			ASSERT(ux_vp->v_stream);
			sti->sti_ux_bound_vp = NULL;
			vn_rele_stream(ux_vp);
		}
		error = strclose(vp, flag, cr);
		vp->v_stream = NULL;
		mutex_enter(&so->so_lock);
	}

	/*
	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
	 */
	so_flush_discon_ind(so);

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);

	/*
	 * Needed for STREAMs.
	 * Decrement the device driver's reference count for streams
	 * opened via the clone dip. The driver was held in clone_open().
	 * The absence of clone_close() forces this asymmetry.
	 */
	if (so->so_flag & SOCLONE)
		ddi_rele_driver(getmajor(dev));

	return (error);
}

/*
 * ioctl handler for TPI sockets.
 *
 * The first switch filters commands that are independent of the socket
 * version: SIOCSQPTR, _I_INSERT and _I_REMOVE are refused with EOPNOTSUPP,
 * the module-list ioctls (I_FIND, I_LIST, I_LOOK, I_POP, I_PUSH) are handed
 * to socktpi_plumbioctl() under sti_plumb_lock, and when so_version is
 * SOV_STREAM everything else goes straight to strioctl().
 *
 * The second switch handles the socket-specific commands; anything it does
 * not recognize is passed to strioctl(), except that STREAMS (I_*) ioctls on
 * a SOV_SOCKBSD socket fail with EOPNOTSUPP.
 *
 * Returns 0 or an errno value.
 */
static int
sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	struct vnode *vp = SOTOV(so);
	sotpi_info_t *sti = SOTOTPI(so);
	int error = 0;

	dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n",
	    cmd, arg, pr_state(so->so_state, so->so_mode)));

	switch (cmd) {
	case SIOCSQPTR:
		/*
		 * SIOCSQPTR is valid only when helper stream is created
		 * by the protocol.
		 */
	case _I_INSERT:
	case _I_REMOVE:
		/*
		 * Since there's no compelling reason to support these ioctls
		 * on sockets, and doing so would increase the complexity
		 * markedly, prevent it.
		 */
		return (EOPNOTSUPP);

	case I_FIND:
	case I_LIST:
	case I_LOOK:
	case I_POP:
	case I_PUSH:
		/*
		 * To prevent races and inconsistencies between the actual
		 * state of the stream and the state according to the sonode,
		 * we serialize all operations which modify or operate on the
		 * list of modules on the socket's stream.
		 */
		mutex_enter(&sti->sti_plumb_lock);
		error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp);
		mutex_exit(&sti->sti_plumb_lock);
		return (error);

	default:
		if (so->so_version != SOV_STREAM)
			break;

		/*
		 * The imaginary "sockmod" has been popped; act as a stream.
		 */
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}

	ASSERT(so->so_version != SOV_STREAM);

	/*
	 * Process socket-specific ioctls.
	 */
	switch (cmd) {
	case FIONBIO: {
		int32_t value;

		if (so_copyin((void *)arg, &value, sizeof (int32_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		if (value) {
			so->so_state |= SS_NDELAY;
		} else {
			so->so_state &= ~SS_NDELAY;
		}
		mutex_exit(&so->so_lock);
		return (0);
	}

	case FIOASYNC: {
		int32_t value;

		if (so_copyin((void *)arg, &value, sizeof (int32_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		/*
		 * SS_ASYNC flag not already set correctly?
		 * (!value != !(so->so_state & SS_ASYNC))
		 * but some engineers find that too hard to read.
		 */
		if (value == 0 && (so->so_state & SS_ASYNC) != 0 ||
		    value != 0 && (so->so_state & SS_ASYNC) == 0)
			error = so_flip_async(so, vp, mode, cr);
		mutex_exit(&so->so_lock);
		return (error);
	}

	case SIOCSPGRP:
	case FIOSETOWN: {
		pid_t pgrp;

		if (so_copyin((void *)arg, &pgrp, sizeof (pid_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp));
		/* Any change? */
		if (pgrp != so->so_pgrp)
			error = so_set_siggrp(so, vp, pgrp, mode, cr);
		mutex_exit(&so->so_lock);
		return (error);
	}
	case SIOCGPGRP:
	case FIOGETOWN:
		/* so_pgrp is read here without taking so_lock. */
		if (so_copyout(&so->so_pgrp, (void *)arg,
		    sizeof (pid_t), (mode & (int)FKIOCTL)))
			return (EFAULT);
		return (0);

	case SIOCATMARK: {
		int retval;
		uint_t so_state;

		/*
		 * strwaitmark has a finite timeout after which it
		 * returns -1 if the mark state is undetermined.
		 * In order to avoid any race between the mark state
		 * in sockfs and the mark state in the stream head this
		 * routine loops until the mark state can be determined
		 * (or the urgent data indication has been removed by some
		 * other thread).
		 */
		do {
			/* Take a fresh snapshot of so_state on every pass. */
			mutex_enter(&so->so_lock);
			so_state = so->so_state;
			mutex_exit(&so->so_lock);
			if (so_state & SS_RCVATMARK) {
				retval = 1;
			} else if (!(so_state & SS_OOBPEND)) {
				/*
				 * No SIGURG has been generated -- there is no
				 * pending or present urgent data. Thus can't
				 * possibly be at the mark.
				 */
				retval = 0;
			} else {
				/*
				 * Have the stream head wait until there is
				 * either some messages on the read queue, or
				 * STRATMARK or STRNOTATMARK gets set. The
				 * STRNOTATMARK flag is used so that the
				 * transport can send up a MSGNOTMARKNEXT
				 * M_DATA to indicate that it is not
				 * at the mark and additional data is not about
				 * to be sent upstream.
				 *
				 * If the mark state is undetermined this will
				 * return -1 and we will loop rechecking the
				 * socket state.
				 */
				retval = strwaitmark(vp);
			}
		} while (retval == -1);

		if (so_copyout(&retval, (void *)arg, sizeof (int),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);
		return (0);
	}

	case I_FDINSERT:
	case I_SENDFD:
	case I_RECVFD:
	case I_ATMARK:
	case _SIOCSOCKFALLBACK:
		/*
		 * These ioctls do not apply to sockets. I_FDINSERT can be
		 * used to send M_PROTO messages without modifying the socket
		 * state. I_SENDFD/RECVFD should not be used for socket file
		 * descriptor passing since they assume a twisted stream.
		 * SIOCATMARK must be used instead of I_ATMARK.
		 *
		 * _SIOCSOCKFALLBACK from an application should never be
		 * processed. It is only generated by socktpi_open() or
		 * in response to I_POP or I_PUSH.
		 */
#ifdef DEBUG
		zcmn_err(getzoneid(), CE_WARN,
		    "Unsupported STREAMS ioctl 0x%x on socket. "
		    "Pid = %d\n", cmd, curproc->p_pid);
#endif /* DEBUG */
		return (EOPNOTSUPP);

	case _I_GETPEERCRED:
		/* Only accepted from in-kernel callers (FKIOCTL set). */
		if ((mode & FKIOCTL) == 0)
			return (EINVAL);

		mutex_enter(&so->so_lock);
		if ((so->so_mode & SM_CONNREQUIRED) == 0) {
			error = ENOTSUP;
		} else if ((so->so_state & SS_ISCONNECTED) == 0) {
			error = ENOTCONN;
		} else if (so->so_peercred != NULL) {
			/*
			 * arg is dereferenced directly as a k_peercred_t;
			 * the cred handed back carries its own hold.
			 */
			k_peercred_t *kp = (k_peercred_t *)arg;
			kp->pc_cr = so->so_peercred;
			kp->pc_cpid = so->so_cpid;
			crhold(so->so_peercred);
		} else {
			error = EINVAL;
		}
		mutex_exit(&so->so_lock);
		return (error);

	default:
		/*
		 * Do the higher-order bits of the ioctl cmd indicate
		 * that it is an I_* streams ioctl?
		 */
		if ((cmd & 0xffffff00U) == STR &&
		    so->so_version == SOV_SOCKBSD) {
#ifdef DEBUG
			zcmn_err(getzoneid(), CE_WARN,
			    "Unsupported STREAMS ioctl 0x%x on socket. "
			    "Pid = %d\n", cmd, curproc->p_pid);
#endif /* DEBUG */
			return (EOPNOTSUPP);
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}
}

/*
 * Handle plumbing-related ioctls.
 *
 * Called with sti_plumb_lock held for I_PUSH, I_POP, I_LIST, I_LOOK and
 * I_FIND; any other command panics.  While the socket is in socket mode a
 * module named "sockmod" is emulated: it is reported by I_LIST, I_LOOK and
 * I_FIND at the position given by sti_pushcnt, and popping it (I_POP with
 * sti_pushcnt == 0) switches the socket to SOV_STREAM via so_sock2stream().
 * In SOV_STREAM mode an I_PUSH of "sockmod" switches back via
 * so_stream2sock(); everything else is passed to strioctl().
 * SOV_SOCKBSD sockets get EOPNOTSUPP.
 *
 * Returns 0 or an errno value.
 */
static int
socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	static const char sockmod_name[] = "sockmod";
	struct sonode	*so = VTOSO(vp);
	char		mname[FMNAMESZ + 1];
	int		error;
	sotpi_info_t	*sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));

	if (so->so_version == SOV_SOCKBSD)
		return (EOPNOTSUPP);

	if (so->so_version == SOV_STREAM) {
		/*
		 * The imaginary "sockmod" has been popped - act as a stream.
		 * If this is a push of sockmod then change back to a socket.
		 */
		if (cmd == I_PUSH) {
			/*
			 * A failure to fetch the name is not reported here;
			 * the request then simply falls through to
			 * strioctl() below.
			 */
			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
			    (void *)arg, mname, sizeof (mname), NULL);

			if (error == 0 && strcmp(mname, sockmod_name) == 0) {
				dprintso(so, 0, ("socktpi_ioctl: going to "
				    "socket version\n"));
				so_stream2sock(so);
				return (0);
			}
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}

	switch (cmd) {
	case I_PUSH:
		if (sti->sti_direct) {
			/*
			 * Issue _SIOCSOCKFALLBACK with the sonode
			 * single-threaded (SOLOCKED) but without so_lock
			 * held across strioctl(); sti_direct is cleared
			 * only if that ioctl succeeded.
			 */
			mutex_enter(&so->so_lock);
			so_lock_single(so);
			mutex_exit(&so->so_lock);

			error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
			    cr, rvalp);

			mutex_enter(&so->so_lock);
			if (error == 0)
				sti->sti_direct = 0;
			so_unlock_single(so, SOLOCKED);
			mutex_exit(&so->so_lock);

			if (error != 0)
				return (error);
		}

		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error == 0)
			sti->sti_pushcnt++;
		return (error);

	case I_POP:
		if (sti->sti_pushcnt == 0) {
			/* Emulate sockmod being popped */
			dprintso(so, 0,
			    ("socktpi_ioctl: going to STREAMS version\n"));
			return (so_sock2stream(so));
		}

		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error == 0)
			sti->sti_pushcnt--;
		return (error);

	case I_LIST: {
		struct str_mlist *kmlistp, *umlistp;
		struct str_list	kstrlist;
		ssize_t		kstrlistsize;
		int		i, nmods;

		STRUCT_DECL(str_list, ustrlist);
		STRUCT_INIT(ustrlist, mode);

		/* arg == 0: only the module count is wanted (in *rvalp). */
		if (arg == 0) {
			error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
			if (error == 0)
				(*rvalp)++;	/* Add one for sockmod */
			return (error);
		}

		error = so_copyin((void *)arg, STRUCT_BUF(ustrlist),
		    STRUCT_SIZE(ustrlist), mode & FKIOCTL);
		if (error != 0)
			return (error);

		nmods = STRUCT_FGET(ustrlist, sl_nmods);
		if (nmods <= 0)
			return (EINVAL);
		/*
		 * Ceiling nmods at nstrpush to prevent someone from
		 * maliciously consuming lots of kernel memory.
		 */
		nmods = MIN(nmods, nstrpush);

		/* One extra slot is allocated for the emulated sockmod. */
		kstrlistsize = (nmods + 1) * sizeof (struct str_mlist);
		kstrlist.sl_nmods = nmods;
		kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP);

		error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K,
		    cr, rvalp);
		if (error != 0)
			goto done;

		/*
		 * Considering the module list as a 0-based array of sl_nmods
		 * modules, sockmod should conceptually exist at slot
		 * sti_pushcnt.  Insert sockmod at this location by sliding all
		 * of the module names after sti_pushcnt over by one.  We know
		 * that there will be room to do this since we allocated
		 * sl_modlist with an additional slot.
		 */
		for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--)
			kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1];

		(void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name);
		kstrlist.sl_nmods++;

		/*
		 * Copy all of the entries out to ustrlist.
		 */
		kmlistp = kstrlist.sl_modlist;
		umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
		for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
			error = so_copyout(kmlistp++, umlistp++,
			    sizeof (struct str_mlist), mode & FKIOCTL);
			if (error != 0)
				goto done;
		}

		/*
		 * Write the number of entries copied out to the start of
		 * the caller's structure.
		 */
		error = so_copyout(&i, (void *)arg, sizeof (int32_t),
		    mode & FKIOCTL);
		if (error == 0)
			*rvalp = 0;
done:
		kmem_free(kstrlist.sl_modlist, kstrlistsize);
		return (error);
	}
	case I_LOOK:
		/* With nothing pushed, the emulated sockmod is on top. */
		if (sti->sti_pushcnt == 0) {
			return (so_copyout(sockmod_name, (void *)arg,
			    sizeof (sockmod_name), mode & FKIOCTL));
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));

	case I_FIND:
		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error && error != EINVAL)
			return (error);

		/* if not found and string was sockmod return 1 */
		if (*rvalp == 0 || error == EINVAL) {
			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
			    (void *)arg, mname, sizeof (mname), NULL);
			if (error == ENAMETOOLONG)
				error = EINVAL;

			if (error == 0 && strcmp(mname, sockmod_name) == 0)
				*rvalp = 1;
		}
		return (error);

	default:
		panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
		break;
	}

	return (0);
}

/*
 * Wrapper around the streams poll routine that implements socket poll
 * semantics.
 * The sockfs never calls pollwakeup itself - the stream head takes care
 * of all pollwakeups. Since sockfs never holds so_lock when calling the
 * stream head there can never be a deadlock due to holding so_lock across
 * pollwakeup and acquiring so_lock in this routine.
 *
 * However, since the performance of VOP_POLL is critical we avoid
 * acquiring so_lock here.
This is based on two assumptions: 6103 * - The poll implementation holds locks to serialize the VOP_POLL call 6104 * and a pollwakeup for the same pollhead. This ensures that should 6105 * e.g. so_state change during a socktpi_poll call the pollwakeup 6106 * (which strsock_* and strrput conspire to issue) is issued after 6107 * the state change. Thus the pollwakeup will block until VOP_POLL has 6108 * returned and then wake up poll and have it call VOP_POLL again. 6109 * - The reading of so_state without holding so_lock does not result in 6110 * stale data that is older than the latest state change that has dropped 6111 * so_lock. This is ensured by the mutex_exit issuing the appropriate 6112 * memory barrier to force the data into the coherency domain. 6113 */ 6114 static int 6115 sotpi_poll( 6116 struct sonode *so, 6117 short events, 6118 int anyyet, 6119 short *reventsp, 6120 struct pollhead **phpp) 6121 { 6122 short origevents = events; 6123 struct vnode *vp = SOTOV(so); 6124 int error; 6125 int so_state = so->so_state; /* snapshot */ 6126 sotpi_info_t *sti = SOTOTPI(so); 6127 6128 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n", 6129 (void *)vp, pr_state(so_state, so->so_mode), so->so_error)); 6130 6131 ASSERT(vp->v_type == VSOCK); 6132 ASSERT(vp->v_stream != NULL); 6133 6134 if (so->so_version == SOV_STREAM) { 6135 /* The imaginary "sockmod" has been popped - act as a stream */ 6136 return (strpoll(vp->v_stream, events, anyyet, 6137 reventsp, phpp)); 6138 } 6139 6140 if (!(so_state & SS_ISCONNECTED) && 6141 (so->so_mode & SM_CONNREQUIRED)) { 6142 /* Not connected yet - turn off write side events */ 6143 events &= ~(POLLOUT|POLLWRBAND); 6144 } 6145 /* 6146 * Check for errors without calling strpoll if the caller wants them. 6147 * In sockets the errors are represented as input/output events 6148 * and there is no need to ask the stream head for this information. 
6149 */ 6150 if (so->so_error != 0 && 6151 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) { 6152 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; 6153 return (0); 6154 } 6155 /* 6156 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages. 6157 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA 6158 * will not trigger a POLLIN event with POLLRDDATA set. 6159 * The handling of urgent data (causing POLLRDBAND) is done by 6160 * inspecting SS_OOBPEND below. 6161 */ 6162 events |= POLLRDDATA; 6163 6164 /* 6165 * After shutdown(output) a stream head write error is set. 6166 * However, we should not return output events. 6167 */ 6168 events |= POLLNOERR; 6169 error = strpoll(vp->v_stream, events, anyyet, 6170 reventsp, phpp); 6171 if (error) 6172 return (error); 6173 6174 ASSERT(!(*reventsp & POLLERR)); 6175 6176 /* 6177 * Notes on T_CONN_IND handling for sockets. 6178 * 6179 * If strpoll() returned without events, SR_POLLIN is guaranteed 6180 * to be set, ensuring any subsequent strrput() runs pollwakeup(). 6181 * 6182 * Since the so_lock is not held, soqueueconnind() may have run 6183 * and a T_CONN_IND may be waiting. We now check for any queued 6184 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events 6185 * to ensure poll returns. 6186 * 6187 * However: 6188 * If the T_CONN_IND hasn't arrived by the time strpoll() returns, 6189 * when strrput() does run for an arriving M_PROTO with T_CONN_IND 6190 * the following actions will occur; taken together they ensure the 6191 * syscall will return. 6192 * 6193 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if 6194 * the accept() was run on a non-blocking socket sowaitconnind() 6195 * may have already returned EWOULDBLOCK, so not be waiting to 6196 * process the message. Additionally socktpi_poll() has probably 6197 * proceeded past the sti_conn_ind_head check below. 6198 * 2. 
strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake 6199 * this thread, however that could occur before poll_common() 6200 * has entered cv_wait. 6201 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock. 6202 * 6203 * Before proceeding to cv_wait() in poll_common() for an event, 6204 * poll_common() atomically checks for T_POLLWAKE under the pc_lock, 6205 * and if set, re-calls strpoll() to ensure the late arriving 6206 * T_CONN_IND is recognized, and pollsys() returns. 6207 */ 6208 6209 if (sti->sti_conn_ind_head != NULL) 6210 *reventsp |= (POLLIN|POLLRDNORM) & events; 6211 6212 if (so->so_state & SS_CANTRCVMORE) { 6213 *reventsp |= POLLRDHUP & events; 6214 6215 if (so->so_state & SS_CANTSENDMORE) 6216 *reventsp |= POLLHUP; 6217 } 6218 6219 if (so->so_state & SS_OOBPEND) 6220 *reventsp |= POLLRDBAND & events; 6221 6222 return (0); 6223 } 6224 6225 /*ARGSUSED*/ 6226 static int 6227 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6228 { 6229 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6230 int error = 0; 6231 6232 error = sonode_constructor(buf, cdrarg, kmflags); 6233 if (error != 0) 6234 return (error); 6235 6236 error = i_sotpi_info_constructor(&st->st_info); 6237 if (error != 0) 6238 sonode_destructor(buf, cdrarg); 6239 6240 st->st_sonode.so_priv = &st->st_info; 6241 6242 return (error); 6243 } 6244 6245 /*ARGSUSED1*/ 6246 static void 6247 socktpi_destructor(void *buf, void *cdrarg) 6248 { 6249 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6250 6251 ASSERT(st->st_sonode.so_priv == &st->st_info); 6252 st->st_sonode.so_priv = NULL; 6253 6254 i_sotpi_info_destructor(&st->st_info); 6255 sonode_destructor(buf, cdrarg); 6256 } 6257 6258 static int 6259 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 6260 { 6261 int retval; 6262 6263 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6264 struct sonode *so = (struct sonode *)buf; 6265 sotpi_info_t *sti = SOTOTPI(so); 6266 6267 mutex_enter(&socklist.sl_lock); 6268 
6269 sti->sti_next_so = socklist.sl_list; 6270 sti->sti_prev_so = NULL; 6271 if (sti->sti_next_so != NULL) 6272 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6273 socklist.sl_list = so; 6274 6275 mutex_exit(&socklist.sl_lock); 6276 6277 } 6278 return (retval); 6279 } 6280 6281 static void 6282 socktpi_unix_destructor(void *buf, void *cdrarg) 6283 { 6284 struct sonode *so = (struct sonode *)buf; 6285 sotpi_info_t *sti = SOTOTPI(so); 6286 6287 mutex_enter(&socklist.sl_lock); 6288 6289 if (sti->sti_next_so != NULL) 6290 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6291 if (sti->sti_prev_so != NULL) 6292 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6293 else 6294 socklist.sl_list = sti->sti_next_so; 6295 6296 mutex_exit(&socklist.sl_lock); 6297 6298 socktpi_destructor(buf, cdrarg); 6299 } 6300 6301 int 6302 socktpi_init(void) 6303 { 6304 /* 6305 * Create sonode caches. We create a special one for AF_UNIX so 6306 * that we can track them for netstat(8). 6307 */ 6308 socktpi_cache = kmem_cache_create("socktpi_cache", 6309 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6310 socktpi_destructor, NULL, NULL, NULL, 0); 6311 6312 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6313 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6314 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6315 6316 return (0); 6317 } 6318 6319 /* 6320 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6321 * 6322 * Caller must still update state and mode using sotpi_update_state(). 
6323 */ 6324 int 6325 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 6326 boolean_t *direct, queue_t **qp, struct cred *cr) 6327 { 6328 sotpi_info_t *sti; 6329 struct sockparams *origsp = so->so_sockparams; 6330 sock_lower_handle_t handle = so->so_proto_handle; 6331 struct stdata *stp; 6332 struct vnode *vp; 6333 queue_t *q; 6334 int error = 0; 6335 6336 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6337 SS_FALLBACK_PENDING); 6338 ASSERT(SOCK_IS_NONSTR(so)); 6339 6340 *qp = NULL; 6341 *direct = B_FALSE; 6342 so->so_sockparams = newsp; 6343 /* 6344 * Allocate and initalize fields required by TPI. 6345 */ 6346 (void) sotpi_info_create(so, KM_SLEEP); 6347 sotpi_info_init(so); 6348 6349 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) { 6350 sotpi_info_fini(so); 6351 sotpi_info_destroy(so); 6352 return (error); 6353 } 6354 ASSERT(handle == so->so_proto_handle); 6355 sti = SOTOTPI(so); 6356 if (sti->sti_direct != 0) 6357 *direct = B_TRUE; 6358 6359 /* 6360 * Keep the original sp around so we can properly dispose of the 6361 * sonode when the socket is being closed. 6362 */ 6363 sti->sti_orig_sp = origsp; 6364 6365 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 6366 so_alloc_addr(so, so->so_max_addr_len); 6367 6368 /* 6369 * If the application has done a SIOCSPGRP, make sure the 6370 * STREAM head is aware. This needs to take place before 6371 * the protocol start sending up messages. Otherwise we 6372 * might miss to generate SIGPOLL. 6373 * 6374 * It is possible that the application will receive duplicate 6375 * signals if some were already generated for either data or 6376 * connection indications. 6377 */ 6378 if (so->so_pgrp != 0) { 6379 if (so_set_events(so, so->so_vnode, cr) != 0) 6380 so->so_pgrp = 0; 6381 } 6382 6383 /* 6384 * Determine which queue to use. 
6385 */ 6386 vp = SOTOV(so); 6387 stp = vp->v_stream; 6388 ASSERT(stp != NULL); 6389 q = stp->sd_wrq->q_next; 6390 6391 /* 6392 * Skip any modules that may have been auto pushed when the device 6393 * was opened 6394 */ 6395 while (q->q_next != NULL) 6396 q = q->q_next; 6397 *qp = _RD(q); 6398 6399 /* This is now a STREAMS sockets */ 6400 so->so_not_str = B_FALSE; 6401 6402 return (error); 6403 } 6404 6405 /* 6406 * Revert a TPI sonode. It is only allowed to revert the sonode during 6407 * the fallback process. 6408 */ 6409 void 6410 sotpi_revert_sonode(struct sonode *so, struct cred *cr) 6411 { 6412 vnode_t *vp = SOTOV(so); 6413 6414 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6415 SS_FALLBACK_PENDING); 6416 ASSERT(!SOCK_IS_NONSTR(so)); 6417 ASSERT(vp->v_stream != NULL); 6418 6419 strclean(vp); 6420 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr); 6421 6422 /* 6423 * Restore the original sockparams. The caller is responsible for 6424 * dropping the ref to the new sp. 
6425 */ 6426 so->so_sockparams = SOTOTPI(so)->sti_orig_sp; 6427 6428 sotpi_info_fini(so); 6429 sotpi_info_destroy(so); 6430 6431 /* This is no longer a STREAMS sockets */ 6432 so->so_not_str = B_TRUE; 6433 } 6434 6435 void 6436 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 6437 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 6438 socklen_t faddrlen, short opts) 6439 { 6440 sotpi_info_t *sti = SOTOTPI(so); 6441 6442 so_proc_tcapability_ack(so, tcap); 6443 6444 so->so_options |= opts; 6445 6446 /* 6447 * Determine whether the foreign and local address are valid 6448 */ 6449 if (laddrlen != 0) { 6450 ASSERT(laddrlen <= sti->sti_laddr_maxlen); 6451 sti->sti_laddr_len = laddrlen; 6452 bcopy(laddr, sti->sti_laddr_sa, laddrlen); 6453 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 6454 } 6455 6456 if (faddrlen != 0) { 6457 ASSERT(faddrlen <= sti->sti_faddr_maxlen); 6458 sti->sti_faddr_len = faddrlen; 6459 bcopy(faddr, sti->sti_faddr_sa, faddrlen); 6460 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 6461 } 6462 6463 } 6464 6465 /* 6466 * Allocate enough space to cache the local and foreign addresses. 6467 */ 6468 void 6469 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6470 { 6471 sotpi_info_t *sti = SOTOTPI(so); 6472 6473 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6474 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6475 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6476 P2ROUNDUP(maxlen, KMEM_ALIGN); 6477 so->so_max_addr_len = sti->sti_laddr_maxlen; 6478 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6479 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6480 + sti->sti_laddr_maxlen); 6481 6482 if (so->so_family == AF_UNIX) { 6483 /* 6484 * Initialize AF_UNIX related fields. 
6485 */ 6486 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6487 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6488 } 6489 } 6490 6491 6492 sotpi_info_t * 6493 sotpi_sototpi(struct sonode *so) 6494 { 6495 sotpi_info_t *sti; 6496 6497 ASSERT(so != NULL); 6498 6499 sti = (sotpi_info_t *)so->so_priv; 6500 6501 ASSERT(sti != NULL); 6502 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6503 6504 return (sti); 6505 } 6506 6507 static int 6508 i_sotpi_info_constructor(sotpi_info_t *sti) 6509 { 6510 sti->sti_magic = SOTPI_INFO_MAGIC; 6511 sti->sti_ack_mp = NULL; 6512 sti->sti_discon_ind_mp = NULL; 6513 sti->sti_ux_bound_vp = NULL; 6514 sti->sti_unbind_mp = NULL; 6515 6516 sti->sti_conn_ind_head = NULL; 6517 sti->sti_conn_ind_tail = NULL; 6518 6519 sti->sti_laddr_sa = NULL; 6520 sti->sti_faddr_sa = NULL; 6521 6522 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6523 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6524 6525 return (0); 6526 } 6527 6528 static void 6529 i_sotpi_info_destructor(sotpi_info_t *sti) 6530 { 6531 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6532 ASSERT(sti->sti_ack_mp == NULL); 6533 ASSERT(sti->sti_discon_ind_mp == NULL); 6534 ASSERT(sti->sti_ux_bound_vp == NULL); 6535 ASSERT(sti->sti_unbind_mp == NULL); 6536 6537 ASSERT(sti->sti_conn_ind_head == NULL); 6538 ASSERT(sti->sti_conn_ind_tail == NULL); 6539 6540 ASSERT(sti->sti_laddr_sa == NULL); 6541 ASSERT(sti->sti_faddr_sa == NULL); 6542 6543 mutex_destroy(&sti->sti_plumb_lock); 6544 cv_destroy(&sti->sti_ack_cv); 6545 } 6546 6547 /* 6548 * Creates and attaches TPI information to the given sonode 6549 */ 6550 static boolean_t 6551 sotpi_info_create(struct sonode *so, int kmflags) 6552 { 6553 sotpi_info_t *sti; 6554 6555 ASSERT(so->so_priv == NULL); 6556 6557 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6558 return (B_FALSE); 6559 6560 if (i_sotpi_info_constructor(sti) != 0) { 6561 kmem_free(sti, sizeof (*sti)); 6562 return (B_FALSE); 6563 } 6564 6565 so->so_priv 
= (void *)sti; 6566 return (B_TRUE); 6567 } 6568 6569 /* 6570 * Initializes the TPI information. 6571 */ 6572 static void 6573 sotpi_info_init(struct sonode *so) 6574 { 6575 struct vnode *vp = SOTOV(so); 6576 sotpi_info_t *sti = SOTOTPI(so); 6577 time_t now; 6578 6579 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6580 vp->v_rdev = sti->sti_dev; 6581 6582 sti->sti_orig_sp = NULL; 6583 6584 sti->sti_pushcnt = 0; 6585 6586 now = gethrestime_sec(); 6587 sti->sti_atime = now; 6588 sti->sti_mtime = now; 6589 sti->sti_ctime = now; 6590 6591 sti->sti_eaddr_mp = NULL; 6592 sti->sti_delayed_error = 0; 6593 6594 sti->sti_provinfo = NULL; 6595 6596 sti->sti_oobcnt = 0; 6597 sti->sti_oobsigcnt = 0; 6598 6599 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6600 6601 sti->sti_laddr_sa = 0; 6602 sti->sti_faddr_sa = 0; 6603 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6604 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6605 6606 sti->sti_laddr_valid = 0; 6607 sti->sti_faddr_valid = 0; 6608 sti->sti_faddr_noxlate = 0; 6609 6610 sti->sti_direct = 0; 6611 6612 ASSERT(sti->sti_ack_mp == NULL); 6613 ASSERT(sti->sti_ux_bound_vp == NULL); 6614 ASSERT(sti->sti_unbind_mp == NULL); 6615 6616 ASSERT(sti->sti_conn_ind_head == NULL); 6617 ASSERT(sti->sti_conn_ind_tail == NULL); 6618 } 6619 6620 /* 6621 * Given a sonode, grab the TPI info and free any data. 6622 */ 6623 static void 6624 sotpi_info_fini(struct sonode *so) 6625 { 6626 sotpi_info_t *sti = SOTOTPI(so); 6627 mblk_t *mp; 6628 6629 ASSERT(sti->sti_discon_ind_mp == NULL); 6630 6631 if ((mp = sti->sti_conn_ind_head) != NULL) { 6632 mblk_t *mp1; 6633 6634 while (mp) { 6635 mp1 = mp->b_next; 6636 mp->b_next = NULL; 6637 freemsg(mp); 6638 mp = mp1; 6639 } 6640 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6641 } 6642 6643 /* 6644 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6645 * indirect them. It also uses so_count as a validity test. 
6646 */ 6647 mutex_enter(&so->so_lock); 6648 6649 if (sti->sti_laddr_sa) { 6650 ASSERT((caddr_t)sti->sti_faddr_sa == 6651 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6652 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6653 sti->sti_laddr_valid = 0; 6654 sti->sti_faddr_valid = 0; 6655 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6656 sti->sti_laddr_sa = NULL; 6657 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6658 sti->sti_faddr_sa = NULL; 6659 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6660 } 6661 6662 mutex_exit(&so->so_lock); 6663 6664 if ((mp = sti->sti_eaddr_mp) != NULL) { 6665 freemsg(mp); 6666 sti->sti_eaddr_mp = NULL; 6667 sti->sti_delayed_error = 0; 6668 } 6669 6670 if ((mp = sti->sti_ack_mp) != NULL) { 6671 freemsg(mp); 6672 sti->sti_ack_mp = NULL; 6673 } 6674 6675 ASSERT(sti->sti_ux_bound_vp == NULL); 6676 if ((mp = sti->sti_unbind_mp) != NULL) { 6677 freemsg(mp); 6678 sti->sti_unbind_mp = NULL; 6679 } 6680 } 6681 6682 /* 6683 * Destroys the TPI information attached to a sonode. 6684 */ 6685 static void 6686 sotpi_info_destroy(struct sonode *so) 6687 { 6688 sotpi_info_t *sti = SOTOTPI(so); 6689 6690 i_sotpi_info_destructor(sti); 6691 kmem_free(sti, sizeof (*sti)); 6692 6693 so->so_priv = NULL; 6694 } 6695 6696 /* 6697 * Create the global sotpi socket module entry. It will never be freed. 6698 */ 6699 smod_info_t * 6700 sotpi_smod_create(void) 6701 { 6702 smod_info_t *smodp; 6703 6704 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 6705 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 6706 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 6707 /* 6708 * Initialize the smod_refcnt to 1 so it will never be freed. 6709 */ 6710 smodp->smod_refcnt = 1; 6711 smodp->smod_uc_version = SOCK_UC_VERSION; 6712 smodp->smod_dc_version = SOCK_DC_VERSION; 6713 smodp->smod_sock_create_func = &sotpi_create; 6714 smodp->smod_sock_destroy_func = &sotpi_destroy; 6715 return (smodp); 6716 } 6717