1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2015, Joyent, Inc. 25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
26 * Copyright 2022 Garrett D'Amore 27 * Copyright 2024 Oxide Computer Company 28 */ 29 30 #include <sys/types.h> 31 #include <sys/t_lock.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/buf.h> 35 #include <sys/conf.h> 36 #include <sys/cred.h> 37 #include <sys/kmem.h> 38 #include <sys/kmem_impl.h> 39 #include <sys/sysmacros.h> 40 #include <sys/vfs.h> 41 #include <sys/vnode.h> 42 #include <sys/debug.h> 43 #include <sys/errno.h> 44 #include <sys/time.h> 45 #include <sys/file.h> 46 #include <sys/open.h> 47 #include <sys/user.h> 48 #include <sys/termios.h> 49 #include <sys/stream.h> 50 #include <sys/strsubr.h> 51 #include <sys/strsun.h> 52 #include <sys/suntpi.h> 53 #include <sys/ddi.h> 54 #include <sys/esunddi.h> 55 #include <sys/flock.h> 56 #include <sys/modctl.h> 57 #include <sys/vtrace.h> 58 #include <sys/cmn_err.h> 59 #include <sys/pathname.h> 60 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/sockio.h> 64 #include <netinet/in.h> 65 #include <sys/un.h> 66 #include <sys/strsun.h> 67 68 #include <sys/tiuser.h> 69 #define _SUN_TPI_VERSION 2 70 #include <sys/tihdr.h> 71 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 72 73 #include <c2/audit.h> 74 75 #include <inet/common.h> 76 #include <inet/ip.h> 77 #include <inet/ip6.h> 78 #include <inet/tcp.h> 79 #include <inet/udp_impl.h> 80 81 #include <sys/zone.h> 82 83 #include <fs/sockfs/sockcommon.h> 84 #include <fs/sockfs/socktpi.h> 85 #include <fs/sockfs/socktpi_impl.h> 86 87 /* 88 * Possible failures when memory can't be allocated. 
The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *	(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *	(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
139 * Set this to non-zero to send TPI conformant messages to TCP in this 140 * respect. This is a performance optimization. 141 * 142 * soaccept_tpi_tcp: 143 * TCP can handle a T_CONN_REQ without the acceptor being bound. 144 * This is a performance optimization that has been picked up in XTI. 145 * 146 * soaccept_tpi_multioptions: 147 * When inheriting SOL_SOCKET options from the listener to the accepting 148 * socket send them as a single message for AF_INET{,6}. 149 */ 150 int solisten_tpi_tcp = 0; 151 int soconnect_tpi_udp = 0; 152 int soconnect_tpi_tcp = 0; 153 int soaccept_tpi_tcp = 0; 154 int soaccept_tpi_multioptions = 1; 155 #else /* SOCK_TEST */ 156 #define soconnect_tpi_tcp 0 157 #define soconnect_tpi_udp 0 158 #define solisten_tpi_tcp 0 159 #define soaccept_tpi_tcp 0 160 #define soaccept_tpi_multioptions 1 161 #endif /* SOCK_TEST */ 162 163 #ifdef SOCK_TEST 164 extern int do_useracc; 165 extern clock_t sock_test_timelimit; 166 #endif /* SOCK_TEST */ 167 168 extern uint32_t ucredsize; 169 170 /* 171 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 172 * applications working. Turn on this flag to disable these checks. 173 */ 174 int xnet_skip_checks = 0; 175 int xnet_check_print = 0; 176 int xnet_truncate_print = 0; 177 178 static void sotpi_destroy(struct sonode *); 179 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int, 180 int, int *, cred_t *cr); 181 182 static boolean_t sotpi_info_create(struct sonode *, int); 183 static void sotpi_info_init(struct sonode *); 184 static void sotpi_info_fini(struct sonode *); 185 static void sotpi_info_destroy(struct sonode *); 186 187 /* 188 * Do direct function call to the transport layer below; this would 189 * also allow the transport to utilize read-side synchronous stream 190 * interface if necessary. This is a /etc/system tunable that must 191 * not be modified on a running system. 
By default this is enabled 192 * for performance reasons and may be disabled for debugging purposes. 193 */ 194 boolean_t socktpi_direct = B_TRUE; 195 196 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 197 198 extern void sigintr(k_sigset_t *, int); 199 extern void sigunintr(k_sigset_t *); 200 201 static int sotpi_unbind(struct sonode *, int); 202 203 /* TPI sockfs sonode operations */ 204 int sotpi_init(struct sonode *, struct sonode *, struct cred *, 205 int); 206 static int sotpi_accept(struct sonode *, int, struct cred *, 207 struct sonode **); 208 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 209 int, struct cred *); 210 static int sotpi_listen(struct sonode *, int, struct cred *); 211 static int sotpi_connect(struct sonode *, struct sockaddr *, 212 socklen_t, int, int, struct cred *); 213 extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 214 struct uio *, struct cred *); 215 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 216 struct uio *, struct cred *); 217 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 218 struct cred *, mblk_t **); 219 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 220 struct uio *, void *, t_uscalar_t, int); 221 static int sodgram_direct(struct sonode *, struct sockaddr *, 222 socklen_t, struct uio *, int); 223 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 224 socklen_t *, boolean_t, struct cred *); 225 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 226 socklen_t *, struct cred *); 227 static int sotpi_shutdown(struct sonode *, int, struct cred *); 228 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 229 socklen_t *, int, struct cred *); 230 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 231 socklen_t, struct cred *); 232 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 233 int32_t *); 234 static int socktpi_plumbioctl(struct vnode *, 
int, intptr_t, int, 235 struct cred *, int32_t *); 236 static int sotpi_poll(struct sonode *, short, int, short *, 237 struct pollhead **); 238 static int sotpi_close(struct sonode *, int, struct cred *); 239 240 static int i_sotpi_info_constructor(sotpi_info_t *); 241 static void i_sotpi_info_destructor(sotpi_info_t *); 242 243 sonodeops_t sotpi_sonodeops = { 244 sotpi_init, /* sop_init */ 245 sotpi_accept, /* sop_accept */ 246 sotpi_bind, /* sop_bind */ 247 sotpi_listen, /* sop_listen */ 248 sotpi_connect, /* sop_connect */ 249 sotpi_recvmsg, /* sop_recvmsg */ 250 sotpi_sendmsg, /* sop_sendmsg */ 251 sotpi_sendmblk, /* sop_sendmblk */ 252 sotpi_getpeername, /* sop_getpeername */ 253 sotpi_getsockname, /* sop_getsockname */ 254 sotpi_shutdown, /* sop_shutdown */ 255 sotpi_getsockopt, /* sop_getsockopt */ 256 sotpi_setsockopt, /* sop_setsockopt */ 257 sotpi_ioctl, /* sop_ioctl */ 258 sotpi_poll, /* sop_poll */ 259 sotpi_close, /* sop_close */ 260 }; 261 262 /* 263 * Return a TPI socket vnode. 264 * 265 * Note that sockets assume that the driver will clone (either itself 266 * or by using the clone driver) i.e. a socket() call will always 267 * result in a new vnode being created. 268 */ 269 270 /* 271 * Common create code for socket and accept. If tso is set the values 272 * from that node is used instead of issuing a T_INFO_REQ. 273 */ 274 275 /* ARGSUSED */ 276 static struct sonode * 277 sotpi_create(struct sockparams *sp, int family, int type, int protocol, 278 int version, int sflags, int *errorp, cred_t *cr) 279 { 280 struct sonode *so; 281 kmem_cache_t *cp; 282 283 ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 284 285 if (family == AF_NCA) { 286 /* 287 * The request is for an NCA socket so for NL7C use the 288 * INET domain instead and mark NL7C_AF_NCA below. 289 */ 290 family = AF_INET; 291 /* 292 * NL7C is not supported in the non-global zone, 293 * we enforce this restriction here. 
294 */ 295 if (getzoneid() != GLOBAL_ZONEID) { 296 *errorp = ENOTSUP; 297 return (NULL); 298 } 299 } 300 301 /* 302 * to be compatible with old tpi socket implementation ignore 303 * sleep flag (sflags) passed in 304 */ 305 cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 306 so = kmem_cache_alloc(cp, KM_SLEEP); 307 if (so == NULL) { 308 *errorp = ENOMEM; 309 return (NULL); 310 } 311 312 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 313 sotpi_info_init(so); 314 315 if (version == SOV_DEFAULT) 316 version = so_default_version; 317 318 so->so_version = (short)version; 319 *errorp = 0; 320 321 return (so); 322 } 323 324 static void 325 sotpi_destroy(struct sonode *so) 326 { 327 kmem_cache_t *cp; 328 struct sockparams *origsp; 329 330 /* 331 * If there is a new dealloc function (ie. smod_destroy_func), 332 * then it should check the correctness of the ops. 333 */ 334 335 ASSERT(so->so_ops == &sotpi_sonodeops); 336 337 origsp = SOTOTPI(so)->sti_orig_sp; 338 339 sotpi_info_fini(so); 340 341 if (so->so_state & SS_FALLBACK_COMP) { 342 /* 343 * A fallback happend, which means that a sotpi_info_t struct 344 * was allocated (as opposed to being allocated from the TPI 345 * sonode cache. Therefore we explicitly free the struct 346 * here. 347 */ 348 sotpi_info_destroy(so); 349 ASSERT(origsp != NULL); 350 351 origsp->sp_smod_info->smod_sock_destroy_func(so); 352 SOCKPARAMS_DEC_REF(origsp); 353 } else { 354 sonode_fini(so); 355 cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache : 356 socktpi_cache; 357 kmem_cache_free(cp, so); 358 } 359 } 360 361 /* ARGSUSED1 */ 362 int 363 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 364 { 365 major_t maj; 366 dev_t newdev; 367 struct vnode *vp; 368 int error = 0; 369 struct stdata *stp; 370 371 sotpi_info_t *sti = SOTOTPI(so); 372 373 dprint(1, ("sotpi_init()\n")); 374 375 /* 376 * over write the sleep flag passed in but that is ok 377 * as tpi socket does not honor sleep flag. 
378 */ 379 flags |= FREAD|FWRITE; 380 381 /* 382 * Record in so_flag that it is a clone. 383 */ 384 if (getmajor(sti->sti_dev) == clone_major) 385 so->so_flag |= SOCLONE; 386 387 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 388 (so->so_family == AF_INET || so->so_family == AF_INET6) && 389 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 390 so->so_protocol == IPPROTO_IP)) { 391 /* Tell tcp or udp that it's talking to sockets */ 392 flags |= SO_SOCKSTR; 393 394 /* 395 * Here we indicate to socktpi_open() our attempt to 396 * make direct calls between sockfs and transport. 397 * The final decision is left to socktpi_open(). 398 */ 399 sti->sti_direct = 1; 400 401 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 402 if (so->so_type == SOCK_STREAM && tso != NULL) { 403 if (SOTOTPI(tso)->sti_direct) { 404 /* 405 * Inherit sti_direct from listener and pass 406 * SO_ACCEPTOR open flag to tcp, indicating 407 * that this is an accept fast-path instance. 408 */ 409 flags |= SO_ACCEPTOR; 410 } else { 411 /* 412 * sti_direct is not set on listener, meaning 413 * that the listener has been converted from 414 * a socket to a stream. Ensure that the 415 * acceptor inherits these settings. 416 */ 417 sti->sti_direct = 0; 418 flags &= ~SO_SOCKSTR; 419 } 420 } 421 } 422 423 /* 424 * Tell local transport that it is talking to sockets. 425 */ 426 if (so->so_family == AF_UNIX) { 427 flags |= SO_SOCKSTR; 428 } 429 430 vp = SOTOV(so); 431 newdev = vp->v_rdev; 432 maj = getmajor(newdev); 433 ASSERT(STREAMSTAB(maj)); 434 435 error = stropen(vp, &newdev, flags, cr); 436 437 stp = vp->v_stream; 438 if (error == 0) { 439 if (so->so_flag & SOCLONE) 440 ASSERT(newdev != vp->v_rdev); 441 mutex_enter(&so->so_lock); 442 sti->sti_dev = newdev; 443 vp->v_rdev = newdev; 444 mutex_exit(&so->so_lock); 445 446 if (stp->sd_flag & STRISTTY) { 447 /* 448 * this is a post SVR4 tty driver - a socket can not 449 * be a controlling terminal. Fail the open. 
450 */ 451 (void) sotpi_close(so, flags, cr); 452 return (ENOTTY); /* XXX */ 453 } 454 455 ASSERT(stp->sd_wrq != NULL); 456 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 457 458 /* 459 * If caller is interested in doing direct function call 460 * interface to/from transport module, probe the module 461 * directly beneath the streamhead to see if it qualifies. 462 * 463 * We turn off the direct interface when qualifications fail. 464 * In the acceptor case, we simply turn off the sti_direct 465 * flag on the socket. We do the fallback after the accept 466 * has completed, before the new socket is returned to the 467 * application. 468 */ 469 if (sti->sti_direct) { 470 queue_t *tq = stp->sd_wrq->q_next; 471 472 /* 473 * sti_direct is currently supported and tested 474 * only for tcp/udp; this is the main reason to 475 * have the following assertions. 476 */ 477 ASSERT(so->so_family == AF_INET || 478 so->so_family == AF_INET6); 479 ASSERT(so->so_protocol == IPPROTO_UDP || 480 so->so_protocol == IPPROTO_TCP || 481 so->so_protocol == IPPROTO_IP); 482 ASSERT(so->so_type == SOCK_DGRAM || 483 so->so_type == SOCK_STREAM); 484 485 /* 486 * Abort direct call interface if the module directly 487 * underneath the stream head is not defined with the 488 * _D_DIRECT flag. This could happen in the tcp or 489 * udp case, when some other module is autopushed 490 * above it, or for some reasons the expected module 491 * isn't purely D_MP (which is the main requirement). 492 */ 493 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 494 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 495 int rval; 496 497 /* Continue on without direct calls */ 498 sti->sti_direct = 0; 499 500 /* 501 * Cannot issue ioctl on fallback socket since 502 * there is no conn associated with the queue. 503 * The fallback downcall will notify the proto 504 * of the change. 
505 */ 506 if (!(flags & SO_ACCEPTOR) && 507 !(flags & SO_FALLBACK)) { 508 if ((error = strioctl(vp, 509 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 510 cr, &rval)) != 0) { 511 (void) sotpi_close(so, flags, 512 cr); 513 return (error); 514 } 515 } 516 } 517 } 518 519 if (flags & SO_FALLBACK) { 520 /* 521 * The stream created does not have a conn. 522 * do stream set up after conn has been assigned 523 */ 524 return (error); 525 } 526 error = so_strinit(so, tso); 527 if (error != 0) { 528 (void) sotpi_close(so, flags, cr); 529 return (error); 530 } 531 532 /* Enable sendfile() on AF_UNIX streams */ 533 if (so->so_family == AF_UNIX && so->so_type == SOCK_STREAM) { 534 mutex_enter(&so->so_lock); 535 so->so_mode |= SM_SENDFILESUPP; 536 mutex_exit(&so->so_lock); 537 } 538 539 /* Wildcard */ 540 if (so->so_protocol != so->so_sockparams->sp_protocol) { 541 int protocol = so->so_protocol; 542 /* 543 * Issue SO_PROTOTYPE setsockopt. 544 */ 545 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 546 &protocol, (t_uscalar_t)sizeof (protocol), cr); 547 if (error != 0) { 548 (void) sotpi_close(so, flags, cr); 549 /* 550 * Setsockopt often fails with ENOPROTOOPT but 551 * socket() should fail with 552 * EPROTONOSUPPORT/EPROTOTYPE. 553 */ 554 return (EPROTONOSUPPORT); 555 } 556 } 557 558 } else { 559 /* 560 * While the same socket can not be reopened (unlike specfs) 561 * the stream head sets STREOPENFAIL when the autopush fails. 562 */ 563 if ((stp != NULL) && 564 (stp->sd_flag & STREOPENFAIL)) { 565 /* 566 * Open failed part way through. 567 */ 568 mutex_enter(&stp->sd_lock); 569 stp->sd_flag &= ~STREOPENFAIL; 570 mutex_exit(&stp->sd_lock); 571 (void) sotpi_close(so, flags, cr); 572 return (error); 573 /*NOTREACHED*/ 574 } 575 ASSERT(stp == NULL); 576 } 577 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 578 "sockfs open:maj %d vp %p so %p error %d", 579 maj, vp, so, error); 580 return (error); 581 } 582 583 /* 584 * Bind the socket to an unspecified address in sockfs only. 
585 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 586 * required in all cases. 587 */ 588 static void 589 so_automatic_bind(struct sonode *so) 590 { 591 sotpi_info_t *sti = SOTOTPI(so); 592 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 593 594 ASSERT(MUTEX_HELD(&so->so_lock)); 595 ASSERT(!(so->so_state & SS_ISBOUND)); 596 ASSERT(sti->sti_unbind_mp); 597 598 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 599 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 600 sti->sti_laddr_sa->sa_family = so->so_family; 601 so->so_state |= SS_ISBOUND; 602 } 603 604 605 /* 606 * bind the socket. 607 * 608 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 609 * are passed in we allow rebinding. Note that for backwards compatibility 610 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 611 * Thus the rebinding code is currently not executed. 612 * 613 * The constraints for rebinding are: 614 * - it is a SOCK_DGRAM, or 615 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 616 * and no listen() has been done. 617 * This rebinding code was added based on some language in the XNET book 618 * about not returning EINVAL it the protocol allows rebinding. However, 619 * this language is not present in the Posix socket draft. Thus maybe the 620 * rebinding logic should be deleted from the source. 621 * 622 * A null "name" can be used to unbind the socket if: 623 * - it is a SOCK_DGRAM, or 624 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 625 * and no listen() has been done. 
626 */ 627 /* ARGSUSED */ 628 static int 629 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 630 socklen_t namelen, int backlog, int flags, struct cred *cr) 631 { 632 struct T_bind_req bind_req; 633 struct T_bind_ack *bind_ack; 634 int error = 0; 635 mblk_t *mp; 636 void *addr; 637 t_uscalar_t addrlen; 638 int unbind_on_err = 1; 639 boolean_t clear_acceptconn_on_err = B_FALSE; 640 boolean_t restore_backlog_on_err = B_FALSE; 641 int save_so_backlog = 0; 642 t_scalar_t PRIM_type = O_T_BIND_REQ; 643 boolean_t tcp_udp_xport; 644 sotpi_info_t *sti = SOTOTPI(so); 645 646 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 647 (void *)so, (void *)name, namelen, backlog, flags, 648 pr_state(so->so_state, so->so_mode))); 649 650 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 651 652 if (!(flags & _SOBIND_LOCK_HELD)) { 653 mutex_enter(&so->so_lock); 654 so_lock_single(so); /* Set SOLOCKED */ 655 } else { 656 ASSERT(MUTEX_HELD(&so->so_lock)); 657 ASSERT(so->so_flag & SOLOCKED); 658 } 659 660 /* 661 * Make sure that there is a preallocated unbind_req message 662 * before binding. This message allocated when the socket is 663 * created but it might be have been consumed. 664 */ 665 if (sti->sti_unbind_mp == NULL) { 666 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 667 /* NOTE: holding so_lock while sleeping */ 668 sti->sti_unbind_mp = 669 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 670 cr); 671 } 672 673 if (flags & _SOBIND_REBIND) { 674 /* 675 * Called from solisten after doing an sotpi_unbind() or 676 * potentially without the unbind (latter for AF_INET{,6}). 
677 */ 678 ASSERT(name == NULL && namelen == 0); 679 680 if (so->so_family == AF_UNIX) { 681 ASSERT(sti->sti_ux_bound_vp); 682 addr = &sti->sti_ux_laddr; 683 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 684 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 685 "addr 0x%p, vp %p\n", 686 addrlen, 687 (void *)((struct so_ux_addr *)addr)->soua_vp, 688 (void *)sti->sti_ux_bound_vp)); 689 } else { 690 addr = sti->sti_laddr_sa; 691 addrlen = (t_uscalar_t)sti->sti_laddr_len; 692 } 693 } else if (flags & _SOBIND_UNSPEC) { 694 ASSERT(name == NULL && namelen == 0); 695 696 /* 697 * The caller checked SS_ISBOUND but not necessarily 698 * under so_lock 699 */ 700 if (so->so_state & SS_ISBOUND) { 701 /* No error */ 702 goto done; 703 } 704 705 /* Set an initial local address */ 706 switch (so->so_family) { 707 case AF_UNIX: 708 /* 709 * Use an address with same size as struct sockaddr 710 * just like BSD. 711 */ 712 sti->sti_laddr_len = 713 (socklen_t)sizeof (struct sockaddr); 714 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 715 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 716 sti->sti_laddr_sa->sa_family = so->so_family; 717 718 /* 719 * Pass down an address with the implicit bind 720 * magic number and the rest all zeros. 721 * The transport will return a unique address. 722 */ 723 sti->sti_ux_laddr.soua_vp = NULL; 724 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 725 addr = &sti->sti_ux_laddr; 726 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 727 break; 728 729 case AF_INET: 730 case AF_INET6: 731 /* 732 * An unspecified bind in TPI has a NULL address. 733 * Set the address in sockfs to have the sa_family. 734 */ 735 sti->sti_laddr_len = (so->so_family == AF_INET) ? 
736 (socklen_t)sizeof (sin_t) : 737 (socklen_t)sizeof (sin6_t); 738 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 739 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 740 sti->sti_laddr_sa->sa_family = so->so_family; 741 addr = NULL; 742 addrlen = 0; 743 break; 744 745 default: 746 /* 747 * An unspecified bind in TPI has a NULL address. 748 * Set the address in sockfs to be zero length. 749 * 750 * Can not assume there is a sa_family for all 751 * protocol families. For example, AF_X25 does not 752 * have a family field. 753 */ 754 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 755 sti->sti_laddr_len = 0; /* XXX correct? */ 756 addr = NULL; 757 addrlen = 0; 758 break; 759 } 760 761 } else { 762 if (so->so_state & SS_ISBOUND) { 763 /* 764 * If it is ok to rebind the socket, first unbind 765 * with the transport. A rebind to the NULL address 766 * is interpreted as an unbind. 767 * Note that a bind to NULL in BSD does unbind the 768 * socket but it fails with EINVAL. 769 * Note that regular sockets set SOV_SOCKBSD i.e. 770 * _SOBIND_SOCKBSD gets set here hence no type of 771 * socket does currently allow rebinding. 772 * 773 * If the name is NULL just do an unbind. 
774 */ 775 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 776 name != NULL) { 777 error = EINVAL; 778 unbind_on_err = 0; 779 eprintsoline(so, error); 780 goto done; 781 } 782 if ((so->so_mode & SM_CONNREQUIRED) && 783 (so->so_state & SS_CANTREBIND)) { 784 error = EINVAL; 785 unbind_on_err = 0; 786 eprintsoline(so, error); 787 goto done; 788 } 789 error = sotpi_unbind(so, 0); 790 if (error) { 791 eprintsoline(so, error); 792 goto done; 793 } 794 ASSERT(!(so->so_state & SS_ISBOUND)); 795 if (name == NULL) { 796 so->so_state &= 797 ~(SS_ISCONNECTED|SS_ISCONNECTING); 798 goto done; 799 } 800 } 801 802 /* X/Open requires this check */ 803 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 804 if (xnet_check_print) { 805 printf("sockfs: X/Open bind state check " 806 "caused EINVAL\n"); 807 } 808 error = EINVAL; 809 goto done; 810 } 811 812 switch (so->so_family) { 813 case AF_UNIX: 814 /* 815 * All AF_UNIX addresses are nul terminated 816 * when copied (copyin_name) in so the minimum 817 * length is 3 bytes. 818 */ 819 if (name == NULL || 820 (ssize_t)namelen <= sizeof (short) + 1) { 821 error = EISDIR; 822 eprintsoline(so, error); 823 goto done; 824 } 825 /* 826 * Verify so_family matches the bound family. 827 * BSD does not check this for AF_UNIX resulting 828 * in funny mknods. 829 */ 830 if (name->sa_family != so->so_family) { 831 error = EAFNOSUPPORT; 832 goto done; 833 } 834 break; 835 case AF_INET: 836 if (name == NULL) { 837 error = EINVAL; 838 eprintsoline(so, error); 839 goto done; 840 } 841 if ((size_t)namelen != sizeof (sin_t)) { 842 error = name->sa_family != so->so_family ? 843 EAFNOSUPPORT : EINVAL; 844 eprintsoline(so, error); 845 goto done; 846 } 847 if ((flags & _SOBIND_XPG4_2) && 848 (name->sa_family != so->so_family)) { 849 /* 850 * This check has to be made for X/Open 851 * sockets however application failures have 852 * been observed when it is applied to 853 * all sockets. 
854 */ 855 error = EAFNOSUPPORT; 856 eprintsoline(so, error); 857 goto done; 858 } 859 /* 860 * Force a zero sa_family to match so_family. 861 * 862 * Some programs like inetd(8) don't set the 863 * family field. Other programs leave 864 * sin_family set to garbage - SunOS 4.X does 865 * not check the family field on a bind. 866 * We use the family field that 867 * was passed in to the socket() call. 868 */ 869 name->sa_family = so->so_family; 870 break; 871 872 case AF_INET6: { 873 #ifdef DEBUG 874 sin6_t *sin6 = (sin6_t *)name; 875 #endif /* DEBUG */ 876 877 if (name == NULL) { 878 error = EINVAL; 879 eprintsoline(so, error); 880 goto done; 881 } 882 if ((size_t)namelen != sizeof (sin6_t)) { 883 error = name->sa_family != so->so_family ? 884 EAFNOSUPPORT : EINVAL; 885 eprintsoline(so, error); 886 goto done; 887 } 888 if (name->sa_family != so->so_family) { 889 /* 890 * With IPv6 we require the family to match 891 * unlike in IPv4. 892 */ 893 error = EAFNOSUPPORT; 894 eprintsoline(so, error); 895 goto done; 896 } 897 #ifdef DEBUG 898 /* 899 * Verify that apps don't forget to clear 900 * sin6_scope_id etc 901 */ 902 if (sin6->sin6_scope_id != 0 && 903 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 904 zcmn_err(getzoneid(), CE_WARN, 905 "bind with uninitialized sin6_scope_id " 906 "(%d) on socket. Pid = %d\n", 907 (int)sin6->sin6_scope_id, 908 (int)curproc->p_pid); 909 } 910 if (sin6->__sin6_src_id != 0) { 911 zcmn_err(getzoneid(), CE_WARN, 912 "bind with uninitialized __sin6_src_id " 913 "(%d) on socket. Pid = %d\n", 914 (int)sin6->__sin6_src_id, 915 (int)curproc->p_pid); 916 } 917 #endif /* DEBUG */ 918 break; 919 } 920 default: 921 /* 922 * Don't do any length or sa_family check to allow 923 * non-sockaddr style addresses. 
924 */ 925 if (name == NULL) { 926 error = EINVAL; 927 eprintsoline(so, error); 928 goto done; 929 } 930 break; 931 } 932 933 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 934 error = ENAMETOOLONG; 935 eprintsoline(so, error); 936 goto done; 937 } 938 /* 939 * Save local address. 940 */ 941 sti->sti_laddr_len = (socklen_t)namelen; 942 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 943 bcopy(name, sti->sti_laddr_sa, namelen); 944 945 addr = sti->sti_laddr_sa; 946 addrlen = (t_uscalar_t)sti->sti_laddr_len; 947 switch (so->so_family) { 948 case AF_INET6: 949 case AF_INET: 950 break; 951 case AF_UNIX: { 952 struct sockaddr_un *soun = 953 (struct sockaddr_un *)sti->sti_laddr_sa; 954 struct vnode *vp, *rvp; 955 struct vattr vattr; 956 957 ASSERT(sti->sti_ux_bound_vp == NULL); 958 /* 959 * Create vnode for the specified path name. 960 * Keep vnode held with a reference in sti_ux_bound_vp. 961 * Use the vnode pointer as the address used in the 962 * bind with the transport. 963 * 964 * Use the same mode as in BSD. In particular this does 965 * not observe the umask. 966 */ 967 /* MAXPATHLEN + soun_family + nul termination */ 968 if (sti->sti_laddr_len > 969 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 970 error = ENAMETOOLONG; 971 eprintsoline(so, error); 972 goto done; 973 } 974 vattr.va_type = VSOCK; 975 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 976 vattr.va_mask = AT_TYPE|AT_MODE; 977 /* NOTE: holding so_lock */ 978 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 979 EXCL, 0, &vp, CRMKNOD, 0, 0); 980 if (error) { 981 if (error == EEXIST) 982 error = EADDRINUSE; 983 eprintsoline(so, error); 984 goto done; 985 } 986 /* 987 * Establish pointer from the underlying filesystem 988 * vnode to the socket node. 989 * sti_ux_bound_vp and v_stream->sd_vnode form the 990 * cross-linkage between the underlying filesystem 991 * node and the socket node. 
992 */ 993 994 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 995 VN_HOLD(rvp); 996 VN_RELE(vp); 997 vp = rvp; 998 } 999 1000 ASSERT(SOTOV(so)->v_stream); 1001 mutex_enter(&vp->v_lock); 1002 vp->v_stream = SOTOV(so)->v_stream; 1003 sti->sti_ux_bound_vp = vp; 1004 mutex_exit(&vp->v_lock); 1005 1006 /* 1007 * Use the vnode pointer value as a unique address 1008 * (together with the magic number to avoid conflicts 1009 * with implicit binds) in the transport provider. 1010 */ 1011 sti->sti_ux_laddr.soua_vp = 1012 (void *)sti->sti_ux_bound_vp; 1013 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1014 addr = &sti->sti_ux_laddr; 1015 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1016 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1017 addrlen, 1018 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1019 break; 1020 } 1021 } /* end switch (so->so_family) */ 1022 } 1023 1024 /* 1025 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1026 * the transport can start passing up T_CONN_IND messages 1027 * as soon as it receives the bind req and strsock_proto() 1028 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1029 */ 1030 if (flags & _SOBIND_LISTEN) { 1031 if ((so->so_state & SS_ACCEPTCONN) == 0) 1032 clear_acceptconn_on_err = B_TRUE; 1033 save_so_backlog = so->so_backlog; 1034 restore_backlog_on_err = B_TRUE; 1035 so->so_state |= SS_ACCEPTCONN; 1036 so->so_backlog = backlog; 1037 } 1038 1039 /* 1040 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1041 * for other transports we will send in a O_T_BIND_REQ. 
1042 */ 1043 if (tcp_udp_xport && 1044 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1045 PRIM_type = T_BIND_REQ; 1046 1047 bind_req.PRIM_type = PRIM_type; 1048 bind_req.ADDR_length = addrlen; 1049 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1050 bind_req.CONIND_number = backlog; 1051 /* NOTE: holding so_lock while sleeping */ 1052 mp = soallocproto2(&bind_req, sizeof (bind_req), 1053 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1054 sti->sti_laddr_valid = 0; 1055 1056 /* Done using sti_laddr_sa - can drop the lock */ 1057 mutex_exit(&so->so_lock); 1058 1059 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1060 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1061 if (error) { 1062 eprintsoline(so, error); 1063 mutex_enter(&so->so_lock); 1064 goto done; 1065 } 1066 1067 mutex_enter(&so->so_lock); 1068 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1069 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1070 if (error) { 1071 eprintsoline(so, error); 1072 goto done; 1073 } 1074 ASSERT(mp); 1075 /* 1076 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1077 * strsock_proto while the lock was dropped above, the bind 1078 * is allowed to complete. 1079 */ 1080 1081 /* Mark as bound. This will be undone if we detect errors below. */ 1082 if (flags & _SOBIND_NOXLATE) { 1083 ASSERT(so->so_family == AF_UNIX); 1084 sti->sti_faddr_noxlate = 1; 1085 } 1086 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1087 so->so_state |= SS_ISBOUND; 1088 ASSERT(sti->sti_unbind_mp); 1089 1090 /* note that we've already set SS_ACCEPTCONN above */ 1091 1092 /* 1093 * Recompute addrlen - an unspecied bind sent down an 1094 * address of length zero but we expect the appropriate length 1095 * in return. 1096 */ 1097 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 
1098 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1099 1100 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1101 /* 1102 * The alignment restriction is really too strict but 1103 * we want enough alignment to inspect the fields of 1104 * a sockaddr_in. 1105 */ 1106 addr = sogetoff(mp, bind_ack->ADDR_offset, 1107 bind_ack->ADDR_length, 1108 __TPI_ALIGN_SIZE); 1109 if (addr == NULL) { 1110 freemsg(mp); 1111 error = EPROTO; 1112 eprintsoline(so, error); 1113 goto done; 1114 } 1115 if (!(flags & _SOBIND_UNSPEC)) { 1116 /* 1117 * Verify that the transport didn't return something we 1118 * did not want e.g. an address other than what we asked for. 1119 * 1120 * NOTE: These checks would go away if/when we switch to 1121 * using the new TPI (in which the transport would fail 1122 * the request instead of assigning a different address). 1123 * 1124 * NOTE2: For protocols that we don't know (i.e. any 1125 * other than AF_INET6, AF_INET and AF_UNIX), we 1126 * cannot know if the transport should be expected to 1127 * return the same address as that requested. 1128 * 1129 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1130 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 1131 * 1132 * For example, in the case of netatalk it may be 1133 * inappropriate for the transport to return the 1134 * requested address (as it may have allocated a local 1135 * port number in behaviour similar to that of an 1136 * AF_INET bind request with a port number of zero). 1137 * 1138 * Given the definition of O_T_BIND_REQ, where the 1139 * transport may bind to an address other than the 1140 * requested address, it's not possible to determine 1141 * whether a returned address that differs from the 1142 * requested address is a reason to fail (because the 1143 * requested address was not available) or succeed 1144 * (because the transport allocated an appropriate 1145 * address and/or port). 
1146 * 1147 * sockfs currently requires that the transport return 1148 * the requested address in the T_BIND_ACK, unless 1149 * there is code here to allow for any discrepancy. 1150 * Such code exists for AF_INET and AF_INET6. 1151 * 1152 * Netatalk chooses to return the requested address 1153 * rather than the (correct) allocated address. This 1154 * means that netatalk violates the TPI specification 1155 * (and would not function correctly if used from a 1156 * TLI application), but it does mean that it works 1157 * with sockfs. 1158 * 1159 * As noted above, using the newer XTI bind primitive 1160 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1161 * allow sockfs to be more sure about whether or not 1162 * the bind request had succeeded (as transports are 1163 * not permitted to bind to a different address than 1164 * that requested - they must return failure). 1165 * Unfortunately, support for T_BIND_REQ may not be 1166 * present in all transport implementations (netatalk, 1167 * for example, doesn't have it), making the 1168 * transition difficult. 1169 */ 1170 if (bind_ack->ADDR_length != addrlen) { 1171 /* Assumes that the requested address was in use */ 1172 freemsg(mp); 1173 error = EADDRINUSE; 1174 eprintsoline(so, error); 1175 goto done; 1176 } 1177 1178 switch (so->so_family) { 1179 case AF_INET6: 1180 case AF_INET: { 1181 sin_t *rname, *aname; 1182 1183 rname = (sin_t *)addr; 1184 aname = (sin_t *)sti->sti_laddr_sa; 1185 1186 /* 1187 * Take advantage of the alignment 1188 * of sin_port and sin6_port which fall 1189 * in the same place in their data structures. 1190 * Just use sin_port for either address family. 1191 * 1192 * This may become a problem if (heaven forbid) 1193 * there's a separate ipv6port_reserved... :-P 1194 * 1195 * Binding to port 0 has the semantics of letting 1196 * the transport bind to any port. 
1197 * 1198 * If the transport is TCP or UDP since we had sent 1199 * a T_BIND_REQ we would not get a port other than 1200 * what we asked for. 1201 */ 1202 if (tcp_udp_xport) { 1203 /* 1204 * Pick up the new port number if we bound to 1205 * port 0. 1206 */ 1207 if (aname->sin_port == 0) 1208 aname->sin_port = rname->sin_port; 1209 sti->sti_laddr_valid = 1; 1210 break; 1211 } 1212 if (aname->sin_port != 0 && 1213 aname->sin_port != rname->sin_port) { 1214 freemsg(mp); 1215 error = EADDRINUSE; 1216 eprintsoline(so, error); 1217 goto done; 1218 } 1219 /* 1220 * Pick up the new port number if we bound to port 0. 1221 */ 1222 aname->sin_port = rname->sin_port; 1223 1224 /* 1225 * Unfortunately, addresses aren't _quite_ the same. 1226 */ 1227 if (so->so_family == AF_INET) { 1228 if (aname->sin_addr.s_addr != 1229 rname->sin_addr.s_addr) { 1230 freemsg(mp); 1231 error = EADDRNOTAVAIL; 1232 eprintsoline(so, error); 1233 goto done; 1234 } 1235 } else { 1236 sin6_t *rname6 = (sin6_t *)rname; 1237 sin6_t *aname6 = (sin6_t *)aname; 1238 1239 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1240 &rname6->sin6_addr)) { 1241 freemsg(mp); 1242 error = EADDRNOTAVAIL; 1243 eprintsoline(so, error); 1244 goto done; 1245 } 1246 } 1247 break; 1248 } 1249 case AF_UNIX: 1250 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1251 freemsg(mp); 1252 error = EADDRINUSE; 1253 eprintsoline(so, error); 1254 eprintso(so, 1255 ("addrlen %d, addr 0x%x, vp %p\n", 1256 addrlen, *((int *)addr), 1257 (void *)sti->sti_ux_bound_vp)); 1258 goto done; 1259 } 1260 sti->sti_laddr_valid = 1; 1261 break; 1262 default: 1263 /* 1264 * NOTE: This assumes that addresses can be 1265 * byte-compared for equivalence. 
1266 */ 1267 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1268 freemsg(mp); 1269 error = EADDRINUSE; 1270 eprintsoline(so, error); 1271 goto done; 1272 } 1273 /* 1274 * Don't mark sti_laddr_valid, as we cannot be 1275 * sure that the returned address is the real 1276 * bound address when talking to an unknown 1277 * transport. 1278 */ 1279 break; 1280 } 1281 } else { 1282 /* 1283 * Save for returned address for getsockname. 1284 * Needed for unspecific bind unless transport supports 1285 * the TI_GETMYNAME ioctl. 1286 * Do this for AF_INET{,6} even though they do, as 1287 * caching info here is much better performance than 1288 * a TPI/STREAMS trip to the transport for getsockname. 1289 * Any which can't for some reason _must_ _not_ set 1290 * sti_laddr_valid here for the caching version of 1291 * getsockname to not break; 1292 */ 1293 switch (so->so_family) { 1294 case AF_UNIX: 1295 /* 1296 * Record the address bound with the transport 1297 * for use by socketpair. 1298 */ 1299 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1300 sti->sti_laddr_valid = 1; 1301 break; 1302 case AF_INET: 1303 case AF_INET6: 1304 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1305 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1306 sti->sti_laddr_valid = 1; 1307 break; 1308 default: 1309 /* 1310 * Don't mark sti_laddr_valid, as we cannot be 1311 * sure that the returned address is the real 1312 * bound address when talking to an unknown 1313 * transport. 
1314 */ 1315 break; 1316 } 1317 } 1318 1319 freemsg(mp); 1320 1321 done: 1322 if (error) { 1323 /* reset state & backlog to values held on entry */ 1324 if (clear_acceptconn_on_err == B_TRUE) 1325 so->so_state &= ~SS_ACCEPTCONN; 1326 if (restore_backlog_on_err == B_TRUE) 1327 so->so_backlog = save_so_backlog; 1328 1329 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1330 int err; 1331 1332 err = sotpi_unbind(so, 0); 1333 /* LINTED - statement has no consequent: if */ 1334 if (err) { 1335 eprintsoline(so, error); 1336 } else { 1337 ASSERT(!(so->so_state & SS_ISBOUND)); 1338 } 1339 } 1340 } 1341 if (!(flags & _SOBIND_LOCK_HELD)) { 1342 so_unlock_single(so, SOLOCKED); 1343 mutex_exit(&so->so_lock); 1344 } else { 1345 ASSERT(MUTEX_HELD(&so->so_lock)); 1346 ASSERT(so->so_flag & SOLOCKED); 1347 } 1348 return (error); 1349 } 1350 1351 /* bind the socket */ 1352 static int 1353 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1354 int flags, struct cred *cr) 1355 { 1356 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1357 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1358 1359 flags &= ~_SOBIND_SOCKETPAIR; 1360 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1361 } 1362 1363 /* 1364 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1365 * address, or when listen needs to unbind and bind. 1366 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1367 * so that a sobind can pick them up. 
1368 */ 1369 static int 1370 sotpi_unbind(struct sonode *so, int flags) 1371 { 1372 struct T_unbind_req unbind_req; 1373 int error = 0; 1374 mblk_t *mp; 1375 sotpi_info_t *sti = SOTOTPI(so); 1376 1377 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1378 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1379 1380 ASSERT(MUTEX_HELD(&so->so_lock)); 1381 ASSERT(so->so_flag & SOLOCKED); 1382 1383 if (!(so->so_state & SS_ISBOUND)) { 1384 error = EINVAL; 1385 eprintsoline(so, error); 1386 goto done; 1387 } 1388 1389 mutex_exit(&so->so_lock); 1390 1391 /* 1392 * Flush the read and write side (except stream head read queue) 1393 * and send down T_UNBIND_REQ. 1394 */ 1395 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1396 1397 unbind_req.PRIM_type = T_UNBIND_REQ; 1398 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1399 0, _ALLOC_SLEEP, CRED()); 1400 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1401 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1402 mutex_enter(&so->so_lock); 1403 if (error) { 1404 eprintsoline(so, error); 1405 goto done; 1406 } 1407 1408 error = sowaitokack(so, T_UNBIND_REQ); 1409 if (error) { 1410 eprintsoline(so, error); 1411 goto done; 1412 } 1413 1414 /* 1415 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1416 * strsock_proto while the lock was dropped above, the unbind 1417 * is allowed to complete. 1418 */ 1419 if (!(flags & _SOUNBIND_REBIND)) { 1420 /* 1421 * Clear out bound address. 1422 */ 1423 vnode_t *vp; 1424 1425 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1426 sti->sti_ux_bound_vp = NULL; 1427 vn_rele_stream(vp); 1428 } 1429 /* Clear out address */ 1430 sti->sti_laddr_len = 0; 1431 } 1432 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1433 sti->sti_laddr_valid = 0; 1434 1435 done: 1436 1437 /* If the caller held the lock don't release it here */ 1438 ASSERT(MUTEX_HELD(&so->so_lock)); 1439 ASSERT(so->so_flag & SOLOCKED); 1440 1441 return (error); 1442 } 1443 1444 /* 1445 * listen on the socket. 
1446 * For TPI conforming transports this has to first unbind with the transport 1447 * and then bind again using the new backlog. 1448 */ 1449 /* ARGSUSED */ 1450 int 1451 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1452 { 1453 int error = 0; 1454 sotpi_info_t *sti = SOTOTPI(so); 1455 1456 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1457 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1458 1459 if (sti->sti_serv_type == T_CLTS) 1460 return (EOPNOTSUPP); 1461 1462 /* 1463 * If the socket is ready to accept connections already, then 1464 * return without doing anything. This avoids a problem where 1465 * a second listen() call fails if a connection is pending and 1466 * leaves the socket unbound. Only when we are not unbinding 1467 * with the transport can we safely increase the backlog. 1468 */ 1469 if (so->so_state & SS_ACCEPTCONN && 1470 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1471 /*CONSTCOND*/ 1472 !solisten_tpi_tcp)) 1473 return (0); 1474 1475 if (so->so_state & SS_ISCONNECTED) 1476 return (EINVAL); 1477 1478 mutex_enter(&so->so_lock); 1479 so_lock_single(so); /* Set SOLOCKED */ 1480 1481 /* 1482 * If the listen doesn't change the backlog we do nothing. 1483 * This avoids an EPROTO error from the transport. 1484 */ 1485 if ((so->so_state & SS_ACCEPTCONN) && 1486 so->so_backlog == backlog) 1487 goto done; 1488 1489 if (!(so->so_state & SS_ISBOUND)) { 1490 /* 1491 * Must have been explicitly bound in the UNIX domain. 1492 */ 1493 if (so->so_family == AF_UNIX) { 1494 error = EINVAL; 1495 goto done; 1496 } 1497 error = sotpi_bindlisten(so, NULL, 0, backlog, 1498 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1499 } else if (backlog > 0) { 1500 /* 1501 * AF_INET{,6} hack to avoid losing the port. 1502 * Assumes that all AF_INET{,6} transports can handle a 1503 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1504 * has already bound thus it is possible to avoid the unbind. 
1505 */ 1506 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1507 /*CONSTCOND*/ 1508 !solisten_tpi_tcp)) { 1509 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1510 if (error) 1511 goto done; 1512 } 1513 error = sotpi_bindlisten(so, NULL, 0, backlog, 1514 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1515 } else { 1516 so->so_state |= SS_ACCEPTCONN; 1517 so->so_backlog = backlog; 1518 } 1519 if (error) 1520 goto done; 1521 ASSERT(so->so_state & SS_ACCEPTCONN); 1522 done: 1523 so_unlock_single(so, SOLOCKED); 1524 mutex_exit(&so->so_lock); 1525 return (error); 1526 } 1527 1528 /* 1529 * Disconnect either a specified seqno or all (-1). 1530 * The former is used on listening sockets only. 1531 * 1532 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1533 * the current use of sodisconnect(seqno == -1) is only for shutdown 1534 * so there is no point (and potentially incorrect) to unbind. 1535 */ 1536 static int 1537 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1538 { 1539 struct T_discon_req discon_req; 1540 int error = 0; 1541 mblk_t *mp; 1542 1543 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1544 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1545 1546 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1547 mutex_enter(&so->so_lock); 1548 so_lock_single(so); /* Set SOLOCKED */ 1549 } else { 1550 ASSERT(MUTEX_HELD(&so->so_lock)); 1551 ASSERT(so->so_flag & SOLOCKED); 1552 } 1553 1554 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1555 error = EINVAL; 1556 eprintsoline(so, error); 1557 goto done; 1558 } 1559 1560 mutex_exit(&so->so_lock); 1561 /* 1562 * Flush the write side (unless this is a listener) 1563 * and then send down a T_DISCON_REQ. 1564 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1565 * and other messages.) 
1566 */ 1567 if (!(so->so_state & SS_ACCEPTCONN)) 1568 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1569 1570 discon_req.PRIM_type = T_DISCON_REQ; 1571 discon_req.SEQ_number = seqno; 1572 mp = soallocproto1(&discon_req, sizeof (discon_req), 1573 0, _ALLOC_SLEEP, CRED()); 1574 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1575 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1576 mutex_enter(&so->so_lock); 1577 if (error) { 1578 eprintsoline(so, error); 1579 goto done; 1580 } 1581 1582 error = sowaitokack(so, T_DISCON_REQ); 1583 if (error) { 1584 eprintsoline(so, error); 1585 goto done; 1586 } 1587 /* 1588 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1589 * strsock_proto while the lock was dropped above, the disconnect 1590 * is allowed to complete. However, it is not possible to 1591 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1592 */ 1593 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1594 SOTOTPI(so)->sti_laddr_valid = 0; 1595 SOTOTPI(so)->sti_faddr_valid = 0; 1596 done: 1597 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1598 so_unlock_single(so, SOLOCKED); 1599 mutex_exit(&so->so_lock); 1600 } else { 1601 /* If the caller held the lock don't release it here */ 1602 ASSERT(MUTEX_HELD(&so->so_lock)); 1603 ASSERT(so->so_flag & SOLOCKED); 1604 } 1605 return (error); 1606 } 1607 1608 /* ARGSUSED */ 1609 int 1610 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1611 struct sonode **nsop) 1612 { 1613 struct T_conn_ind *conn_ind; 1614 struct T_conn_res *conn_res; 1615 int error = 0; 1616 mblk_t *mp, *ack_mp; 1617 struct sonode *nso; 1618 vnode_t *nvp; 1619 void *src; 1620 t_uscalar_t srclen; 1621 void *opt; 1622 t_uscalar_t optlen; 1623 t_scalar_t PRIM_type; 1624 t_scalar_t SEQ_number; 1625 size_t sinlen; 1626 sotpi_info_t *sti = SOTOTPI(so); 1627 sotpi_info_t *nsti; 1628 1629 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1630 (void *)so, fflag, (void *)nsop, 1631 pr_state(so->so_state, so->so_mode))); 1632 1633 /* 1634 * 
Defer single-threading the accepting socket until 1635 * the T_CONN_IND has been received and parsed and the 1636 * new sonode has been opened. 1637 */ 1638 1639 /* Check that we are not already connected */ 1640 if ((so->so_state & SS_ACCEPTCONN) == 0) 1641 goto conn_bad; 1642 1643 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1644 goto e_bad; 1645 1646 ASSERT(mp != NULL); 1647 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1648 1649 /* 1650 * Save SEQ_number for error paths. 1651 */ 1652 SEQ_number = conn_ind->SEQ_number; 1653 1654 srclen = conn_ind->SRC_length; 1655 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1656 if (src == NULL) { 1657 error = EPROTO; 1658 freemsg(mp); 1659 eprintsoline(so, error); 1660 goto disconnect_unlocked; 1661 } 1662 optlen = conn_ind->OPT_length; 1663 switch (so->so_family) { 1664 case AF_INET: 1665 case AF_INET6: 1666 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1667 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1668 &opt, conn_ind->OPT_length); 1669 } else { 1670 /* 1671 * The transport (in this case TCP) hasn't sent up 1672 * a pointer to an instance for the accept fast-path. 1673 * Disable fast-path completely because the call to 1674 * sotpi_create() below would otherwise create an 1675 * incomplete TCP instance, which would lead to 1676 * problems when sockfs sends a normal T_CONN_RES 1677 * message down the new stream. 1678 */ 1679 if (sti->sti_direct) { 1680 int rval; 1681 /* 1682 * For consistency we inform tcp to disable 1683 * direct interface on the listener, though 1684 * we can certainly live without doing this 1685 * because no data will ever travel upstream 1686 * on the listening socket. 
1687 */ 1688 sti->sti_direct = 0; 1689 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1690 0, 0, K_TO_K, cr, &rval); 1691 } 1692 opt = NULL; 1693 optlen = 0; 1694 } 1695 break; 1696 case AF_UNIX: 1697 default: 1698 if (optlen != 0) { 1699 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1700 __TPI_ALIGN_SIZE); 1701 if (opt == NULL) { 1702 error = EPROTO; 1703 freemsg(mp); 1704 eprintsoline(so, error); 1705 goto disconnect_unlocked; 1706 } 1707 } 1708 if (so->so_family == AF_UNIX) { 1709 if (!sti->sti_faddr_noxlate) { 1710 src = NULL; 1711 srclen = 0; 1712 } 1713 /* Extract src address from options */ 1714 if (optlen != 0) 1715 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1716 } 1717 break; 1718 } 1719 1720 /* 1721 * Create the new socket. 1722 */ 1723 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1724 if (nso == NULL) { 1725 ASSERT(error != 0); 1726 /* 1727 * Accept can not fail with ENOBUFS. sotpi_create 1728 * sleeps waiting for memory until a signal is caught 1729 * so return EINTR. 1730 */ 1731 freemsg(mp); 1732 if (error == ENOBUFS) 1733 error = EINTR; 1734 goto e_disc_unl; 1735 } 1736 nvp = SOTOV(nso); 1737 nsti = SOTOTPI(nso); 1738 1739 #ifdef DEBUG 1740 /* 1741 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1742 * it's inherited early to allow debugging of the accept code itself. 1743 */ 1744 nso->so_options |= so->so_options & SO_DEBUG; 1745 #endif /* DEBUG */ 1746 1747 /* 1748 * Save the SRC address from the T_CONN_IND 1749 * for getpeername to work on AF_UNIX and on transports that do not 1750 * support TI_GETPEERNAME. 1751 * 1752 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1753 * copyin_name(). 
1754 */ 1755 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1756 error = EINVAL; 1757 freemsg(mp); 1758 eprintsoline(so, error); 1759 goto disconnect_vp_unlocked; 1760 } 1761 nsti->sti_faddr_len = (socklen_t)srclen; 1762 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1763 bcopy(src, nsti->sti_faddr_sa, srclen); 1764 nsti->sti_faddr_valid = 1; 1765 1766 /* 1767 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1768 */ 1769 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1770 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1771 cred_t *cr; 1772 pid_t cpid; 1773 1774 cr = msg_getcred(mp, &cpid); 1775 if (cr != NULL) { 1776 crhold(cr); 1777 nso->so_peercred = cr; 1778 nso->so_cpid = cpid; 1779 } 1780 freemsg(mp); 1781 1782 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1783 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1784 if (mp == NULL) { 1785 /* 1786 * Accept can not fail with ENOBUFS. 1787 * A signal was caught so return EINTR. 1788 */ 1789 error = EINTR; 1790 eprintsoline(so, error); 1791 goto disconnect_vp_unlocked; 1792 } 1793 conn_res = (struct T_conn_res *)mp->b_rptr; 1794 } else { 1795 /* 1796 * For efficency reasons we use msg_extractcred; no crhold 1797 * needed since db_credp is cleared (i.e., we move the cred 1798 * from the message to so_peercred. 1799 */ 1800 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1801 1802 mp->b_rptr = DB_BASE(mp); 1803 conn_res = (struct T_conn_res *)mp->b_rptr; 1804 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1805 1806 mblk_setcred(mp, cr, curproc->p_pid); 1807 } 1808 1809 /* 1810 * New socket must be bound at least in sockfs and, except for AF_INET, 1811 * (or AF_INET6) it also has to be bound in the transport provider. 1812 * We set the local address in the sonode from the T_OK_ACK of the 1813 * T_CONN_RES. For this reason the address we bind to here isn't 1814 * important. 
1815 */ 1816 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1817 /*CONSTCOND*/ 1818 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1819 /* 1820 * Optimization for AF_INET{,6} transports 1821 * that can handle a T_CONN_RES without being bound. 1822 */ 1823 mutex_enter(&nso->so_lock); 1824 so_automatic_bind(nso); 1825 mutex_exit(&nso->so_lock); 1826 } else { 1827 /* Perform NULL bind with the transport provider. */ 1828 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1829 cr)) != 0) { 1830 ASSERT(error != ENOBUFS); 1831 freemsg(mp); 1832 eprintsoline(nso, error); 1833 goto disconnect_vp_unlocked; 1834 } 1835 } 1836 1837 /* 1838 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1839 * so that any data arriving on the new socket will cause the 1840 * appropriate signals to be delivered for the new socket. 1841 * 1842 * No other thread (except strsock_proto and strsock_misc) 1843 * can access the new socket thus we relax the locking. 1844 */ 1845 nso->so_pgrp = so->so_pgrp; 1846 nso->so_state |= so->so_state & SS_ASYNC; 1847 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1848 1849 if (nso->so_pgrp != 0) { 1850 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1851 eprintsoline(nso, error); 1852 error = 0; 1853 nso->so_pgrp = 0; 1854 } 1855 } 1856 1857 /* 1858 * Make note of the socket level options. TCP and IP level options 1859 * are already inherited. We could do all this after accept is 1860 * successful but doing it here simplifies code and no harm done 1861 * for error case. 
1862 */ 1863 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1864 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1865 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1866 nso->so_sndbuf = so->so_sndbuf; 1867 nso->so_rcvbuf = so->so_rcvbuf; 1868 if (nso->so_options & SO_LINGER) 1869 nso->so_linger = so->so_linger; 1870 1871 /* 1872 * Note that the following sti_direct code path should be 1873 * removed once we are confident that the direct sockets 1874 * do not result in any degradation. 1875 */ 1876 if (sti->sti_direct) { 1877 1878 ASSERT(opt != NULL); 1879 1880 conn_res->OPT_length = optlen; 1881 conn_res->OPT_offset = MBLKL(mp); 1882 bcopy(&opt, mp->b_wptr, optlen); 1883 mp->b_wptr += optlen; 1884 conn_res->PRIM_type = T_CONN_RES; 1885 conn_res->ACCEPTOR_id = 0; 1886 PRIM_type = T_CONN_RES; 1887 1888 /* Send down the T_CONN_RES on acceptor STREAM */ 1889 error = kstrputmsg(SOTOV(nso), mp, NULL, 1890 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1891 if (error) { 1892 mutex_enter(&so->so_lock); 1893 so_lock_single(so); 1894 eprintsoline(so, error); 1895 goto disconnect_vp; 1896 } 1897 mutex_enter(&nso->so_lock); 1898 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1899 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1900 if (error) { 1901 mutex_exit(&nso->so_lock); 1902 mutex_enter(&so->so_lock); 1903 so_lock_single(so); 1904 eprintsoline(so, error); 1905 goto disconnect_vp; 1906 } 1907 if (nso->so_family == AF_INET) { 1908 sin_t *sin; 1909 1910 sin = (sin_t *)(ack_mp->b_rptr + 1911 sizeof (struct T_ok_ack)); 1912 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 1913 nsti->sti_laddr_len = sizeof (sin_t); 1914 } else { 1915 sin6_t *sin6; 1916 1917 sin6 = (sin6_t *)(ack_mp->b_rptr + 1918 sizeof (struct T_ok_ack)); 1919 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 1920 nsti->sti_laddr_len = sizeof (sin6_t); 1921 } 1922 freemsg(ack_mp); 1923 1924 nso->so_state |= SS_ISCONNECTED; 1925 nso->so_proto_handle = (sock_lower_handle_t)opt; 1926 
nsti->sti_laddr_valid = 1; 1927 1928 mutex_exit(&nso->so_lock); 1929 1930 /* 1931 * It's possible, through the use of autopush for example, 1932 * that the acceptor stream may not support sti_direct 1933 * semantics. If the new socket does not support sti_direct 1934 * we issue a _SIOCSOCKFALLBACK to inform the transport 1935 * as we would in the I_PUSH case. 1936 */ 1937 if (nsti->sti_direct == 0) { 1938 int rval; 1939 1940 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 1941 0, 0, K_TO_K, cr, &rval)) != 0) { 1942 mutex_enter(&so->so_lock); 1943 so_lock_single(so); 1944 eprintsoline(so, error); 1945 goto disconnect_vp; 1946 } 1947 } 1948 1949 /* 1950 * Pass out new socket. 1951 */ 1952 if (nsop != NULL) 1953 *nsop = nso; 1954 1955 return (0); 1956 } 1957 1958 /* 1959 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1960 * which don't support the FireEngine accept fast-path. It is also 1961 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1962 * again. Neither sockfs nor TCP attempt to find out if some other 1963 * random module has been inserted in between (in which case we 1964 * should follow TLI accept behaviour). We blindly assume the worst 1965 * case and revert back to old behaviour i.e. TCP will not send us 1966 * any option (eager) and the accept should happen on the listener 1967 * queue. Any queued T_conn_ind have already got their options removed 1968 * by so_sock2_stream() when "sockmod" was I_POP'd. 1969 */ 1970 /* 1971 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1972 */ 1973 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1974 #ifdef _ILP32 1975 queue_t *q; 1976 1977 /* 1978 * Find read queue in driver 1979 * Can safely do this since we "own" nso/nvp. 
1980 */ 1981 q = strvp2wq(nvp)->q_next; 1982 while (SAMESTR(q)) 1983 q = q->q_next; 1984 q = RD(q); 1985 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1986 #else 1987 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1988 #endif /* _ILP32 */ 1989 conn_res->PRIM_type = O_T_CONN_RES; 1990 PRIM_type = O_T_CONN_RES; 1991 } else { 1992 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 1993 conn_res->PRIM_type = T_CONN_RES; 1994 PRIM_type = T_CONN_RES; 1995 } 1996 conn_res->SEQ_number = SEQ_number; 1997 conn_res->OPT_length = 0; 1998 conn_res->OPT_offset = 0; 1999 2000 mutex_enter(&so->so_lock); 2001 so_lock_single(so); /* Set SOLOCKED */ 2002 mutex_exit(&so->so_lock); 2003 2004 error = kstrputmsg(SOTOV(so), mp, NULL, 2005 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2006 mutex_enter(&so->so_lock); 2007 if (error) { 2008 eprintsoline(so, error); 2009 goto disconnect_vp; 2010 } 2011 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2012 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2013 if (error) { 2014 eprintsoline(so, error); 2015 goto disconnect_vp; 2016 } 2017 mutex_exit(&so->so_lock); 2018 /* 2019 * If there is a sin/sin6 appended onto the T_OK_ACK use 2020 * that to set the local address. If this is not present 2021 * then we zero out the address and don't set the 2022 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2023 * the pathname from the listening socket. 2024 * In the case where this is TCP or an AF_UNIX socket the 2025 * client side may have queued data or a T_ORDREL in the 2026 * transport. Having now sent the T_CONN_RES we may receive 2027 * those queued messages at any time. Hold the acceptor 2028 * so_lock until its state and laddr are finalized. 2029 */ 2030 mutex_enter(&nso->so_lock); 2031 sinlen = (nso->so_family == AF_INET) ? 
sizeof (sin_t) : sizeof (sin6_t); 2032 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 2033 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2034 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2035 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2036 nsti->sti_laddr_len = sinlen; 2037 nsti->sti_laddr_valid = 1; 2038 } else if (nso->so_family == AF_UNIX) { 2039 ASSERT(so->so_family == AF_UNIX); 2040 nsti->sti_laddr_len = sti->sti_laddr_len; 2041 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2042 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2043 nsti->sti_laddr_len); 2044 nsti->sti_laddr_valid = 1; 2045 } else { 2046 nsti->sti_laddr_len = sti->sti_laddr_len; 2047 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2048 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2049 nsti->sti_laddr_sa->sa_family = nso->so_family; 2050 } 2051 nso->so_state |= SS_ISCONNECTED; 2052 mutex_exit(&nso->so_lock); 2053 2054 freemsg(ack_mp); 2055 2056 mutex_enter(&so->so_lock); 2057 so_unlock_single(so, SOLOCKED); 2058 mutex_exit(&so->so_lock); 2059 2060 /* 2061 * Pass out new socket. 2062 */ 2063 if (nsop != NULL) 2064 *nsop = nso; 2065 2066 return (0); 2067 2068 e_disc_unl: 2069 eprintsoline(so, error); 2070 goto disconnect_unlocked; 2071 2072 disconnect_vp_unlocked: 2073 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2074 VN_RELE(nvp); 2075 disconnect_unlocked: 2076 (void) sodisconnect(so, SEQ_number, 0); 2077 return (error); 2078 2079 disconnect_vp: 2080 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2081 so_unlock_single(so, SOLOCKED); 2082 mutex_exit(&so->so_lock); 2083 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2084 VN_RELE(nvp); 2085 return (error); 2086 2087 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2088 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2089 ? EOPNOTSUPP : EINVAL; 2090 e_bad: 2091 eprintsoline(so, error); 2092 return (error); 2093 } 2094 2095 /* 2096 * connect a socket. 
2097 * 2098 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2099 * unconnect (by specifying a null address). 2100 */ 2101 int 2102 sotpi_connect(struct sonode *so, 2103 struct sockaddr *name, 2104 socklen_t namelen, 2105 int fflag, 2106 int flags, 2107 struct cred *cr) 2108 { 2109 struct T_conn_req conn_req; 2110 int error = 0; 2111 mblk_t *mp; 2112 void *src; 2113 socklen_t srclen; 2114 void *addr; 2115 socklen_t addrlen; 2116 boolean_t need_unlock; 2117 sotpi_info_t *sti = SOTOTPI(so); 2118 2119 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2120 (void *)so, (void *)name, namelen, fflag, flags, 2121 pr_state(so->so_state, so->so_mode))); 2122 2123 /* 2124 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2125 * avoid sleeping for memory with SOLOCKED held. 2126 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2127 * + sizeof (struct T_opthdr). 2128 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2129 * exceed sti_faddr_maxlen). 2130 */ 2131 mp = soallocproto(sizeof (struct T_conn_req) + 2132 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 2133 cr); 2134 if (mp == NULL) { 2135 /* 2136 * Connect can not fail with ENOBUFS. A signal was 2137 * caught so return EINTR. 2138 */ 2139 error = EINTR; 2140 eprintsoline(so, error); 2141 return (error); 2142 } 2143 2144 mutex_enter(&so->so_lock); 2145 /* 2146 * Make sure there is a preallocated T_unbind_req message 2147 * before any binding. This message is allocated when the 2148 * socket is created. Since another thread can consume 2149 * so_unbind_mp by the time we return from so_lock_single(), 2150 * we should check the availability of so_unbind_mp after 2151 * we return from so_lock_single(). 
2152 */ 2153 2154 so_lock_single(so); /* Set SOLOCKED */ 2155 need_unlock = B_TRUE; 2156 2157 if (sti->sti_unbind_mp == NULL) { 2158 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2159 /* NOTE: holding so_lock while sleeping */ 2160 sti->sti_unbind_mp = 2161 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2162 if (sti->sti_unbind_mp == NULL) { 2163 error = EINTR; 2164 goto done; 2165 } 2166 } 2167 2168 /* 2169 * Can't have done a listen before connecting. 2170 */ 2171 if (so->so_state & SS_ACCEPTCONN) { 2172 error = EOPNOTSUPP; 2173 goto done; 2174 } 2175 2176 /* 2177 * Must be bound with the transport 2178 */ 2179 if (!(so->so_state & SS_ISBOUND)) { 2180 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2181 /*CONSTCOND*/ 2182 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2183 /* 2184 * Optimization for AF_INET{,6} transports 2185 * that can handle a T_CONN_REQ without being bound. 2186 */ 2187 so_automatic_bind(so); 2188 } else { 2189 error = sotpi_bind(so, NULL, 0, 2190 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2191 if (error) 2192 goto done; 2193 } 2194 ASSERT(so->so_state & SS_ISBOUND); 2195 flags |= _SOCONNECT_DID_BIND; 2196 } 2197 2198 /* 2199 * Handle a connect to a name parameter of type AF_UNSPEC like a 2200 * connect to a null address. This is the portable method to 2201 * unconnect a socket. 2202 */ 2203 if ((namelen >= sizeof (sa_family_t)) && 2204 (name->sa_family == AF_UNSPEC)) { 2205 name = NULL; 2206 namelen = 0; 2207 } 2208 2209 /* 2210 * Check that we are not already connected. 2211 * A connection-oriented socket cannot be reconnected. 
2212 * A connected connection-less socket can be 2213 * - connected to a different address by a subsequent connect 2214 * - "unconnected" by a connect to the NULL address 2215 */ 2216 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2217 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2218 if (so->so_mode & SM_CONNREQUIRED) { 2219 /* Connection-oriented socket */ 2220 error = so->so_state & SS_ISCONNECTED ? 2221 EISCONN : EALREADY; 2222 goto done; 2223 } 2224 /* Connection-less socket */ 2225 if (name == NULL) { 2226 /* 2227 * Remove the connected state and clear SO_DGRAM_ERRIND 2228 * since it was set when the socket was connected. 2229 * If this is UDP also send down a T_DISCON_REQ. 2230 */ 2231 int val; 2232 2233 if ((so->so_family == AF_INET || 2234 so->so_family == AF_INET6) && 2235 (so->so_type == SOCK_DGRAM || 2236 so->so_type == SOCK_RAW) && 2237 /*CONSTCOND*/ 2238 !soconnect_tpi_udp) { 2239 /* XXX What about implicitly unbinding here? */ 2240 error = sodisconnect(so, -1, 2241 _SODISCONNECT_LOCK_HELD); 2242 } else { 2243 so->so_state &= 2244 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2245 sti->sti_faddr_valid = 0; 2246 sti->sti_faddr_len = 0; 2247 } 2248 2249 /* Remove SOLOCKED since setsockopt will grab it */ 2250 so_unlock_single(so, SOLOCKED); 2251 mutex_exit(&so->so_lock); 2252 2253 val = 0; 2254 (void) sotpi_setsockopt(so, SOL_SOCKET, 2255 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2256 cr); 2257 2258 mutex_enter(&so->so_lock); 2259 so_lock_single(so); /* Set SOLOCKED */ 2260 goto done; 2261 } 2262 } 2263 ASSERT(so->so_state & SS_ISBOUND); 2264 2265 if (name == NULL || namelen == 0) { 2266 error = EINVAL; 2267 goto done; 2268 } 2269 /* 2270 * Mark the socket if sti_faddr_sa represents the transport level 2271 * address. 
2272 */ 2273 if (flags & _SOCONNECT_NOXLATE) { 2274 struct sockaddr_ux *soaddr_ux; 2275 2276 ASSERT(so->so_family == AF_UNIX); 2277 if (namelen != sizeof (struct sockaddr_ux)) { 2278 error = EINVAL; 2279 goto done; 2280 } 2281 soaddr_ux = (struct sockaddr_ux *)name; 2282 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2283 namelen = sizeof (soaddr_ux->sou_addr); 2284 sti->sti_faddr_noxlate = 1; 2285 } 2286 2287 /* 2288 * Length and family checks. 2289 */ 2290 error = so_addr_verify(so, name, namelen); 2291 if (error) 2292 goto bad; 2293 2294 /* 2295 * Save foreign address. Needed for AF_UNIX as well as 2296 * transport providers that do not support TI_GETPEERNAME. 2297 * Also used for cached foreign address for TCP and UDP. 2298 */ 2299 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2300 error = EINVAL; 2301 goto done; 2302 } 2303 sti->sti_faddr_len = (socklen_t)namelen; 2304 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2305 bcopy(name, sti->sti_faddr_sa, namelen); 2306 sti->sti_faddr_valid = 1; 2307 2308 if (so->so_family == AF_UNIX) { 2309 if (sti->sti_faddr_noxlate) { 2310 /* 2311 * sti_faddr is a transport-level address, so 2312 * don't pass it as an option. Do save it in 2313 * sti_ux_faddr, used for connected DG send. 2314 */ 2315 src = NULL; 2316 srclen = 0; 2317 addr = sti->sti_faddr_sa; 2318 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2319 bcopy(addr, &sti->sti_ux_faddr, 2320 sizeof (sti->sti_ux_faddr)); 2321 } else { 2322 /* 2323 * Pass the sockaddr_un source address as an option 2324 * and translate the remote address. 2325 * Holding so_lock thus sti_laddr_sa can not change. 2326 */ 2327 src = sti->sti_laddr_sa; 2328 srclen = (t_uscalar_t)sti->sti_laddr_len; 2329 dprintso(so, 1, 2330 ("sotpi_connect UNIX: srclen %d, src %p\n", 2331 srclen, src)); 2332 /* 2333 * Translate the destination address into our 2334 * internal form, and save it in sti_ux_faddr. 
2335 * After this call, addr==&sti->sti_ux_taddr, 2336 * and we copy that to sti->sti_ux_faddr so 2337 * we save the connected peer address. 2338 */ 2339 error = so_ux_addr_xlate(so, 2340 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2341 (flags & _SOCONNECT_XPG4_2), 2342 &addr, &addrlen); 2343 if (error) 2344 goto bad; 2345 bcopy(&sti->sti_ux_taddr, &sti->sti_ux_faddr, 2346 sizeof (sti->sti_ux_faddr)); 2347 } 2348 } else { 2349 addr = sti->sti_faddr_sa; 2350 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2351 src = NULL; 2352 srclen = 0; 2353 } 2354 /* 2355 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2356 * option which asks the transport provider to send T_UDERR_IND 2357 * messages. These T_UDERR_IND messages are used to return connected 2358 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2359 * 2360 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2361 * we send down a T_CONN_REQ. This is needed to let the 2362 * transport assign a local address that is consistent with 2363 * the remote address. Applications depend on a getsockname() 2364 * after a connect() to retrieve the "source" IP address for 2365 * the connected socket. Invalidate the cached local address 2366 * to force getsockname() to enquire of the transport. 2367 */ 2368 if (!(so->so_mode & SM_CONNREQUIRED)) { 2369 /* 2370 * Datagram socket. 2371 */ 2372 int32_t val; 2373 2374 so_unlock_single(so, SOLOCKED); 2375 mutex_exit(&so->so_lock); 2376 2377 val = 1; 2378 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2379 &val, (t_uscalar_t)sizeof (val), cr); 2380 2381 mutex_enter(&so->so_lock); 2382 so_lock_single(so); /* Set SOLOCKED */ 2383 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2384 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2385 soconnect_tpi_udp) { 2386 soisconnected(so); 2387 goto done; 2388 } 2389 /* 2390 * Send down T_CONN_REQ etc. 2391 * Clear fflag to avoid returning EWOULDBLOCK. 
2392 */ 2393 fflag = 0; 2394 ASSERT(so->so_family != AF_UNIX); 2395 sti->sti_laddr_valid = 0; 2396 } else if (sti->sti_laddr_len != 0) { 2397 /* 2398 * If the local address or port was "any" then it may be 2399 * changed by the transport as a result of the 2400 * connect. Invalidate the cached version if we have one. 2401 */ 2402 switch (so->so_family) { 2403 case AF_INET: 2404 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2405 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2406 INADDR_ANY || 2407 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2408 sti->sti_laddr_valid = 0; 2409 break; 2410 2411 case AF_INET6: 2412 ASSERT(sti->sti_laddr_len == 2413 (socklen_t)sizeof (sin6_t)); 2414 if (IN6_IS_ADDR_UNSPECIFIED( 2415 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2416 IN6_IS_ADDR_V4MAPPED_ANY( 2417 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2418 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2419 sti->sti_laddr_valid = 0; 2420 break; 2421 2422 default: 2423 break; 2424 } 2425 } 2426 2427 /* 2428 * Check for failure of an earlier call 2429 */ 2430 if (so->so_error != 0) 2431 goto so_bad; 2432 2433 /* 2434 * Send down T_CONN_REQ. Message was allocated above. 2435 */ 2436 conn_req.PRIM_type = T_CONN_REQ; 2437 conn_req.DEST_length = addrlen; 2438 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2439 if (srclen == 0) { 2440 conn_req.OPT_length = 0; 2441 conn_req.OPT_offset = 0; 2442 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2443 soappendmsg(mp, addr, addrlen); 2444 } else { 2445 /* 2446 * There is a AF_UNIX sockaddr_un to include as a source 2447 * address option. 
2448 */ 2449 struct T_opthdr toh; 2450 2451 toh.level = SOL_SOCKET; 2452 toh.name = SO_SRCADDR; 2453 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2454 toh.status = 0; 2455 conn_req.OPT_length = 2456 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2457 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2458 _TPI_ALIGN_TOPT(addrlen)); 2459 2460 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2461 soappendmsg(mp, addr, addrlen); 2462 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2463 soappendmsg(mp, &toh, sizeof (toh)); 2464 soappendmsg(mp, src, srclen); 2465 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2466 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2467 } 2468 /* 2469 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2470 * in order to have the right state when the T_CONN_CON shows up. 2471 */ 2472 soisconnecting(so); 2473 mutex_exit(&so->so_lock); 2474 2475 if (AU_AUDITING()) 2476 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2477 2478 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2479 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2480 mp = NULL; 2481 mutex_enter(&so->so_lock); 2482 if (error != 0) 2483 goto bad; 2484 2485 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2486 goto bad; 2487 2488 /* Allow other threads to access the socket */ 2489 so_unlock_single(so, SOLOCKED); 2490 need_unlock = B_FALSE; 2491 2492 /* 2493 * Wait until we get a T_CONN_CON or an error 2494 */ 2495 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2496 so_lock_single(so); /* Set SOLOCKED */ 2497 need_unlock = B_TRUE; 2498 } 2499 2500 done: 2501 freemsg(mp); 2502 switch (error) { 2503 case EINPROGRESS: 2504 case EALREADY: 2505 case EISCONN: 2506 case EINTR: 2507 /* Non-fatal errors */ 2508 sti->sti_laddr_valid = 0; 2509 /* FALLTHRU */ 2510 case 0: 2511 break; 2512 default: 2513 ASSERT(need_unlock); 2514 /* 2515 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2516 * and invalidate local-address cache 2517 */ 2518 so->so_state &= 
~SS_ISCONNECTING; 2519 sti->sti_laddr_valid = 0; 2520 /* A discon_ind might have already unbound us */ 2521 if ((flags & _SOCONNECT_DID_BIND) && 2522 (so->so_state & SS_ISBOUND)) { 2523 int err; 2524 2525 err = sotpi_unbind(so, 0); 2526 /* LINTED - statement has no conseq */ 2527 if (err) { 2528 eprintsoline(so, err); 2529 } 2530 } 2531 break; 2532 } 2533 if (need_unlock) 2534 so_unlock_single(so, SOLOCKED); 2535 mutex_exit(&so->so_lock); 2536 return (error); 2537 2538 so_bad: error = sogeterr(so, B_TRUE); 2539 bad: eprintsoline(so, error); 2540 goto done; 2541 } 2542 2543 /* ARGSUSED */ 2544 int 2545 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2546 { 2547 struct T_ordrel_req ordrel_req; 2548 mblk_t *mp; 2549 uint_t old_state, state_change; 2550 int error = 0; 2551 sotpi_info_t *sti = SOTOTPI(so); 2552 2553 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2554 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2555 2556 mutex_enter(&so->so_lock); 2557 so_lock_single(so); /* Set SOLOCKED */ 2558 2559 /* 2560 * SunOS 4.X has no check for datagram sockets. 2561 * 5.X checks that it is connected (ENOTCONN) 2562 * X/Open requires that we check the connected state. 2563 */ 2564 if (!(so->so_state & SS_ISCONNECTED)) { 2565 if (!xnet_skip_checks) { 2566 error = ENOTCONN; 2567 if (xnet_check_print) { 2568 printf("sockfs: X/Open shutdown check " 2569 "caused ENOTCONN\n"); 2570 } 2571 } 2572 goto done; 2573 } 2574 /* 2575 * Record the current state and then perform any state changes. 2576 * Then use the difference between the old and new states to 2577 * determine which messages need to be sent. 2578 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2579 * duplicate calls to shutdown(). 
2580 */ 2581 old_state = so->so_state; 2582 2583 switch (how) { 2584 case 0: 2585 socantrcvmore(so); 2586 break; 2587 case 1: 2588 socantsendmore(so); 2589 break; 2590 case 2: 2591 socantsendmore(so); 2592 socantrcvmore(so); 2593 break; 2594 default: 2595 error = EINVAL; 2596 goto done; 2597 } 2598 2599 /* 2600 * Assumes that the SS_CANT* flags are never cleared in the above code. 2601 */ 2602 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2603 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2604 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2605 2606 switch (state_change) { 2607 case 0: 2608 dprintso(so, 1, 2609 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2610 so->so_state)); 2611 goto done; 2612 2613 case SS_CANTRCVMORE: 2614 mutex_exit(&so->so_lock); 2615 strseteof(SOTOV(so), 1); 2616 /* 2617 * strseteof takes care of read side wakeups, 2618 * pollwakeups, and signals. 2619 */ 2620 /* 2621 * Get the read lock before flushing data to avoid problems 2622 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2623 */ 2624 mutex_enter(&so->so_lock); 2625 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2626 mutex_exit(&so->so_lock); 2627 2628 /* Flush read side queue */ 2629 strflushrq(SOTOV(so), FLUSHALL); 2630 2631 mutex_enter(&so->so_lock); 2632 so_unlock_read(so); /* Clear SOREADLOCKED */ 2633 break; 2634 2635 case SS_CANTSENDMORE: 2636 mutex_exit(&so->so_lock); 2637 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2638 mutex_enter(&so->so_lock); 2639 break; 2640 2641 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2642 mutex_exit(&so->so_lock); 2643 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2644 strseteof(SOTOV(so), 1); 2645 /* 2646 * strseteof takes care of read side wakeups, 2647 * pollwakeups, and signals. 2648 */ 2649 /* 2650 * Get the read lock before flushing data to avoid problems 2651 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2652 */ 2653 mutex_enter(&so->so_lock); 2654 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2655 mutex_exit(&so->so_lock); 2656 2657 /* Flush read side queue */ 2658 strflushrq(SOTOV(so), FLUSHALL); 2659 2660 mutex_enter(&so->so_lock); 2661 so_unlock_read(so); /* Clear SOREADLOCKED */ 2662 break; 2663 } 2664 2665 ASSERT(MUTEX_HELD(&so->so_lock)); 2666 2667 /* 2668 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2669 * was set due to this call and the new state has both of them set: 2670 * Send the AF_UNIX close indication 2671 * For T_COTS send a discon_ind 2672 * 2673 * If cantsend was set due to this call: 2674 * For T_COTSORD send an ordrel_ind 2675 * 2676 * Note that for T_CLTS there is no message sent here. 2677 */ 2678 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2679 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2680 /* 2681 * For SunOS 4.X compatibility we tell the other end 2682 * that we are unable to receive at this point. 2683 */ 2684 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2685 so_unix_close(so); 2686 2687 if (sti->sti_serv_type == T_COTS) 2688 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2689 } 2690 if ((state_change & SS_CANTSENDMORE) && 2691 (sti->sti_serv_type == T_COTS_ORD)) { 2692 /* Send an orderly release */ 2693 ordrel_req.PRIM_type = T_ORDREL_REQ; 2694 2695 mutex_exit(&so->so_lock); 2696 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2697 0, _ALLOC_SLEEP, cr); 2698 /* 2699 * Send down the T_ORDREL_REQ even if there is flow control. 2700 * This prevents shutdown from blocking. 2701 * Note that there is no T_OK_ACK for ordrel_req. 
2702 */ 2703 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2704 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2705 mutex_enter(&so->so_lock); 2706 if (error) { 2707 eprintsoline(so, error); 2708 goto done; 2709 } 2710 } 2711 2712 done: 2713 so_unlock_single(so, SOLOCKED); 2714 mutex_exit(&so->so_lock); 2715 return (error); 2716 } 2717 2718 /* 2719 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2720 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2721 * that we have closed. 2722 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2723 * T_UNITDATA_REQ containing the same option. 2724 * 2725 * For SOCK_DGRAM half-connections (somebody connected to this end 2726 * but this end is not connect) we don't know where to send any 2727 * SO_UNIX_CLOSE. 2728 * 2729 * We have to ignore stream head errors just in case there has been 2730 * a shutdown(output). 2731 * Ignore any flow control to try to get the message more quickly to the peer. 2732 * While locally ignoring flow control solves the problem when there 2733 * is only the loopback transport on the stream it would not provide 2734 * the correct AF_UNIX socket semantics when one or more modules have 2735 * been pushed. 
2736 */ 2737 void 2738 so_unix_close(struct sonode *so) 2739 { 2740 struct T_opthdr toh; 2741 mblk_t *mp; 2742 sotpi_info_t *sti = SOTOTPI(so); 2743 2744 ASSERT(MUTEX_HELD(&so->so_lock)); 2745 2746 ASSERT(so->so_family == AF_UNIX); 2747 2748 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2749 (SS_ISCONNECTED|SS_ISBOUND)) 2750 return; 2751 2752 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2753 (void *)so, pr_state(so->so_state, so->so_mode))); 2754 2755 toh.level = SOL_SOCKET; 2756 toh.name = SO_UNIX_CLOSE; 2757 2758 /* zero length + header */ 2759 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2760 toh.status = 0; 2761 2762 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2763 struct T_optdata_req tdr; 2764 2765 tdr.PRIM_type = T_OPTDATA_REQ; 2766 tdr.DATA_flag = 0; 2767 2768 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2769 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2770 2771 /* NOTE: holding so_lock while sleeping */ 2772 mp = soallocproto2(&tdr, sizeof (tdr), 2773 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2774 } else { 2775 struct T_unitdata_req tudr; 2776 void *addr; 2777 socklen_t addrlen; 2778 void *src; 2779 socklen_t srclen; 2780 struct T_opthdr toh2; 2781 t_scalar_t size; 2782 2783 /* 2784 * We know this is an AF_UNIX connected DGRAM socket. 2785 * We therefore already have the destination address 2786 * in the internal form needed for this send. This is 2787 * similar to the sosend_dgram call later in this file 2788 * when there's no user-specified destination address. 2789 */ 2790 if (sti->sti_faddr_noxlate) { 2791 /* 2792 * Already have a transport internal address. Do not 2793 * pass any (transport internal) source address. 2794 */ 2795 addr = sti->sti_faddr_sa; 2796 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2797 src = NULL; 2798 srclen = 0; 2799 } else { 2800 /* 2801 * Pass the sockaddr_un source address as an option 2802 * and translate the remote address. 2803 * Holding so_lock thus sti_laddr_sa can not change. 
2804 */ 2805 src = sti->sti_laddr_sa; 2806 srclen = (socklen_t)sti->sti_laddr_len; 2807 dprintso(so, 1, 2808 ("so_ux_close: srclen %d, src %p\n", 2809 srclen, src)); 2810 /* 2811 * Use the destination address saved in connect. 2812 */ 2813 addr = &sti->sti_ux_faddr; 2814 addrlen = sizeof (sti->sti_ux_faddr); 2815 } 2816 tudr.PRIM_type = T_UNITDATA_REQ; 2817 tudr.DEST_length = addrlen; 2818 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2819 if (srclen == 0) { 2820 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2821 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2822 _TPI_ALIGN_TOPT(addrlen)); 2823 2824 size = tudr.OPT_offset + tudr.OPT_length; 2825 /* NOTE: holding so_lock while sleeping */ 2826 mp = soallocproto2(&tudr, sizeof (tudr), 2827 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2828 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2829 soappendmsg(mp, &toh, sizeof (toh)); 2830 } else { 2831 /* 2832 * There is a AF_UNIX sockaddr_un to include as a 2833 * source address option. 2834 */ 2835 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2836 _TPI_ALIGN_TOPT(srclen)); 2837 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2838 _TPI_ALIGN_TOPT(addrlen)); 2839 2840 toh2.level = SOL_SOCKET; 2841 toh2.name = SO_SRCADDR; 2842 toh2.len = (t_uscalar_t)(srclen + 2843 sizeof (struct T_opthdr)); 2844 toh2.status = 0; 2845 2846 size = tudr.OPT_offset + tudr.OPT_length; 2847 2848 /* NOTE: holding so_lock while sleeping */ 2849 mp = soallocproto2(&tudr, sizeof (tudr), 2850 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2851 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2852 soappendmsg(mp, &toh, sizeof (toh)); 2853 soappendmsg(mp, &toh2, sizeof (toh2)); 2854 soappendmsg(mp, src, srclen); 2855 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2856 } 2857 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2858 } 2859 mutex_exit(&so->so_lock); 2860 (void) kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2861 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2862 mutex_enter(&so->so_lock); 2863 } 
2864 2865 /* 2866 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2867 * In addition, the caller typically verifies that there is some 2868 * potential state to clear by checking 2869 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2870 * before calling this routine. 2871 * Note that such a check can be made without holding so_lock since 2872 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2873 * decrements sti_oobsigcnt. 2874 * 2875 * When data is read *after* the point that all pending 2876 * oob data has been consumed the oob indication is cleared. 2877 * 2878 * This logic keeps select/poll returning POLLRDBAND and 2879 * SIOCATMARK returning true until we have read past 2880 * the mark. 2881 */ 2882 static void 2883 sorecv_update_oobstate(struct sonode *so) 2884 { 2885 sotpi_info_t *sti = SOTOTPI(so); 2886 2887 mutex_enter(&so->so_lock); 2888 ASSERT(so_verify_oobstate(so)); 2889 dprintso(so, 1, 2890 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2891 sti->sti_oobsigcnt, 2892 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 2893 if (sti->sti_oobsigcnt == 0) { 2894 /* No more pending oob indications */ 2895 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2896 freemsg(so->so_oobmsg); 2897 so->so_oobmsg = NULL; 2898 } 2899 ASSERT(so_verify_oobstate(so)); 2900 mutex_exit(&so->so_lock); 2901 } 2902 2903 /* 2904 * Receive the next message on the queue. 2905 * If msg_controllen is non-zero when called the caller is interested in 2906 * any received control info (options). 2907 * If msg_namelen is non-zero when called the caller is interested in 2908 * any received source address. 2909 * The routine returns with msg_control and msg_name pointing to 2910 * kmem_alloc'ed memory which the caller has to free. 
2911 */ 2912 /* ARGSUSED */ 2913 int 2914 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 2915 struct cred *cr) 2916 { 2917 union T_primitives *tpr; 2918 mblk_t *mp; 2919 uchar_t pri; 2920 int pflag, opflag; 2921 void *control; 2922 t_uscalar_t controllen; 2923 t_uscalar_t namelen; 2924 int so_state = so->so_state; /* Snapshot */ 2925 ssize_t saved_resid; 2926 rval_t rval; 2927 int flags; 2928 clock_t timout; 2929 int error = 0; 2930 sotpi_info_t *sti = SOTOTPI(so); 2931 2932 flags = msg->msg_flags; 2933 msg->msg_flags = 0; 2934 2935 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 2936 (void *)so, (void *)msg, flags, 2937 pr_state(so->so_state, so->so_mode), so->so_error)); 2938 2939 if (so->so_version == SOV_STREAM) { 2940 so_update_attrs(so, SOACC); 2941 /* The imaginary "sockmod" has been popped - act as a stream */ 2942 return (strread(SOTOV(so), uiop, cr)); 2943 } 2944 2945 /* 2946 * If we are not connected because we have never been connected 2947 * we return ENOTCONN. If we have been connected (but are no longer 2948 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 2949 * the EOF. 2950 * 2951 * An alternative would be to post an ENOTCONN error in stream head 2952 * (read+write) and clear it when we're connected. However, that error 2953 * would cause incorrect poll/select behavior! 2954 */ 2955 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 2956 (so->so_mode & SM_CONNREQUIRED)) { 2957 return (ENOTCONN); 2958 } 2959 2960 /* 2961 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 2962 * after checking that the read queue is empty) and returns zero. 2963 * This implementation will sleep (in kstrgetmsg) even if uio_resid 2964 * is zero. 
2965 */ 2966 2967 if (flags & MSG_OOB) { 2968 /* Check that the transport supports OOB */ 2969 if (!(so->so_mode & SM_EXDATA)) 2970 return (EOPNOTSUPP); 2971 so_update_attrs(so, SOACC); 2972 return (sorecvoob(so, msg, uiop, flags, 2973 (so->so_options & SO_OOBINLINE))); 2974 } 2975 2976 so_update_attrs(so, SOACC); 2977 2978 /* 2979 * Set msg_controllen and msg_namelen to zero here to make it 2980 * simpler in the cases that no control or name is returned. 2981 */ 2982 controllen = msg->msg_controllen; 2983 namelen = msg->msg_namelen; 2984 msg->msg_controllen = 0; 2985 msg->msg_namelen = 0; 2986 2987 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 2988 namelen, controllen)); 2989 2990 mutex_enter(&so->so_lock); 2991 /* 2992 * Only one reader is allowed at any given time. This is needed 2993 * for T_EXDATA handling and, in the future, MSG_WAITALL. 2994 * 2995 * This is slightly different that BSD behavior in that it fails with 2996 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 2997 * is single-threaded using sblock(), which is dropped while waiting 2998 * for data to appear. The difference shows up e.g. if one 2999 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3000 * does use nonblocking io and different threads are reading each 3001 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3002 * in this case as long as the read queue doesn't get empty. 3003 * In this implementation the thread using nonblocking io can 3004 * get an EWOULDBLOCK error due to the blocking thread executing 3005 * e.g. in the uiomove in kstrgetmsg. 3006 * This difference is not believed to be significant. 3007 */ 3008 /* Set SOREADLOCKED */ 3009 error = so_lock_read_intr(so, 3010 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? 
FNONBLOCK : 0)); 3011 mutex_exit(&so->so_lock); 3012 if (error) 3013 return (error); 3014 3015 /* 3016 * Tell kstrgetmsg to not inspect the stream head errors until all 3017 * queued data has been consumed. 3018 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3019 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3020 * 3021 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3022 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3023 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3024 */ 3025 pflag = MSG_ANY | MSG_DELAYERROR; 3026 if (flags & MSG_PEEK) { 3027 pflag |= MSG_IPEEK; 3028 flags &= ~MSG_WAITALL; 3029 } 3030 if (so->so_mode & SM_ATOMIC) 3031 pflag |= MSG_DISCARDTAIL; 3032 3033 if (flags & MSG_DONTWAIT) 3034 timout = 0; 3035 else if (so->so_rcvtimeo != 0) 3036 timout = TICK_TO_MSEC(so->so_rcvtimeo); 3037 else 3038 timout = -1; 3039 opflag = pflag; 3040 retry: 3041 saved_resid = uiop->uio_resid; 3042 pri = 0; 3043 mp = NULL; 3044 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3045 timout, &rval); 3046 if (error != 0) { 3047 /* kstrgetmsg returns ETIME when timeout expires */ 3048 if (error == ETIME) 3049 error = EWOULDBLOCK; 3050 goto out; 3051 } 3052 /* 3053 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3054 * For non-datagrams MOREDATA is used to set MSG_EOR. 3055 */ 3056 ASSERT(!(rval.r_val1 & MORECTL)); 3057 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3058 msg->msg_flags |= MSG_TRUNC; 3059 3060 if (mp == NULL) { 3061 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3062 /* 3063 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3064 * The draft Posix socket spec states that the mark should 3065 * not be cleared when peeking. We follow the latter. 
3066 */ 3067 if ((so->so_state & 3068 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3069 (uiop->uio_resid != saved_resid) && 3070 !(flags & MSG_PEEK)) { 3071 sorecv_update_oobstate(so); 3072 } 3073 3074 mutex_enter(&so->so_lock); 3075 /* Set MSG_EOR based on MOREDATA */ 3076 if (!(rval.r_val1 & MOREDATA)) { 3077 if (so->so_state & SS_SAVEDEOR) { 3078 msg->msg_flags |= MSG_EOR; 3079 so->so_state &= ~SS_SAVEDEOR; 3080 } 3081 } 3082 /* 3083 * If some data was received (i.e. not EOF) and the 3084 * read/recv* has not been satisfied wait for some more. 3085 */ 3086 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3087 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3088 mutex_exit(&so->so_lock); 3089 pflag = opflag | MSG_NOMARK; 3090 goto retry; 3091 } 3092 goto out_locked; 3093 } 3094 3095 /* strsock_proto has already verified length and alignment */ 3096 tpr = (union T_primitives *)mp->b_rptr; 3097 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3098 3099 switch (tpr->type) { 3100 case T_DATA_IND: { 3101 if ((so->so_state & 3102 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3103 (uiop->uio_resid != saved_resid) && 3104 !(flags & MSG_PEEK)) { 3105 sorecv_update_oobstate(so); 3106 } 3107 3108 /* 3109 * Set msg_flags to MSG_EOR based on 3110 * MORE_flag and MOREDATA. 3111 */ 3112 mutex_enter(&so->so_lock); 3113 so->so_state &= ~SS_SAVEDEOR; 3114 if (!(tpr->data_ind.MORE_flag & 1)) { 3115 if (!(rval.r_val1 & MOREDATA)) 3116 msg->msg_flags |= MSG_EOR; 3117 else 3118 so->so_state |= SS_SAVEDEOR; 3119 } 3120 freemsg(mp); 3121 /* 3122 * If some data was received (i.e. not EOF) and the 3123 * read/recv* has not been satisfied wait for some more. 
3124 */ 3125 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3126 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3127 mutex_exit(&so->so_lock); 3128 pflag = opflag | MSG_NOMARK; 3129 goto retry; 3130 } 3131 goto out_locked; 3132 } 3133 case T_UNITDATA_IND: { 3134 void *addr; 3135 t_uscalar_t addrlen; 3136 void *abuf; 3137 t_uscalar_t optlen; 3138 void *opt; 3139 3140 if ((so->so_state & 3141 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3142 (uiop->uio_resid != saved_resid) && 3143 !(flags & MSG_PEEK)) { 3144 sorecv_update_oobstate(so); 3145 } 3146 3147 if (namelen != 0) { 3148 /* Caller wants source address */ 3149 addrlen = tpr->unitdata_ind.SRC_length; 3150 addr = sogetoff(mp, 3151 tpr->unitdata_ind.SRC_offset, 3152 addrlen, 1); 3153 if (addr == NULL) { 3154 freemsg(mp); 3155 error = EPROTO; 3156 eprintsoline(so, error); 3157 goto out; 3158 } 3159 if (so->so_family == AF_UNIX) { 3160 /* 3161 * Can not use the transport level address. 3162 * If there is a SO_SRCADDR option carrying 3163 * the socket level address it will be 3164 * extracted below. 3165 */ 3166 addr = NULL; 3167 addrlen = 0; 3168 } 3169 } 3170 optlen = tpr->unitdata_ind.OPT_length; 3171 if (optlen != 0) { 3172 t_uscalar_t ncontrollen; 3173 3174 /* 3175 * Extract any source address option. 3176 * Determine how large cmsg buffer is needed. 3177 */ 3178 opt = sogetoff(mp, 3179 tpr->unitdata_ind.OPT_offset, 3180 optlen, __TPI_ALIGN_SIZE); 3181 3182 if (opt == NULL) { 3183 freemsg(mp); 3184 error = EPROTO; 3185 eprintsoline(so, error); 3186 goto out; 3187 } 3188 if (so->so_family == AF_UNIX) 3189 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3190 ncontrollen = so_cmsglen(mp, opt, optlen, 3191 !(flags & MSG_XPG4_2)); 3192 if (controllen != 0) 3193 controllen = ncontrollen; 3194 else if (ncontrollen != 0) 3195 msg->msg_flags |= MSG_CTRUNC; 3196 } else { 3197 controllen = 0; 3198 } 3199 3200 if (namelen != 0) { 3201 /* 3202 * Return address to caller. 
3203 * Caller handles truncation if length 3204 * exceeds msg_namelen. 3205 * NOTE: AF_UNIX NUL termination is ensured by 3206 * the sender's copyin_name(). 3207 */ 3208 abuf = kmem_alloc(addrlen, KM_SLEEP); 3209 3210 bcopy(addr, abuf, addrlen); 3211 msg->msg_name = abuf; 3212 msg->msg_namelen = addrlen; 3213 } 3214 3215 if (controllen != 0) { 3216 /* 3217 * Return control msg to caller. 3218 * Caller handles truncation if length 3219 * exceeds msg_controllen. 3220 */ 3221 control = kmem_zalloc(controllen, KM_SLEEP); 3222 3223 error = so_opt2cmsg(mp, opt, optlen, flags, control, 3224 controllen); 3225 if (error) { 3226 freemsg(mp); 3227 if (msg->msg_namelen != 0) 3228 kmem_free(msg->msg_name, 3229 msg->msg_namelen); 3230 kmem_free(control, controllen); 3231 eprintsoline(so, error); 3232 goto out; 3233 } 3234 msg->msg_control = control; 3235 msg->msg_controllen = controllen; 3236 } 3237 3238 freemsg(mp); 3239 goto out; 3240 } 3241 case T_OPTDATA_IND: { 3242 struct T_optdata_req *tdr; 3243 void *opt; 3244 t_uscalar_t optlen; 3245 3246 if ((so->so_state & 3247 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3248 (uiop->uio_resid != saved_resid) && 3249 !(flags & MSG_PEEK)) { 3250 sorecv_update_oobstate(so); 3251 } 3252 3253 tdr = (struct T_optdata_req *)mp->b_rptr; 3254 optlen = tdr->OPT_length; 3255 if (optlen != 0) { 3256 t_uscalar_t ncontrollen; 3257 /* 3258 * Determine how large cmsg buffer is needed. 3259 */ 3260 opt = sogetoff(mp, 3261 tpr->optdata_ind.OPT_offset, 3262 optlen, __TPI_ALIGN_SIZE); 3263 3264 if (opt == NULL) { 3265 freemsg(mp); 3266 error = EPROTO; 3267 eprintsoline(so, error); 3268 goto out; 3269 } 3270 3271 ncontrollen = so_cmsglen(mp, opt, optlen, 3272 !(flags & MSG_XPG4_2)); 3273 if (controllen != 0) 3274 controllen = ncontrollen; 3275 else if (ncontrollen != 0) 3276 msg->msg_flags |= MSG_CTRUNC; 3277 } else { 3278 controllen = 0; 3279 } 3280 3281 if (controllen != 0) { 3282 /* 3283 * Return control msg to caller. 
3284 * Caller handles truncation if length 3285 * exceeds msg_controllen. 3286 */ 3287 control = kmem_zalloc(controllen, KM_SLEEP); 3288 3289 error = so_opt2cmsg(mp, opt, optlen, flags, control, 3290 controllen); 3291 if (error) { 3292 freemsg(mp); 3293 kmem_free(control, controllen); 3294 eprintsoline(so, error); 3295 goto out; 3296 } 3297 msg->msg_control = control; 3298 msg->msg_controllen = controllen; 3299 } 3300 3301 /* 3302 * Set msg_flags to MSG_EOR based on 3303 * DATA_flag and MOREDATA. 3304 */ 3305 mutex_enter(&so->so_lock); 3306 so->so_state &= ~SS_SAVEDEOR; 3307 if (!(tpr->data_ind.MORE_flag & 1)) { 3308 if (!(rval.r_val1 & MOREDATA)) 3309 msg->msg_flags |= MSG_EOR; 3310 else 3311 so->so_state |= SS_SAVEDEOR; 3312 } 3313 freemsg(mp); 3314 /* 3315 * If some data was received (i.e. not EOF) and the 3316 * read/recv* has not been satisfied wait for some more. 3317 * Not possible to wait if control info was received. 3318 */ 3319 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3320 controllen == 0 && 3321 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3322 mutex_exit(&so->so_lock); 3323 pflag = opflag | MSG_NOMARK; 3324 goto retry; 3325 } 3326 goto out_locked; 3327 } 3328 case T_EXDATA_IND: { 3329 dprintso(so, 1, 3330 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3331 "state %s\n", 3332 sti->sti_oobsigcnt, sti->sti_oobcnt, 3333 saved_resid - uiop->uio_resid, 3334 pr_state(so->so_state, so->so_mode))); 3335 /* 3336 * kstrgetmsg handles MSGMARK so there is nothing to 3337 * inspect in the T_EXDATA_IND. 3338 * strsock_proto makes the stream head queue the T_EXDATA_IND 3339 * as a separate message with no M_DATA component. Furthermore, 3340 * the stream head does not consolidate M_DATA messages onto 3341 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3342 * remains a message by itself. This is needed since MSGMARK 3343 * marks both the whole message as well as the last byte 3344 * of the message. 
3345 */ 3346 freemsg(mp); 3347 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3348 if (flags & MSG_PEEK) { 3349 /* 3350 * Even though we are peeking we consume the 3351 * T_EXDATA_IND thereby moving the mark information 3352 * to SS_RCVATMARK. Then the oob code below will 3353 * retry the peeking kstrgetmsg. 3354 * Note that the stream head read queue is 3355 * never flushed without holding SOREADLOCKED 3356 * thus the T_EXDATA_IND can not disappear 3357 * underneath us. 3358 */ 3359 dprintso(so, 1, 3360 ("sotpi_recvmsg: consume EXDATA_IND " 3361 "counts %d/%d state %s\n", 3362 sti->sti_oobsigcnt, 3363 sti->sti_oobcnt, 3364 pr_state(so->so_state, so->so_mode))); 3365 3366 pflag = MSG_ANY | MSG_DELAYERROR; 3367 if (so->so_mode & SM_ATOMIC) 3368 pflag |= MSG_DISCARDTAIL; 3369 3370 pri = 0; 3371 mp = NULL; 3372 3373 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3374 &pri, &pflag, (clock_t)-1, &rval); 3375 ASSERT(uiop->uio_resid == saved_resid); 3376 3377 if (error) { 3378 #ifdef SOCK_DEBUG 3379 if (error != EWOULDBLOCK && error != EINTR) { 3380 eprintsoline(so, error); 3381 } 3382 #endif /* SOCK_DEBUG */ 3383 goto out; 3384 } 3385 ASSERT(mp); 3386 tpr = (union T_primitives *)mp->b_rptr; 3387 ASSERT(tpr->type == T_EXDATA_IND); 3388 freemsg(mp); 3389 } /* end "if (flags & MSG_PEEK)" */ 3390 3391 /* 3392 * Decrement the number of queued and pending oob. 3393 * 3394 * SS_RCVATMARK is cleared when we read past a mark. 3395 * SS_HAVEOOBDATA is cleared when we've read past the 3396 * last mark. 3397 * SS_OOBPEND is cleared if we've read past the last 3398 * mark and no (new) SIGURG has been posted. 
3399 */ 3400 mutex_enter(&so->so_lock); 3401 ASSERT(so_verify_oobstate(so)); 3402 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3403 ASSERT(sti->sti_oobsigcnt > 0); 3404 sti->sti_oobsigcnt--; 3405 ASSERT(sti->sti_oobcnt > 0); 3406 sti->sti_oobcnt--; 3407 /* 3408 * Since the T_EXDATA_IND has been removed from the stream 3409 * head, but we have not read data past the mark, 3410 * sockfs needs to track that the socket is still at the mark. 3411 * 3412 * Since no data was received call kstrgetmsg again to wait 3413 * for data. 3414 */ 3415 so->so_state |= SS_RCVATMARK; 3416 mutex_exit(&so->so_lock); 3417 dprintso(so, 1, 3418 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3419 sti->sti_oobsigcnt, sti->sti_oobcnt, 3420 pr_state(so->so_state, so->so_mode))); 3421 pflag = opflag; 3422 goto retry; 3423 } 3424 default: 3425 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3426 (void *)so, tpr->type, (void *)mp); 3427 ASSERT(0); 3428 freemsg(mp); 3429 error = EPROTO; 3430 eprintsoline(so, error); 3431 goto out; 3432 } 3433 /* NOTREACHED */ 3434 out: 3435 mutex_enter(&so->so_lock); 3436 out_locked: 3437 so_unlock_read(so); /* Clear SOREADLOCKED */ 3438 mutex_exit(&so->so_lock); 3439 return (error); 3440 } 3441 3442 /* 3443 * Sending data with options on a datagram socket. 3444 * Assumes caller has verified that SS_ISBOUND etc. are set. 3445 * 3446 * For AF_UNIX the destination address may be already in 3447 * internal form, as indicated by sti->sti_faddr_noxlate 3448 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to 3449 * translate the destination address to internal form. 3450 * 3451 * The source address is passed as an option. If passing 3452 * file descriptors, those are passed as file pointers in 3453 * another option. 
3454 */ 3455 static int 3456 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3457 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3458 { 3459 struct T_unitdata_req tudr; 3460 mblk_t *mp; 3461 int error; 3462 void *addr; 3463 socklen_t addrlen; 3464 void *src; 3465 socklen_t srclen; 3466 ssize_t len; 3467 int size; 3468 struct T_opthdr toh; 3469 struct fdbuf *fdbuf; 3470 t_uscalar_t optlen; 3471 void *fds; 3472 int fdlen; 3473 sotpi_info_t *sti = SOTOTPI(so); 3474 3475 ASSERT(name && namelen); 3476 ASSERT(control && controllen); 3477 3478 len = uiop->uio_resid; 3479 if (len > (ssize_t)sti->sti_tidu_size) { 3480 return (EMSGSIZE); 3481 } 3482 3483 if (sti->sti_faddr_noxlate == 0 && 3484 (flags & MSG_SENDTO_NOXLATE) == 0) { 3485 /* 3486 * Length and family checks. 3487 * Don't verify internal form. 3488 */ 3489 error = so_addr_verify(so, name, namelen); 3490 if (error) { 3491 eprintsoline(so, error); 3492 return (error); 3493 } 3494 } 3495 3496 if (so->so_family == AF_UNIX) { 3497 if (sti->sti_faddr_noxlate) { 3498 /* 3499 * Already have a transport internal address. Do not 3500 * pass any (transport internal) source address. 3501 */ 3502 addr = name; 3503 addrlen = namelen; 3504 src = NULL; 3505 srclen = 0; 3506 } else if (flags & MSG_SENDTO_NOXLATE) { 3507 /* 3508 * Have an internal form dest. address. 3509 * Pass the source address as usual. 3510 */ 3511 addr = name; 3512 addrlen = namelen; 3513 src = sti->sti_laddr_sa; 3514 srclen = (socklen_t)sti->sti_laddr_len; 3515 } else { 3516 /* 3517 * Pass the sockaddr_un source address as an option 3518 * and translate the remote address. 3519 * 3520 * Note that this code does not prevent sti_laddr_sa 3521 * from changing while it is being used. Thus 3522 * if an unbind+bind occurs concurrently with this 3523 * send the peer might see a partially new and a 3524 * partially old "from" address. 
3525 */ 3526 src = sti->sti_laddr_sa; 3527 srclen = (socklen_t)sti->sti_laddr_len; 3528 dprintso(so, 1, 3529 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3530 srclen, src)); 3531 /* 3532 * The sendmsg caller specified a destination 3533 * address, which we must translate into our 3534 * internal form. addr = &sti->sti_ux_taddr 3535 */ 3536 error = so_ux_addr_xlate(so, name, namelen, 3537 (flags & MSG_XPG4_2), 3538 &addr, &addrlen); 3539 if (error) { 3540 eprintsoline(so, error); 3541 return (error); 3542 } 3543 } 3544 } else { 3545 addr = name; 3546 addrlen = namelen; 3547 src = NULL; 3548 srclen = 0; 3549 } 3550 optlen = so_optlen(control, controllen, 3551 !(flags & MSG_XPG4_2)); 3552 tudr.PRIM_type = T_UNITDATA_REQ; 3553 tudr.DEST_length = addrlen; 3554 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3555 if (srclen != 0) 3556 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3557 _TPI_ALIGN_TOPT(srclen)); 3558 else 3559 tudr.OPT_length = optlen; 3560 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3561 _TPI_ALIGN_TOPT(addrlen)); 3562 3563 size = tudr.OPT_offset + tudr.OPT_length; 3564 3565 /* 3566 * File descriptors only when SM_FDPASSING set. 3567 */ 3568 error = so_getfdopt(control, controllen, 3569 !(flags & MSG_XPG4_2), &fds, &fdlen); 3570 if (error) 3571 return (error); 3572 if (fdlen != -1) { 3573 if (!(so->so_mode & SM_FDPASSING)) 3574 return (EOPNOTSUPP); 3575 3576 error = fdbuf_create(fds, fdlen, &fdbuf); 3577 if (error) 3578 return (error); 3579 3580 /* 3581 * Pre-allocate enough additional space for lower level modules 3582 * to append an option (e.g. see tl_unitdata). The following 3583 * is enough extra space for the largest option we might append. 3584 */ 3585 size += sizeof (struct T_opthdr) + ucredsize; 3586 mp = fdbuf_allocmsg(size, fdbuf); 3587 } else { 3588 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3589 if (mp == NULL) { 3590 /* 3591 * Caught a signal waiting for memory. 3592 * Let send* return EINTR. 
3593 */ 3594 return (EINTR); 3595 } 3596 } 3597 soappendmsg(mp, &tudr, sizeof (tudr)); 3598 soappendmsg(mp, addr, addrlen); 3599 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3600 3601 if (fdlen != -1) { 3602 ASSERT(fdbuf != NULL); 3603 toh.level = SOL_SOCKET; 3604 toh.name = SO_FILEP; 3605 toh.len = fdbuf->fd_size + 3606 (t_uscalar_t)sizeof (struct T_opthdr); 3607 toh.status = 0; 3608 soappendmsg(mp, &toh, sizeof (toh)); 3609 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3610 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3611 } 3612 if (srclen != 0) { 3613 /* 3614 * There is a AF_UNIX sockaddr_un to include as a source 3615 * address option. 3616 */ 3617 toh.level = SOL_SOCKET; 3618 toh.name = SO_SRCADDR; 3619 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3620 toh.status = 0; 3621 soappendmsg(mp, &toh, sizeof (toh)); 3622 soappendmsg(mp, src, srclen); 3623 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3624 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3625 } 3626 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3627 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3628 /* 3629 * Normally at most 3 bytes left in the message, but we might have 3630 * allowed for extra space if we're passing fd's through. 3631 */ 3632 ASSERT(MBLKL(mp) <= (ssize_t)size); 3633 3634 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3635 if (AU_AUDITING()) 3636 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3637 3638 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3639 #ifdef SOCK_DEBUG 3640 if (error) { 3641 eprintsoline(so, error); 3642 } 3643 #endif /* SOCK_DEBUG */ 3644 return (error); 3645 } 3646 3647 /* 3648 * Sending data with options on a connected stream socket. 3649 * Assumes caller has verified that SS_ISCONNECTED is set. 
3650 */ 3651 static int 3652 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3653 t_uscalar_t controllen, int flags) 3654 { 3655 struct T_optdata_req tdr; 3656 mblk_t *mp; 3657 int error; 3658 ssize_t iosize; 3659 int size; 3660 struct fdbuf *fdbuf; 3661 t_uscalar_t optlen; 3662 void *fds; 3663 int fdlen; 3664 struct T_opthdr toh; 3665 sotpi_info_t *sti = SOTOTPI(so); 3666 3667 dprintso(so, 1, 3668 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3669 3670 /* 3671 * Has to be bound and connected. However, since no locks are 3672 * held the state could have changed after sotpi_sendmsg checked it 3673 * thus it is not possible to ASSERT on the state. 3674 */ 3675 3676 /* Options on connection-oriented only when SM_OPTDATA set. */ 3677 if (!(so->so_mode & SM_OPTDATA)) 3678 return (EOPNOTSUPP); 3679 3680 do { 3681 /* 3682 * Set the MORE flag if uio_resid does not fit in this 3683 * message or if the caller passed in "more". 3684 * Error for transports with zero tidu_size. 3685 */ 3686 tdr.PRIM_type = T_OPTDATA_REQ; 3687 iosize = sti->sti_tidu_size; 3688 if (iosize <= 0) 3689 return (EMSGSIZE); 3690 if (uiop->uio_resid > iosize) { 3691 tdr.DATA_flag = 1; 3692 } else { 3693 if (more) 3694 tdr.DATA_flag = 1; 3695 else 3696 tdr.DATA_flag = 0; 3697 iosize = uiop->uio_resid; 3698 } 3699 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3700 tdr.DATA_flag, iosize)); 3701 3702 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3703 tdr.OPT_length = optlen; 3704 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3705 3706 size = (int)sizeof (tdr) + optlen; 3707 /* 3708 * File descriptors only when SM_FDPASSING set. 
3709 */ 3710 error = so_getfdopt(control, controllen, 3711 !(flags & MSG_XPG4_2), &fds, &fdlen); 3712 if (error) 3713 return (error); 3714 if (fdlen != -1) { 3715 if (!(so->so_mode & SM_FDPASSING)) 3716 return (EOPNOTSUPP); 3717 3718 error = fdbuf_create(fds, fdlen, &fdbuf); 3719 if (error) 3720 return (error); 3721 3722 /* 3723 * Pre-allocate enough additional space for lower level 3724 * modules to append an option (e.g. see tl_unitdata). 3725 * The following is enough extra space for the largest 3726 * option we might append. 3727 */ 3728 size += sizeof (struct T_opthdr) + ucredsize; 3729 mp = fdbuf_allocmsg(size, fdbuf); 3730 } else { 3731 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3732 if (mp == NULL) { 3733 /* 3734 * Caught a signal waiting for memory. 3735 * Let send* return EINTR. 3736 */ 3737 return (EINTR); 3738 } 3739 } 3740 soappendmsg(mp, &tdr, sizeof (tdr)); 3741 3742 if (fdlen != -1) { 3743 ASSERT(fdbuf != NULL); 3744 toh.level = SOL_SOCKET; 3745 toh.name = SO_FILEP; 3746 toh.len = fdbuf->fd_size + 3747 (t_uscalar_t)sizeof (struct T_opthdr); 3748 toh.status = 0; 3749 soappendmsg(mp, &toh, sizeof (toh)); 3750 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3751 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3752 } 3753 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3754 /* 3755 * Normally at most 3 bytes left in the message, but we might 3756 * have allowed for extra space if we're passing fd's through. 3757 */ 3758 ASSERT(MBLKL(mp) <= (ssize_t)size); 3759 3760 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3761 3762 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3763 0, MSG_BAND, 0); 3764 if (error) { 3765 eprintsoline(so, error); 3766 return (error); 3767 } 3768 control = NULL; 3769 if (uiop->uio_resid > 0) { 3770 /* 3771 * Recheck for fatal errors. Fail write even though 3772 * some data have been written. This is consistent 3773 * with strwrite semantics and BSD sockets semantics. 
3774 */ 3775 if (so->so_state & SS_CANTSENDMORE) { 3776 eprintsoline(so, error); 3777 return (EPIPE); 3778 } 3779 if (so->so_error != 0) { 3780 mutex_enter(&so->so_lock); 3781 error = sogeterr(so, B_TRUE); 3782 mutex_exit(&so->so_lock); 3783 if (error != 0) { 3784 eprintsoline(so, error); 3785 return (error); 3786 } 3787 } 3788 } 3789 } while (uiop->uio_resid > 0); 3790 return (0); 3791 } 3792 3793 /* 3794 * Sending data on a datagram socket. 3795 * Assumes caller has verified that SS_ISBOUND etc. are set. 3796 * 3797 * For AF_UNIX the destination address may be already in 3798 * internal form, as indicated by sti->sti_faddr_noxlate 3799 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to 3800 * translate the destination address to internal form. 3801 * 3802 * The source address is passed as an option. 3803 */ 3804 int 3805 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3806 struct uio *uiop, int flags) 3807 { 3808 struct T_unitdata_req tudr; 3809 mblk_t *mp; 3810 int error; 3811 void *addr; 3812 socklen_t addrlen; 3813 void *src; 3814 socklen_t srclen; 3815 ssize_t len; 3816 sotpi_info_t *sti = SOTOTPI(so); 3817 3818 ASSERT(name != NULL && namelen != 0); 3819 3820 len = uiop->uio_resid; 3821 if (len > sti->sti_tidu_size) { 3822 error = EMSGSIZE; 3823 goto done; 3824 } 3825 3826 if (sti->sti_faddr_noxlate == 0 && 3827 (flags & MSG_SENDTO_NOXLATE) == 0) { 3828 /* 3829 * Length and family checks. 3830 * Don't verify internal form. 3831 */ 3832 error = so_addr_verify(so, name, namelen); 3833 if (error != 0) 3834 goto done; 3835 } 3836 3837 if (sti->sti_direct) /* Never on AF_UNIX */ 3838 return (sodgram_direct(so, name, namelen, uiop, flags)); 3839 3840 if (so->so_family == AF_UNIX) { 3841 if (sti->sti_faddr_noxlate) { 3842 /* 3843 * Already have a transport internal address. Do not 3844 * pass any (transport internal) source address. 
3845 */ 3846 addr = name; 3847 addrlen = namelen; 3848 src = NULL; 3849 srclen = 0; 3850 } else if (flags & MSG_SENDTO_NOXLATE) { 3851 /* 3852 * Have an internal form dest. address. 3853 * Pass the source address as usual. 3854 */ 3855 addr = name; 3856 addrlen = namelen; 3857 src = sti->sti_laddr_sa; 3858 srclen = (socklen_t)sti->sti_laddr_len; 3859 } else { 3860 /* 3861 * Pass the sockaddr_un source address as an option 3862 * and translate the remote address. 3863 * 3864 * Note that this code does not prevent sti_laddr_sa 3865 * from changing while it is being used. Thus 3866 * if an unbind+bind occurs concurrently with this 3867 * send the peer might see a partially new and a 3868 * partially old "from" address. 3869 */ 3870 src = sti->sti_laddr_sa; 3871 srclen = (socklen_t)sti->sti_laddr_len; 3872 dprintso(so, 1, 3873 ("sosend_dgram UNIX: srclen %d, src %p\n", 3874 srclen, src)); 3875 /* 3876 * The sendmsg caller specified a destination 3877 * address, which we must translate into our 3878 * internal form. addr = &sti->sti_ux_taddr 3879 */ 3880 error = so_ux_addr_xlate(so, name, namelen, 3881 (flags & MSG_XPG4_2), 3882 &addr, &addrlen); 3883 if (error) { 3884 eprintsoline(so, error); 3885 goto done; 3886 } 3887 } 3888 } else { 3889 addr = name; 3890 addrlen = namelen; 3891 src = NULL; 3892 srclen = 0; 3893 } 3894 tudr.PRIM_type = T_UNITDATA_REQ; 3895 tudr.DEST_length = addrlen; 3896 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3897 if (srclen == 0) { 3898 tudr.OPT_length = 0; 3899 tudr.OPT_offset = 0; 3900 3901 mp = soallocproto2(&tudr, sizeof (tudr), 3902 addr, addrlen, 0, _ALLOC_INTR, CRED()); 3903 if (mp == NULL) { 3904 /* 3905 * Caught a signal waiting for memory. 3906 * Let send* return EINTR. 3907 */ 3908 error = EINTR; 3909 goto done; 3910 } 3911 } else { 3912 /* 3913 * There is a AF_UNIX sockaddr_un to include as a source 3914 * address option. 
3915 */ 3916 struct T_opthdr toh; 3917 ssize_t size; 3918 3919 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3920 _TPI_ALIGN_TOPT(srclen)); 3921 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3922 _TPI_ALIGN_TOPT(addrlen)); 3923 3924 toh.level = SOL_SOCKET; 3925 toh.name = SO_SRCADDR; 3926 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3927 toh.status = 0; 3928 3929 size = tudr.OPT_offset + tudr.OPT_length; 3930 mp = soallocproto2(&tudr, sizeof (tudr), 3931 addr, addrlen, size, _ALLOC_INTR, CRED()); 3932 if (mp == NULL) { 3933 /* 3934 * Caught a signal waiting for memory. 3935 * Let send* return EINTR. 3936 */ 3937 error = EINTR; 3938 goto done; 3939 } 3940 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3941 soappendmsg(mp, &toh, sizeof (toh)); 3942 soappendmsg(mp, src, srclen); 3943 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3944 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3945 } 3946 3947 if (AU_AUDITING()) 3948 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3949 3950 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3951 done: 3952 #ifdef SOCK_DEBUG 3953 if (error) { 3954 eprintsoline(so, error); 3955 } 3956 #endif /* SOCK_DEBUG */ 3957 return (error); 3958 } 3959 3960 /* 3961 * Sending data on a connected stream socket. 3962 * Assumes caller has verified that SS_ISCONNECTED is set. 3963 */ 3964 int 3965 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 3966 int sflag) 3967 { 3968 struct T_data_req tdr; 3969 mblk_t *mp; 3970 int error; 3971 ssize_t iosize; 3972 sotpi_info_t *sti = SOTOTPI(so); 3973 3974 dprintso(so, 1, 3975 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 3976 (void *)so, uiop->uio_resid, prim, sflag)); 3977 3978 /* 3979 * Has to be bound and connected. However, since no locks are 3980 * held the state could have changed after sotpi_sendmsg checked it 3981 * thus it is not possible to ASSERT on the state. 
3982 */ 3983 3984 do { 3985 /* 3986 * Set the MORE flag if uio_resid does not fit in this 3987 * message or if the caller passed in "more". 3988 * Error for transports with zero tidu_size. 3989 */ 3990 tdr.PRIM_type = prim; 3991 iosize = sti->sti_tidu_size; 3992 if (iosize <= 0) 3993 return (EMSGSIZE); 3994 if (uiop->uio_resid > iosize) { 3995 tdr.MORE_flag = 1; 3996 } else { 3997 if (more) 3998 tdr.MORE_flag = 1; 3999 else 4000 tdr.MORE_flag = 0; 4001 iosize = uiop->uio_resid; 4002 } 4003 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4004 prim, tdr.MORE_flag, iosize)); 4005 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4006 if (mp == NULL) { 4007 /* 4008 * Caught a signal waiting for memory. 4009 * Let send* return EINTR. 4010 */ 4011 return (EINTR); 4012 } 4013 4014 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4015 0, sflag | MSG_BAND, 0); 4016 if (error) { 4017 eprintsoline(so, error); 4018 return (error); 4019 } 4020 if (uiop->uio_resid > 0) { 4021 /* 4022 * Recheck for fatal errors. Fail write even though 4023 * some data have been written. This is consistent 4024 * with strwrite semantics and BSD sockets semantics. 4025 */ 4026 if (so->so_state & SS_CANTSENDMORE) { 4027 eprintsoline(so, error); 4028 return (EPIPE); 4029 } 4030 if (so->so_error != 0) { 4031 mutex_enter(&so->so_lock); 4032 error = sogeterr(so, B_TRUE); 4033 mutex_exit(&so->so_lock); 4034 if (error != 0) { 4035 eprintsoline(so, error); 4036 return (error); 4037 } 4038 } 4039 } 4040 } while (uiop->uio_resid > 0); 4041 return (0); 4042 } 4043 4044 /* 4045 * Check the state for errors and call the appropriate send function. 4046 * 4047 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4048 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4049 * after sending the message. 4050 * 4051 * The caller may optionally specify a destination address, for either 4052 * stream or datagram sockets. 
 * This table summarizes the cases:
 *
 *	Socket type	Dest. given	Connected	Result
 *	-----------	-----------	---------	--------------
 *	Stream		*		Yes		send to conn. addr.
 *	Stream		*		No		error ENOTCONN
 *	Dgram		yes		no		send to given addr.
 *	Dgram		yes		yes		error EISCONN
 *	Dgram		no		yes		send to conn. addr.
 *	Dgram		no		no		error EDESTADDRREQ
 *
 * There are subtleties around the destination address when using
 * AF_UNIX datagram sockets.  When the sendmsg call specifies the
 * destination address, it's in (struct sockaddr_un) form and we
 * need to translate it to our internal form (struct so_ux_addr).
 *
 * When the sendmsg call does not specify a destination address
 * we're using the peer address saved during sotpi_connect, and
 * that address is already in internal form.  In this case, the
 * (internal only) flag MSG_SENDTO_NOXLATE is set in the flags
 * passed to sosend_dgram or sosend_dgramcmsg to indicate that
 * those functions should skip translation to internal form.
 * Avoiding that translation is not only more efficient, but it's
 * also necessary when a process does a connect on an AF_UNIX
 * datagram socket and then drops privileges.  After the process
 * has dropped privileges, it may no longer be able to look up
 * the external name in the filesystem, but it should still be
 * able to send messages on the connected socket by leaving the
 * destination name unspecified.
 *
 * Yet more subtleties arise with sockets connected by socketpair(),
 * which puts internal form addresses in the fields where normally
 * the external form is found, and sets sti_faddr_noxlate=1, which
 * (like flag MSG_SENDTO_NOXLATE) causes the sosend_dgram functions
 * to skip translation of destination addresses to internal form.
 * However, beware that the flag sti_faddr_noxlate=1 also triggers
 * different behaviour almost everywhere AF_UNIX addresses appear.
4088 */ 4089 static int 4090 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4091 struct cred *cr) 4092 { 4093 int so_state; 4094 int so_mode; 4095 int error; 4096 struct sockaddr *name; 4097 t_uscalar_t namelen; 4098 int dontroute; 4099 int flags; 4100 sotpi_info_t *sti = SOTOTPI(so); 4101 4102 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4103 (void *)so, (void *)msg, msg->msg_flags, 4104 pr_state(so->so_state, so->so_mode), so->so_error)); 4105 4106 if (so->so_version == SOV_STREAM) { 4107 /* The imaginary "sockmod" has been popped - act as a stream */ 4108 so_update_attrs(so, SOMOD); 4109 return (strwrite(SOTOV(so), uiop, cr)); 4110 } 4111 4112 mutex_enter(&so->so_lock); 4113 so_state = so->so_state; 4114 4115 if (so_state & SS_CANTSENDMORE) { 4116 mutex_exit(&so->so_lock); 4117 return (EPIPE); 4118 } 4119 4120 if (so->so_error != 0) { 4121 error = sogeterr(so, B_TRUE); 4122 if (error != 0) { 4123 mutex_exit(&so->so_lock); 4124 return (error); 4125 } 4126 } 4127 4128 name = (struct sockaddr *)msg->msg_name; 4129 namelen = msg->msg_namelen; 4130 flags = msg->msg_flags; 4131 4132 /* 4133 * Historically, this function does not validate the flags 4134 * passed in, and any errant bits are ignored. However, 4135 * we would not want any such errant flag bits accidently 4136 * being treated as one of the internal-only flags, so 4137 * clear the internal-only flag bits. 4138 */ 4139 flags &= ~MSG_SENDTO_NOXLATE; 4140 4141 so_mode = so->so_mode; 4142 4143 if (name == NULL) { 4144 if (!(so_state & SS_ISCONNECTED)) { 4145 mutex_exit(&so->so_lock); 4146 if (so_mode & SM_CONNREQUIRED) 4147 return (ENOTCONN); 4148 else 4149 return (EDESTADDRREQ); 4150 } 4151 /* 4152 * This is a connected socket. 4153 */ 4154 if (so_mode & SM_CONNREQUIRED) { 4155 /* 4156 * This is a connected STREAM socket, 4157 * destination not specified. 
4158 */ 4159 name = NULL; 4160 namelen = 0; 4161 } else { 4162 /* 4163 * Datagram send on connected socket with 4164 * the destination name not specified. 4165 * Use the peer address from connect. 4166 */ 4167 if (so->so_family == AF_UNIX) { 4168 /* 4169 * Use the (internal form) address saved 4170 * in sotpi_connect. See above. 4171 */ 4172 name = (void *)&sti->sti_ux_faddr; 4173 namelen = sizeof (sti->sti_ux_faddr); 4174 flags |= MSG_SENDTO_NOXLATE; 4175 } else { 4176 ASSERT(sti->sti_faddr_sa); 4177 name = sti->sti_faddr_sa; 4178 namelen = (t_uscalar_t)sti->sti_faddr_len; 4179 } 4180 } 4181 } else { 4182 /* 4183 * Sendmsg specifies a destination name 4184 */ 4185 if (!(so_state & SS_ISCONNECTED) && 4186 (so_mode & SM_CONNREQUIRED)) { 4187 /* i.e. TCP not connected */ 4188 mutex_exit(&so->so_lock); 4189 return (ENOTCONN); 4190 } 4191 /* 4192 * Ignore the address on connection-oriented sockets. 4193 * Just like BSD this code does not generate an error for 4194 * TCP (a CONNREQUIRED socket) when sending to an address 4195 * passed in with sendto/sendmsg. Instead the data is 4196 * delivered on the connection as if no address had been 4197 * supplied. 4198 */ 4199 if ((so_state & SS_ISCONNECTED) && 4200 !(so_mode & SM_CONNREQUIRED)) { 4201 mutex_exit(&so->so_lock); 4202 return (EISCONN); 4203 } 4204 if (!(so_state & SS_ISBOUND)) { 4205 so_lock_single(so); /* Set SOLOCKED */ 4206 error = sotpi_bind(so, NULL, 0, 4207 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4208 so_unlock_single(so, SOLOCKED); 4209 if (error) { 4210 mutex_exit(&so->so_lock); 4211 eprintsoline(so, error); 4212 return (error); 4213 } 4214 } 4215 /* 4216 * Handle delayed datagram errors. These are only queued 4217 * when the application sets SO_DGRAM_ERRIND. 4218 * Return the error if we are sending to the address 4219 * that was returned in the last T_UDERROR_IND. 4220 * If sending to some other address discard the delayed 4221 * error indication. 
4222 */ 4223 if (sti->sti_delayed_error) { 4224 struct T_uderror_ind *tudi; 4225 void *addr; 4226 t_uscalar_t addrlen; 4227 boolean_t match = B_FALSE; 4228 4229 ASSERT(sti->sti_eaddr_mp); 4230 error = sti->sti_delayed_error; 4231 sti->sti_delayed_error = 0; 4232 tudi = 4233 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4234 addrlen = tudi->DEST_length; 4235 addr = sogetoff(sti->sti_eaddr_mp, 4236 tudi->DEST_offset, addrlen, 1); 4237 ASSERT(addr); /* Checked by strsock_proto */ 4238 switch (so->so_family) { 4239 case AF_INET: { 4240 /* Compare just IP address and port */ 4241 sin_t *sin1 = (sin_t *)name; 4242 sin_t *sin2 = (sin_t *)addr; 4243 4244 if (addrlen == sizeof (sin_t) && 4245 namelen == addrlen && 4246 sin1->sin_port == sin2->sin_port && 4247 sin1->sin_addr.s_addr == 4248 sin2->sin_addr.s_addr) 4249 match = B_TRUE; 4250 break; 4251 } 4252 case AF_INET6: { 4253 /* Compare just IP address and port. Not flow */ 4254 sin6_t *sin1 = (sin6_t *)name; 4255 sin6_t *sin2 = (sin6_t *)addr; 4256 4257 if (addrlen == sizeof (sin6_t) && 4258 namelen == addrlen && 4259 sin1->sin6_port == sin2->sin6_port && 4260 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4261 &sin2->sin6_addr)) 4262 match = B_TRUE; 4263 break; 4264 } 4265 case AF_UNIX: 4266 default: 4267 if (namelen == addrlen && 4268 bcmp(name, addr, namelen) == 0) 4269 match = B_TRUE; 4270 } 4271 if (match) { 4272 freemsg(sti->sti_eaddr_mp); 4273 sti->sti_eaddr_mp = NULL; 4274 mutex_exit(&so->so_lock); 4275 #ifdef DEBUG 4276 dprintso(so, 0, 4277 ("sockfs delayed error %d for %s\n", 4278 error, 4279 pr_addr(so->so_family, name, namelen))); 4280 #endif /* DEBUG */ 4281 return (error); 4282 } 4283 freemsg(sti->sti_eaddr_mp); 4284 sti->sti_eaddr_mp = NULL; 4285 } 4286 } 4287 mutex_exit(&so->so_lock); 4288 4289 dontroute = 0; 4290 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4291 uint32_t val; 4292 4293 val = 1; 4294 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4295 &val, (t_uscalar_t)sizeof 
(val), cr); 4296 if (error) 4297 return (error); 4298 dontroute = 1; 4299 } 4300 4301 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4302 error = EOPNOTSUPP; 4303 goto done; 4304 } 4305 if (msg->msg_controllen != 0) { 4306 if (!(so_mode & SM_CONNREQUIRED)) { 4307 so_update_attrs(so, SOMOD); 4308 error = sosend_dgramcmsg(so, name, namelen, uiop, 4309 msg->msg_control, msg->msg_controllen, flags); 4310 } else { 4311 if (flags & MSG_OOB) { 4312 /* Can't generate T_EXDATA_REQ with options */ 4313 error = EOPNOTSUPP; 4314 goto done; 4315 } 4316 so_update_attrs(so, SOMOD); 4317 error = sosend_svccmsg(so, uiop, 4318 !(flags & MSG_EOR), 4319 msg->msg_control, msg->msg_controllen, 4320 flags); 4321 } 4322 goto done; 4323 } 4324 4325 so_update_attrs(so, SOMOD); 4326 if (!(so_mode & SM_CONNREQUIRED)) { 4327 /* 4328 * If there is no SO_DONTROUTE to turn off return immediately 4329 * from send_dgram. This can allow tail-call optimizations. 4330 */ 4331 if (!dontroute) { 4332 return (sosend_dgram(so, name, namelen, uiop, flags)); 4333 } 4334 error = sosend_dgram(so, name, namelen, uiop, flags); 4335 } else { 4336 t_scalar_t prim; 4337 int sflag; 4338 4339 /* Ignore msg_name in the connected state */ 4340 if (flags & MSG_OOB) { 4341 prim = T_EXDATA_REQ; 4342 /* 4343 * Send down T_EXDATA_REQ even if there is flow 4344 * control for data. 4345 */ 4346 sflag = MSG_IGNFLOW; 4347 } else { 4348 if (so_mode & SM_BYTESTREAM) { 4349 /* Byte stream transport - use write */ 4350 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4351 4352 /* Send M_DATA messages */ 4353 /* 4354 * If there is no SO_DONTROUTE to turn off, 4355 * sti_direct is on, and there is no flow 4356 * control, we can take the fast path. 
4357 */ 4358 if (!dontroute && sti->sti_direct != 0 && 4359 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4360 return (sostream_direct(so, uiop, 4361 NULL, cr)); 4362 } 4363 error = strwrite(SOTOV(so), uiop, cr); 4364 goto done; 4365 } 4366 prim = T_DATA_REQ; 4367 sflag = 0; 4368 } 4369 /* 4370 * If there is no SO_DONTROUTE to turn off return immediately 4371 * from sosend_svc. This can allow tail-call optimizations. 4372 */ 4373 if (!dontroute) 4374 return (sosend_svc(so, uiop, prim, 4375 !(flags & MSG_EOR), sflag)); 4376 error = sosend_svc(so, uiop, prim, 4377 !(flags & MSG_EOR), sflag); 4378 } 4379 ASSERT(dontroute); 4380 done: 4381 if (dontroute) { 4382 uint32_t val; 4383 4384 val = 0; 4385 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4386 &val, (t_uscalar_t)sizeof (val), cr); 4387 } 4388 return (error); 4389 } 4390 4391 /* 4392 * kstrwritemp() has very similar semantics as that of strwrite(). 4393 * The main difference is it obtains mblks from the caller and also 4394 * does not do any copy as done in strwrite() from user buffers to 4395 * kernel buffers. 4396 * 4397 * Currently, this routine is used by sendfile to send data allocated 4398 * within the kernel without any copying. This interface does not use the 4399 * synchronous stream interface as synch. stream interface implies 4400 * copying. 4401 */ 4402 int 4403 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4404 { 4405 struct stdata *stp; 4406 struct queue *wqp; 4407 mblk_t *newmp; 4408 char waitflag; 4409 int tempmode; 4410 int error = 0; 4411 int done = 0; 4412 struct sonode *so; 4413 boolean_t direct; 4414 4415 ASSERT(vp->v_stream); 4416 stp = vp->v_stream; 4417 4418 so = VTOSO(vp); 4419 direct = _SOTOTPI(so)->sti_direct; 4420 4421 /* 4422 * This is the sockfs direct fast path. canputnext() need 4423 * not be accurate so we don't grab the sd_lock here. If 4424 * we get flow-controlled, we grab sd_lock just before the 4425 * do..while loop below to emulate what strwrite() does. 
4426 */ 4427 wqp = stp->sd_wrq; 4428 if (canputnext(wqp) && direct && 4429 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4430 return (sostream_direct(so, NULL, mp, CRED())); 4431 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4432 /* Fast check of flags before acquiring the lock */ 4433 mutex_enter(&stp->sd_lock); 4434 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4435 mutex_exit(&stp->sd_lock); 4436 if (error != 0) { 4437 if (!(stp->sd_flag & STPLEX) && 4438 (stp->sd_wput_opt & SW_SIGPIPE)) { 4439 error = EPIPE; 4440 } 4441 return (error); 4442 } 4443 } 4444 4445 waitflag = WRITEWAIT; 4446 if (stp->sd_flag & OLDNDELAY) 4447 tempmode = fmode & ~FNDELAY; 4448 else 4449 tempmode = fmode; 4450 4451 mutex_enter(&stp->sd_lock); 4452 do { 4453 if (canputnext(wqp)) { 4454 mutex_exit(&stp->sd_lock); 4455 if (stp->sd_wputdatafunc != NULL) { 4456 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4457 NULL, NULL, NULL); 4458 if (newmp == NULL) { 4459 /* The caller will free mp */ 4460 return (ECOMM); 4461 } 4462 mp = newmp; 4463 } 4464 putnext(wqp, mp); 4465 return (0); 4466 } 4467 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4468 &done); 4469 } while (error == 0 && !done); 4470 4471 mutex_exit(&stp->sd_lock); 4472 /* 4473 * EAGAIN tells the application to try again. ENOMEM 4474 * is returned only if the memory allocation size 4475 * exceeds the physical limits of the system. ENOMEM 4476 * can't be true here. 
4477 */ 4478 if (error == ENOMEM) 4479 error = EAGAIN; 4480 return (error); 4481 } 4482 4483 /* ARGSUSED */ 4484 static int 4485 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4486 struct cred *cr, mblk_t **mpp) 4487 { 4488 int error; 4489 4490 switch (so->so_family) { 4491 case AF_INET: 4492 case AF_INET6: 4493 case AF_UNIX: 4494 break; 4495 default: 4496 return (EAFNOSUPPORT); 4497 4498 } 4499 4500 if (so->so_state & SS_CANTSENDMORE) 4501 return (EPIPE); 4502 4503 if (so->so_type != SOCK_STREAM) 4504 return (EOPNOTSUPP); 4505 4506 if ((so->so_state & SS_ISCONNECTED) == 0) 4507 return (ENOTCONN); 4508 4509 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4510 if (error == 0) 4511 *mpp = NULL; 4512 return (error); 4513 } 4514 4515 /* 4516 * Sending data on a datagram socket. 4517 * Assumes caller has verified that SS_ISBOUND etc. are set. 4518 */ 4519 /* ARGSUSED */ 4520 static int 4521 sodgram_direct(struct sonode *so, struct sockaddr *name, 4522 socklen_t namelen, struct uio *uiop, int flags) 4523 { 4524 struct T_unitdata_req tudr; 4525 mblk_t *mp = NULL; 4526 int error = 0; 4527 void *addr; 4528 socklen_t addrlen; 4529 ssize_t len; 4530 struct stdata *stp = SOTOV(so)->v_stream; 4531 int so_state; 4532 queue_t *udp_wq; 4533 boolean_t connected; 4534 mblk_t *mpdata = NULL; 4535 sotpi_info_t *sti = SOTOTPI(so); 4536 uint32_t auditing = AU_AUDITING(); 4537 4538 ASSERT(name != NULL && namelen != 0); 4539 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4540 ASSERT(!(so->so_mode & SM_EXDATA)); 4541 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4542 ASSERT(SOTOV(so)->v_type == VSOCK); 4543 4544 /* Caller checked for proper length */ 4545 len = uiop->uio_resid; 4546 ASSERT(len <= sti->sti_tidu_size); 4547 4548 /* Length and family checks have been done by caller */ 4549 ASSERT(name->sa_family == so->so_family); 4550 ASSERT(so->so_family == AF_INET || 4551 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4552 ASSERT(so->so_family == 
AF_INET6 || 4553 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4554 4555 addr = name; 4556 addrlen = namelen; 4557 4558 if (stp->sd_sidp != NULL && 4559 (error = straccess(stp, JCWRITE)) != 0) 4560 goto done; 4561 4562 so_state = so->so_state; 4563 4564 connected = so_state & SS_ISCONNECTED; 4565 if (!connected) { 4566 tudr.PRIM_type = T_UNITDATA_REQ; 4567 tudr.DEST_length = addrlen; 4568 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4569 tudr.OPT_length = 0; 4570 tudr.OPT_offset = 0; 4571 4572 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4573 _ALLOC_INTR, CRED()); 4574 if (mp == NULL) { 4575 /* 4576 * Caught a signal waiting for memory. 4577 * Let send* return EINTR. 4578 */ 4579 error = EINTR; 4580 goto done; 4581 } 4582 } 4583 4584 /* 4585 * For UDP we don't break up the copyin into smaller pieces 4586 * as in the TCP case. That means if ENOMEM is returned by 4587 * mcopyinuio() then the uio vector has not been modified at 4588 * all and we fallback to either strwrite() or kstrputmsg() 4589 * below. Note also that we never generate priority messages 4590 * from here. 4591 */ 4592 udp_wq = stp->sd_wrq->q_next; 4593 if (canput(udp_wq) && 4594 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4595 ASSERT(DB_TYPE(mpdata) == M_DATA); 4596 ASSERT(uiop->uio_resid == 0); 4597 if (!connected) 4598 linkb(mp, mpdata); 4599 else 4600 mp = mpdata; 4601 if (auditing) 4602 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4603 4604 /* Always returns 0... */ 4605 return (udp_wput(udp_wq, mp)); 4606 } 4607 4608 ASSERT(mpdata == NULL); 4609 if (error != 0 && error != ENOMEM) { 4610 freemsg(mp); 4611 return (error); 4612 } 4613 4614 /* 4615 * For connected, let strwrite() handle the blocking case. 4616 * Otherwise we fall thru and use kstrputmsg(). 
4617 */ 4618 if (connected) 4619 return (strwrite(SOTOV(so), uiop, CRED())); 4620 4621 if (auditing) 4622 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4623 4624 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4625 done: 4626 #ifdef SOCK_DEBUG 4627 if (error != 0) { 4628 eprintsoline(so, error); 4629 } 4630 #endif /* SOCK_DEBUG */ 4631 return (error); 4632 } 4633 4634 int 4635 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4636 { 4637 struct stdata *stp = SOTOV(so)->v_stream; 4638 ssize_t iosize, rmax, maxblk; 4639 queue_t *tcp_wq = stp->sd_wrq->q_next; 4640 mblk_t *newmp; 4641 int error = 0, wflag = 0; 4642 4643 ASSERT(so->so_mode & SM_BYTESTREAM); 4644 ASSERT(SOTOV(so)->v_type == VSOCK); 4645 4646 if (stp->sd_sidp != NULL && 4647 (error = straccess(stp, JCWRITE)) != 0) 4648 return (error); 4649 4650 if (uiop == NULL) { 4651 /* 4652 * kstrwritemp() should have checked sd_flag and 4653 * flow-control before coming here. If we end up 4654 * here it means that we can simply pass down the 4655 * data to tcp. 4656 */ 4657 ASSERT(mp != NULL); 4658 if (stp->sd_wputdatafunc != NULL) { 4659 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4660 NULL, NULL, NULL); 4661 if (newmp == NULL) { 4662 /* The caller will free mp */ 4663 return (ECOMM); 4664 } 4665 mp = newmp; 4666 } 4667 /* Always returns 0... 
*/ 4668 return (tcp_wput(tcp_wq, mp)); 4669 } 4670 4671 /* Fallback to strwrite() to do proper error handling */ 4672 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4673 return (strwrite(SOTOV(so), uiop, cr)); 4674 4675 rmax = stp->sd_qn_maxpsz; 4676 ASSERT(rmax >= 0 || rmax == INFPSZ); 4677 if (rmax == 0 || uiop->uio_resid <= 0) 4678 return (0); 4679 4680 if (rmax == INFPSZ) 4681 rmax = uiop->uio_resid; 4682 4683 maxblk = stp->sd_maxblk; 4684 4685 for (;;) { 4686 iosize = MIN(uiop->uio_resid, rmax); 4687 4688 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4689 if (mp == NULL) { 4690 /* 4691 * Fallback to strwrite() for ENOMEM; if this 4692 * is our first time in this routine and the uio 4693 * vector has not been modified, we will end up 4694 * calling strwrite() without any flag set. 4695 */ 4696 if (error == ENOMEM) 4697 goto slow_send; 4698 else 4699 return (error); 4700 } 4701 ASSERT(uiop->uio_resid >= 0); 4702 /* 4703 * If mp is non-NULL and ENOMEM is set, it means that 4704 * mcopyinuio() was able to break down some of the user 4705 * data into one or more mblks. Send the partial data 4706 * to tcp and let the rest be handled in strwrite(). 4707 */ 4708 ASSERT(error == 0 || error == ENOMEM); 4709 if (stp->sd_wputdatafunc != NULL) { 4710 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4711 NULL, NULL, NULL); 4712 if (newmp == NULL) { 4713 /* The caller will free mp */ 4714 return (ECOMM); 4715 } 4716 mp = newmp; 4717 } 4718 (void) tcp_wput(tcp_wq, mp); /* Always returns 0 anyway. */ 4719 4720 wflag |= NOINTR; 4721 4722 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4723 ASSERT(error == 0); 4724 break; 4725 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4726 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4727 slow_send: 4728 /* 4729 * We were able to send down partial data using 4730 * the direct call interface, but are now relying 4731 * on strwrite() to handle the non-fastpath cases. 
4732 * If the socket is blocking we will sleep in 4733 * strwaitq() until write is permitted, otherwise, 4734 * we will need to return the amount of bytes 4735 * written so far back to the app. This is the 4736 * reason why we pass NOINTR flag to strwrite() 4737 * for non-blocking socket, because we don't want 4738 * to return EAGAIN when portion of the user data 4739 * has actually been sent down. 4740 */ 4741 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4742 } 4743 } 4744 return (0); 4745 } 4746 4747 /* 4748 * Update sti_faddr by asking the transport (unless AF_UNIX). 4749 */ 4750 /* ARGSUSED */ 4751 int 4752 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4753 boolean_t accept, struct cred *cr) 4754 { 4755 struct strbuf strbuf; 4756 int error = 0, res; 4757 void *addr; 4758 t_uscalar_t addrlen; 4759 k_sigset_t smask; 4760 sotpi_info_t *sti = SOTOTPI(so); 4761 4762 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4763 (void *)so, pr_state(so->so_state, so->so_mode))); 4764 4765 ASSERT(*namelen > 0); 4766 mutex_enter(&so->so_lock); 4767 so_lock_single(so); /* Set SOLOCKED */ 4768 4769 if (accept) { 4770 bcopy(sti->sti_faddr_sa, name, 4771 MIN(*namelen, sti->sti_faddr_len)); 4772 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4773 goto done; 4774 } 4775 4776 if (!(so->so_state & SS_ISCONNECTED)) { 4777 error = ENOTCONN; 4778 goto done; 4779 } 4780 /* Added this check for X/Open */ 4781 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4782 error = EINVAL; 4783 if (xnet_check_print) { 4784 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4785 } 4786 goto done; 4787 } 4788 4789 if (sti->sti_faddr_valid) { 4790 bcopy(sti->sti_faddr_sa, name, 4791 MIN(*namelen, sti->sti_faddr_len)); 4792 *namelen = sti->sti_faddr_noxlate ? 
0: sti->sti_faddr_len; 4793 goto done; 4794 } 4795 4796 #ifdef DEBUG 4797 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4798 pr_addr(so->so_family, sti->sti_faddr_sa, 4799 (t_uscalar_t)sti->sti_faddr_len))); 4800 #endif /* DEBUG */ 4801 4802 if (so->so_family == AF_UNIX) { 4803 /* Transport has different name space - return local info */ 4804 if (sti->sti_faddr_noxlate) 4805 *namelen = 0; 4806 error = 0; 4807 goto done; 4808 } 4809 4810 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4811 4812 ASSERT(sti->sti_faddr_sa); 4813 /* Allocate local buffer to use with ioctl */ 4814 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4815 mutex_exit(&so->so_lock); 4816 addr = kmem_alloc(addrlen, KM_SLEEP); 4817 4818 /* 4819 * Issue TI_GETPEERNAME with signals masked. 4820 * Put the result in sti_faddr_sa so that getpeername works after 4821 * a shutdown(output). 4822 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4823 * back to the socket. 4824 */ 4825 strbuf.buf = addr; 4826 strbuf.maxlen = addrlen; 4827 strbuf.len = 0; 4828 4829 sigintr(&smask, 0); 4830 res = 0; 4831 ASSERT(cr); 4832 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4833 0, K_TO_K, cr, &res); 4834 sigunintr(&smask); 4835 4836 mutex_enter(&so->so_lock); 4837 /* 4838 * If there is an error record the error in so_error put don't fail 4839 * the getpeername. Instead fallback on the recorded 4840 * sti->sti_faddr_sa. 4841 */ 4842 if (error) { 4843 /* 4844 * Various stream head errors can be returned to the ioctl. 4845 * However, it is impossible to determine which ones of 4846 * these are really socket level errors that were incorrectly 4847 * consumed by the ioctl. Thus this code silently ignores the 4848 * error - to code explicitly does not reinstate the error 4849 * using soseterror(). 4850 * Experiments have shows that at least this set of 4851 * errors are reported and should not be reinstated on the 4852 * socket: 4853 * EINVAL E.g. 
if an I_LINK was in effect when 4854 * getpeername was called. 4855 * EPIPE The ioctl error semantics prefer the write 4856 * side error over the read side error. 4857 * ENOTCONN The transport just got disconnected but 4858 * sockfs had not yet seen the T_DISCON_IND 4859 * when issuing the ioctl. 4860 */ 4861 error = 0; 4862 } else if (res == 0 && strbuf.len > 0 && 4863 (so->so_state & SS_ISCONNECTED)) { 4864 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 4865 sti->sti_faddr_len = (socklen_t)strbuf.len; 4866 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 4867 sti->sti_faddr_valid = 1; 4868 4869 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 4870 *namelen = sti->sti_faddr_len; 4871 } 4872 kmem_free(addr, addrlen); 4873 #ifdef DEBUG 4874 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4875 pr_addr(so->so_family, sti->sti_faddr_sa, 4876 (t_uscalar_t)sti->sti_faddr_len))); 4877 #endif /* DEBUG */ 4878 done: 4879 so_unlock_single(so, SOLOCKED); 4880 mutex_exit(&so->so_lock); 4881 return (error); 4882 } 4883 4884 /* 4885 * Update sti_laddr by asking the transport (unless AF_UNIX). 
4886 */ 4887 int 4888 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4889 struct cred *cr) 4890 { 4891 struct strbuf strbuf; 4892 int error = 0, res; 4893 void *addr; 4894 t_uscalar_t addrlen; 4895 k_sigset_t smask; 4896 sotpi_info_t *sti = SOTOTPI(so); 4897 4898 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4899 (void *)so, pr_state(so->so_state, so->so_mode))); 4900 4901 ASSERT(*namelen > 0); 4902 mutex_enter(&so->so_lock); 4903 so_lock_single(so); /* Set SOLOCKED */ 4904 4905 #ifdef DEBUG 4906 4907 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4908 pr_addr(so->so_family, sti->sti_laddr_sa, 4909 (t_uscalar_t)sti->sti_laddr_len))); 4910 #endif /* DEBUG */ 4911 if (sti->sti_laddr_valid) { 4912 bcopy(sti->sti_laddr_sa, name, 4913 MIN(*namelen, sti->sti_laddr_len)); 4914 *namelen = sti->sti_laddr_len; 4915 goto done; 4916 } 4917 4918 if (so->so_family == AF_UNIX) { 4919 /* 4920 * Transport has different name space - return local info. If we 4921 * have enough space, let consumers know the family. 4922 */ 4923 if (*namelen >= sizeof (sa_family_t)) { 4924 name->sa_family = AF_UNIX; 4925 *namelen = sizeof (sa_family_t); 4926 } else { 4927 *namelen = 0; 4928 } 4929 error = 0; 4930 goto done; 4931 } 4932 if (!(so->so_state & SS_ISBOUND)) { 4933 /* If not bound, then nothing to return. */ 4934 error = 0; 4935 goto done; 4936 } 4937 4938 /* Allocate local buffer to use with ioctl */ 4939 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 4940 mutex_exit(&so->so_lock); 4941 addr = kmem_alloc(addrlen, KM_SLEEP); 4942 4943 /* 4944 * Issue TI_GETMYNAME with signals masked. 4945 * Put the result in sti_laddr_sa so that getsockname works after 4946 * a shutdown(output). 4947 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4948 * back to the socket. 
4949 */ 4950 strbuf.buf = addr; 4951 strbuf.maxlen = addrlen; 4952 strbuf.len = 0; 4953 4954 sigintr(&smask, 0); 4955 res = 0; 4956 ASSERT(cr); 4957 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4958 0, K_TO_K, cr, &res); 4959 sigunintr(&smask); 4960 4961 mutex_enter(&so->so_lock); 4962 /* 4963 * If there is an error record the error in so_error put don't fail 4964 * the getsockname. Instead fallback on the recorded 4965 * sti->sti_laddr_sa. 4966 */ 4967 if (error) { 4968 /* 4969 * Various stream head errors can be returned to the ioctl. 4970 * However, it is impossible to determine which ones of 4971 * these are really socket level errors that were incorrectly 4972 * consumed by the ioctl. Thus this code silently ignores the 4973 * error - to code explicitly does not reinstate the error 4974 * using soseterror(). 4975 * Experiments have shows that at least this set of 4976 * errors are reported and should not be reinstated on the 4977 * socket: 4978 * EINVAL E.g. if an I_LINK was in effect when 4979 * getsockname was called. 4980 * EPIPE The ioctl error semantics prefer the write 4981 * side error over the read side error. 4982 */ 4983 error = 0; 4984 } else if (res == 0 && strbuf.len > 0 && 4985 (so->so_state & SS_ISBOUND)) { 4986 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 4987 sti->sti_laddr_len = (socklen_t)strbuf.len; 4988 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 4989 sti->sti_laddr_valid = 1; 4990 4991 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 4992 *namelen = sti->sti_laddr_len; 4993 } 4994 kmem_free(addr, addrlen); 4995 #ifdef DEBUG 4996 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 4997 pr_addr(so->so_family, sti->sti_laddr_sa, 4998 (t_uscalar_t)sti->sti_laddr_len))); 4999 #endif /* DEBUG */ 5000 done: 5001 so_unlock_single(so, SOLOCKED); 5002 mutex_exit(&so->so_lock); 5003 return (error); 5004 } 5005 5006 /* 5007 * Get socket options. 
For SOL_SOCKET options some options are handled 5008 * by the sockfs while others use the value recorded in the sonode as a 5009 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5010 * 5011 * On the return most *optlenp bytes are copied to optval. 5012 */ 5013 /* ARGSUSED */ 5014 int 5015 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5016 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5017 { 5018 struct T_optmgmt_req optmgmt_req; 5019 struct T_optmgmt_ack *optmgmt_ack; 5020 struct opthdr oh; 5021 struct opthdr *opt_res; 5022 mblk_t *mp = NULL; 5023 int error = 0; 5024 void *option = NULL; /* Set if fallback value */ 5025 t_uscalar_t maxlen = *optlenp; 5026 t_uscalar_t len; 5027 uint32_t value; 5028 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5029 struct timeval32 tmo_val32; 5030 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5031 5032 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5033 (void *)so, level, option_name, optval, (void *)optlenp, 5034 pr_state(so->so_state, so->so_mode))); 5035 5036 mutex_enter(&so->so_lock); 5037 so_lock_single(so); /* Set SOLOCKED */ 5038 5039 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5040 5041 /* 5042 * Check for SOL_SOCKET options. 5043 * Certain SOL_SOCKET options are returned directly whereas 5044 * others only provide a default (fallback) value should 5045 * the T_SVR4_OPTMGMT_REQ fail. 
5046 */ 5047 if (level == SOL_SOCKET) { 5048 /* Check parameters */ 5049 switch (option_name) { 5050 case SO_TYPE: 5051 case SO_ERROR: 5052 case SO_DEBUG: 5053 case SO_ACCEPTCONN: 5054 case SO_REUSEADDR: 5055 case SO_KEEPALIVE: 5056 case SO_DONTROUTE: 5057 case SO_BROADCAST: 5058 case SO_USELOOPBACK: 5059 case SO_OOBINLINE: 5060 case SO_SNDBUF: 5061 case SO_RCVBUF: 5062 #ifdef notyet 5063 case SO_SNDLOWAT: 5064 case SO_RCVLOWAT: 5065 #endif /* notyet */ 5066 case SO_DOMAIN: 5067 case SO_DGRAM_ERRIND: 5068 case SO_PROTOCOL: 5069 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5070 error = EINVAL; 5071 eprintsoline(so, error); 5072 goto done2; 5073 } 5074 break; 5075 case SO_RCVTIMEO: 5076 case SO_SNDTIMEO: 5077 if (get_udatamodel() == DATAMODEL_NONE || 5078 get_udatamodel() == DATAMODEL_NATIVE) { 5079 if (maxlen < sizeof (struct timeval)) { 5080 error = EINVAL; 5081 eprintsoline(so, error); 5082 goto done2; 5083 } 5084 } else { 5085 if (maxlen < sizeof (struct timeval32)) { 5086 error = EINVAL; 5087 eprintsoline(so, error); 5088 goto done2; 5089 } 5090 5091 } 5092 break; 5093 case SO_LINGER: 5094 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5095 error = EINVAL; 5096 eprintsoline(so, error); 5097 goto done2; 5098 } 5099 break; 5100 case SO_SND_BUFINFO: 5101 if (maxlen < (t_uscalar_t) 5102 sizeof (struct so_snd_bufinfo)) { 5103 error = EINVAL; 5104 eprintsoline(so, error); 5105 goto done2; 5106 } 5107 break; 5108 } 5109 5110 switch (option_name) { 5111 case SO_TYPE: 5112 value = so->so_type; 5113 option = &value; 5114 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5115 5116 case SO_ERROR: 5117 value = sogeterr(so, B_TRUE); 5118 option = &value; 5119 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5120 5121 case SO_ACCEPTCONN: 5122 if (so->so_state & SS_ACCEPTCONN) 5123 value = SO_ACCEPTCONN; 5124 else 5125 value = 0; 5126 #ifdef DEBUG 5127 if (value) { 5128 dprintso(so, 1, 5129 ("sotpi_getsockopt: 0x%x is set\n", 5130 option_name)); 5131 } else { 
5132 dprintso(so, 1, 5133 ("sotpi_getsockopt: 0x%x not set\n", 5134 option_name)); 5135 } 5136 #endif /* DEBUG */ 5137 option = &value; 5138 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5139 5140 case SO_DEBUG: 5141 case SO_REUSEADDR: 5142 case SO_KEEPALIVE: 5143 case SO_DONTROUTE: 5144 case SO_BROADCAST: 5145 case SO_USELOOPBACK: 5146 case SO_OOBINLINE: 5147 case SO_DGRAM_ERRIND: 5148 value = (so->so_options & option_name); 5149 #ifdef DEBUG 5150 if (value) { 5151 dprintso(so, 1, 5152 ("sotpi_getsockopt: 0x%x is set\n", 5153 option_name)); 5154 } else { 5155 dprintso(so, 1, 5156 ("sotpi_getsockopt: 0x%x not set\n", 5157 option_name)); 5158 } 5159 #endif /* DEBUG */ 5160 option = &value; 5161 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5162 5163 /* 5164 * The following options are only returned by sockfs when the 5165 * T_SVR4_OPTMGMT_REQ fails. 5166 */ 5167 case SO_LINGER: 5168 option = &so->so_linger; 5169 len = (t_uscalar_t)sizeof (struct linger); 5170 break; 5171 case SO_SNDBUF: { 5172 ssize_t lvalue; 5173 5174 /* 5175 * If the option has not been set then get a default 5176 * value from the read queue. This value is 5177 * returned if the transport fails 5178 * the T_SVR4_OPTMGMT_REQ. 5179 */ 5180 lvalue = so->so_sndbuf; 5181 if (lvalue == 0) { 5182 mutex_exit(&so->so_lock); 5183 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5184 QHIWAT, 0, &lvalue); 5185 mutex_enter(&so->so_lock); 5186 dprintso(so, 1, 5187 ("got SO_SNDBUF %ld from q\n", lvalue)); 5188 } 5189 value = (int)lvalue; 5190 option = &value; 5191 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5192 break; 5193 } 5194 case SO_RCVBUF: { 5195 ssize_t lvalue; 5196 5197 /* 5198 * If the option has not been set then get a default 5199 * value from the read queue. This value is 5200 * returned if the transport fails 5201 * the T_SVR4_OPTMGMT_REQ. 
5202 * 5203 * XXX If SO_RCVBUF has been set and this is an 5204 * XPG 4.2 application then do not ask the transport 5205 * since the transport might adjust the value and not 5206 * return exactly what was set by the application. 5207 * For non-XPG 4.2 application we return the value 5208 * that the transport is actually using. 5209 */ 5210 lvalue = so->so_rcvbuf; 5211 if (lvalue == 0) { 5212 mutex_exit(&so->so_lock); 5213 (void) strqget(RD(strvp2wq(SOTOV(so))), 5214 QHIWAT, 0, &lvalue); 5215 mutex_enter(&so->so_lock); 5216 dprintso(so, 1, 5217 ("got SO_RCVBUF %ld from q\n", lvalue)); 5218 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5219 value = (int)lvalue; 5220 option = &value; 5221 goto copyout; /* skip asking transport */ 5222 } 5223 value = (int)lvalue; 5224 option = &value; 5225 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5226 break; 5227 } 5228 case SO_DOMAIN: 5229 value = so->so_family; 5230 option = &value; 5231 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5232 5233 case SO_PROTOCOL: 5234 value = so->so_protocol; 5235 option = &value; 5236 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5237 5238 #ifdef notyet 5239 /* 5240 * We do not implement the semantics of these options 5241 * thus we shouldn't implement the options either. 
5242 */ 5243 case SO_SNDLOWAT: 5244 value = so->so_sndlowat; 5245 option = &value; 5246 break; 5247 case SO_RCVLOWAT: 5248 value = so->so_rcvlowat; 5249 option = &value; 5250 break; 5251 #endif /* notyet */ 5252 case SO_SNDTIMEO: 5253 case SO_RCVTIMEO: { 5254 clock_t val; 5255 5256 if (option_name == SO_RCVTIMEO) 5257 val = drv_hztousec(so->so_rcvtimeo); 5258 else 5259 val = drv_hztousec(so->so_sndtimeo); 5260 tmo_val.tv_sec = val / (1000 * 1000); 5261 tmo_val.tv_usec = val % (1000 * 1000); 5262 if (get_udatamodel() == DATAMODEL_NONE || 5263 get_udatamodel() == DATAMODEL_NATIVE) { 5264 option = &tmo_val; 5265 len = sizeof (struct timeval); 5266 } else { 5267 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5268 option = &tmo_val32; 5269 len = sizeof (struct timeval32); 5270 } 5271 break; 5272 } 5273 case SO_SND_BUFINFO: { 5274 snd_bufinfo.sbi_wroff = 5275 (so->so_proto_props).sopp_wroff; 5276 snd_bufinfo.sbi_maxblk = 5277 (so->so_proto_props).sopp_maxblk; 5278 snd_bufinfo.sbi_maxpsz = 5279 (so->so_proto_props).sopp_maxpsz; 5280 snd_bufinfo.sbi_tail = 5281 (so->so_proto_props).sopp_tail; 5282 option = &snd_bufinfo; 5283 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5284 break; 5285 } 5286 } 5287 } 5288 5289 mutex_exit(&so->so_lock); 5290 5291 /* Send request */ 5292 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5293 optmgmt_req.MGMT_flags = T_CHECK; 5294 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5295 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5296 5297 oh.level = level; 5298 oh.name = option_name; 5299 oh.len = maxlen; 5300 5301 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5302 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5303 /* Let option management work in the presence of data flow control */ 5304 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5305 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5306 mp = NULL; 5307 mutex_enter(&so->so_lock); 5308 if (error) { 5309 eprintsoline(so, error); 5310 goto done2; 
5311 } 5312 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5313 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5314 if (error) { 5315 if (option != NULL) { 5316 /* We have a fallback value */ 5317 error = 0; 5318 goto copyout; 5319 } 5320 eprintsoline(so, error); 5321 goto done2; 5322 } 5323 ASSERT(mp); 5324 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5325 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5326 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5327 if (opt_res == NULL) { 5328 if (option != NULL) { 5329 /* We have a fallback value */ 5330 error = 0; 5331 goto copyout; 5332 } 5333 error = EPROTO; 5334 eprintsoline(so, error); 5335 goto done; 5336 } 5337 option = &opt_res[1]; 5338 5339 /* check to ensure that the option is within bounds */ 5340 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5341 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5342 if (option != NULL) { 5343 /* We have a fallback value */ 5344 error = 0; 5345 goto copyout; 5346 } 5347 error = EPROTO; 5348 eprintsoline(so, error); 5349 goto done; 5350 } 5351 5352 len = opt_res->len; 5353 5354 copyout: { 5355 t_uscalar_t size = MIN(len, maxlen); 5356 bcopy(option, optval, size); 5357 bcopy(&size, optlenp, sizeof (size)); 5358 } 5359 done: 5360 freemsg(mp); 5361 done2: 5362 so_unlock_single(so, SOLOCKED); 5363 mutex_exit(&so->so_lock); 5364 5365 return (error); 5366 } 5367 5368 /* 5369 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5370 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5371 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5372 * setsockopt has to work even if the transport does not support the option. 
5373 */ 5374 /* ARGSUSED */ 5375 int 5376 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5377 const void *optval, t_uscalar_t optlen, struct cred *cr) 5378 { 5379 struct T_optmgmt_req optmgmt_req; 5380 struct opthdr oh; 5381 mblk_t *mp; 5382 int error = 0; 5383 boolean_t handled = B_FALSE; 5384 5385 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5386 (void *)so, level, option_name, optval, optlen, 5387 pr_state(so->so_state, so->so_mode))); 5388 5389 /* X/Open requires this check */ 5390 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5391 if (xnet_check_print) 5392 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5393 return (EINVAL); 5394 } 5395 5396 mutex_enter(&so->so_lock); 5397 so_lock_single(so); /* Set SOLOCKED */ 5398 mutex_exit(&so->so_lock); 5399 5400 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5401 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5402 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5403 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5404 5405 oh.level = level; 5406 oh.name = option_name; 5407 oh.len = optlen; 5408 5409 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5410 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5411 /* Let option management work in the presence of data flow control */ 5412 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5413 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5414 mp = NULL; 5415 mutex_enter(&so->so_lock); 5416 if (error) { 5417 eprintsoline(so, error); 5418 goto done2; 5419 } 5420 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5421 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5422 if (error) { 5423 eprintsoline(so, error); 5424 goto done; 5425 } 5426 ASSERT(mp); 5427 /* No need to verify T_optmgmt_ack */ 5428 freemsg(mp); 5429 done: 5430 /* 5431 * Check for SOL_SOCKET options and record their values. 
5432 * If we know about a SOL_SOCKET parameter and the transport 5433 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5434 * EPROTO) we let the setsockopt succeed. 5435 */ 5436 if (level == SOL_SOCKET) { 5437 /* Check parameters */ 5438 switch (option_name) { 5439 case SO_DEBUG: 5440 case SO_REUSEADDR: 5441 case SO_KEEPALIVE: 5442 case SO_DONTROUTE: 5443 case SO_BROADCAST: 5444 case SO_USELOOPBACK: 5445 case SO_OOBINLINE: 5446 case SO_SNDBUF: 5447 case SO_RCVBUF: 5448 #ifdef notyet 5449 case SO_SNDLOWAT: 5450 case SO_RCVLOWAT: 5451 #endif /* notyet */ 5452 case SO_DGRAM_ERRIND: 5453 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5454 error = EINVAL; 5455 eprintsoline(so, error); 5456 goto done2; 5457 } 5458 ASSERT(optval); 5459 handled = B_TRUE; 5460 break; 5461 case SO_SNDTIMEO: 5462 case SO_RCVTIMEO: 5463 if (get_udatamodel() == DATAMODEL_NONE || 5464 get_udatamodel() == DATAMODEL_NATIVE) { 5465 if (optlen != sizeof (struct timeval)) { 5466 error = EINVAL; 5467 eprintsoline(so, error); 5468 goto done2; 5469 } 5470 } else { 5471 if (optlen != sizeof (struct timeval32)) { 5472 error = EINVAL; 5473 eprintsoline(so, error); 5474 goto done2; 5475 } 5476 } 5477 ASSERT(optval); 5478 handled = B_TRUE; 5479 break; 5480 case SO_LINGER: 5481 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5482 error = EINVAL; 5483 eprintsoline(so, error); 5484 goto done2; 5485 } 5486 ASSERT(optval); 5487 handled = B_TRUE; 5488 break; 5489 } 5490 5491 #define intvalue (*(int32_t *)optval) 5492 5493 switch (option_name) { 5494 case SO_TYPE: 5495 case SO_ERROR: 5496 case SO_ACCEPTCONN: 5497 /* Can't be set */ 5498 error = ENOPROTOOPT; 5499 goto done2; 5500 case SO_LINGER: { 5501 struct linger *l = (struct linger *)optval; 5502 5503 so->so_linger.l_linger = l->l_linger; 5504 if (l->l_onoff) { 5505 so->so_linger.l_onoff = SO_LINGER; 5506 so->so_options |= SO_LINGER; 5507 } else { 5508 so->so_linger.l_onoff = 0; 5509 so->so_options &= ~SO_LINGER; 5510 } 5511 break; 5512 } 5513 
5514 case SO_DEBUG: 5515 #ifdef SOCK_TEST 5516 if (intvalue & 2) 5517 sock_test_timelimit = 10 * hz; 5518 else 5519 sock_test_timelimit = 0; 5520 5521 if (intvalue & 4) 5522 do_useracc = 0; 5523 else 5524 do_useracc = 1; 5525 #endif /* SOCK_TEST */ 5526 /* FALLTHRU */ 5527 case SO_REUSEADDR: 5528 case SO_KEEPALIVE: 5529 case SO_DONTROUTE: 5530 case SO_BROADCAST: 5531 case SO_USELOOPBACK: 5532 case SO_OOBINLINE: 5533 case SO_DGRAM_ERRIND: 5534 if (intvalue != 0) { 5535 dprintso(so, 1, 5536 ("socket_setsockopt: setting 0x%x\n", 5537 option_name)); 5538 so->so_options |= option_name; 5539 } else { 5540 dprintso(so, 1, 5541 ("socket_setsockopt: clearing 0x%x\n", 5542 option_name)); 5543 so->so_options &= ~option_name; 5544 } 5545 break; 5546 /* 5547 * The following options are only returned by us when the 5548 * transport layer fails. 5549 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5550 * since the transport might adjust the value and not 5551 * return exactly what was set by the application. 5552 */ 5553 case SO_SNDBUF: 5554 so->so_sndbuf = intvalue; 5555 break; 5556 case SO_RCVBUF: 5557 so->so_rcvbuf = intvalue; 5558 break; 5559 case SO_RCVPSH: 5560 so->so_rcv_timer_interval = intvalue; 5561 break; 5562 #ifdef notyet 5563 /* 5564 * We do not implement the semantics of these options 5565 * thus we shouldn't implement the options either. 
5566 */ 5567 case SO_SNDLOWAT: 5568 so->so_sndlowat = intvalue; 5569 break; 5570 case SO_RCVLOWAT: 5571 so->so_rcvlowat = intvalue; 5572 break; 5573 #endif /* notyet */ 5574 case SO_SNDTIMEO: 5575 case SO_RCVTIMEO: { 5576 struct timeval tl; 5577 clock_t val; 5578 5579 if (get_udatamodel() == DATAMODEL_NONE || 5580 get_udatamodel() == DATAMODEL_NATIVE) { 5581 bcopy((struct timeval *)optval, &tl, 5582 sizeof (struct timeval)); 5583 } else { 5584 TIMEVAL32_TO_TIMEVAL(&tl, 5585 (struct timeval32 *)optval); 5586 } 5587 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5588 if (option_name == SO_RCVTIMEO) 5589 so->so_rcvtimeo = drv_usectohz(val); 5590 else 5591 so->so_sndtimeo = drv_usectohz(val); 5592 break; 5593 } 5594 } 5595 #undef intvalue 5596 5597 if (error) { 5598 if ((error == ENOPROTOOPT || error == EPROTO || 5599 error == EINVAL) && handled) { 5600 dprintso(so, 1, 5601 ("setsockopt: ignoring error %d for 0x%x\n", 5602 error, option_name)); 5603 error = 0; 5604 } 5605 } 5606 } 5607 done2: 5608 so_unlock_single(so, SOLOCKED); 5609 mutex_exit(&so->so_lock); 5610 return (error); 5611 } 5612 5613 /* 5614 * sotpi_close() is called when the last open reference goes away. 
 */
/*
 * With SOLOCKED held, the stream (if one is still attached to the vnode)
 * is closed with strclose() and any T_DISCON_IND held on the sonode is
 * flushed.  so_lock is dropped around the strclose() call.
 *
 * Returns the result of strclose(), or 0 when there was no stream to close.
 */
/* ARGSUSED */
int
sotpi_close(struct sonode *so, int flag, struct cred *cr)
{
	struct vnode *vp = SOTOV(so);
	dev_t dev;
	int error = 0;
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_close(%p, %x) %s\n",
	    (void *)vp, flag, pr_state(so->so_state, so->so_mode)));

	dev = sti->sti_dev;

	ASSERT(STREAMSTAB(getmajor(dev)));

	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	ASSERT(so_verify_oobstate(so));

	if (vp->v_stream != NULL) {
		vnode_t *ux_vp;

		if (so->so_family == AF_UNIX) {
			/* Could avoid this when CANTSENDMORE for !dgram */
			so_unix_close(so);
		}

		mutex_exit(&so->so_lock);
		/*
		 * Disassemble the linkage from the AF_UNIX underlying file
		 * system vnode to this socket (by atomically clearing
		 * v_stream in vn_rele_stream) before strclose clears sd_vnode
		 * and frees the stream head.
		 */
		if ((ux_vp = sti->sti_ux_bound_vp) != NULL) {
			ASSERT(ux_vp->v_stream);
			sti->sti_ux_bound_vp = NULL;
			vn_rele_stream(ux_vp);
		}
		error = strclose(vp, flag, cr);
		vp->v_stream = NULL;
		mutex_enter(&so->so_lock);
	}

	/*
	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
	 */
	so_flush_discon_ind(so);

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);

	/*
	 * Needed for STREAMs.
	 * Decrement the device driver's reference count for streams
	 * opened via the clone dip. The driver was held in clone_open().
	 * The absence of clone_close() forces this asymmetry.
	 */
	if (so->so_flag & SOCLONE)
		ddi_rele_driver(getmajor(dev));

	return (error);
}

/*
 * ioctl entry point for TPI sockets.
 *
 * The first switch rejects SIOCSQPTR, _I_INSERT and _I_REMOVE with
 * EOPNOTSUPP, hands the module-list ioctls (I_FIND, I_LIST, I_LOOK, I_POP,
 * I_PUSH) to socktpi_plumbioctl() under sti_plumb_lock, and passes anything
 * else straight to strioctl() when the sonode is in SOV_STREAM mode.
 *
 * The second switch handles the socket-specific ioctls.  Whatever is not
 * recognized there goes to strioctl(), except that I_* STREAMS ioctls on a
 * SOV_SOCKBSD socket fail with EOPNOTSUPP.
 *
 * Returns 0 or an errno value.
 */
static int
sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	struct vnode *vp = SOTOV(so);
	sotpi_info_t *sti = SOTOTPI(so);
	int error = 0;

	dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n",
	    cmd, arg, pr_state(so->so_state, so->so_mode)));

	switch (cmd) {
	case SIOCSQPTR:
		/*
		 * SIOCSQPTR is valid only when helper stream is created
		 * by the protocol.
		 */
	case _I_INSERT:
	case _I_REMOVE:
		/*
		 * Since there's no compelling reason to support these ioctls
		 * on sockets, and doing so would increase the complexity
		 * markedly, prevent it.
		 */
		return (EOPNOTSUPP);

	case I_FIND:
	case I_LIST:
	case I_LOOK:
	case I_POP:
	case I_PUSH:
		/*
		 * To prevent races and inconsistencies between the actual
		 * state of the stream and the state according to the sonode,
		 * we serialize all operations which modify or operate on the
		 * list of modules on the socket's stream.
		 */
		mutex_enter(&sti->sti_plumb_lock);
		error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp);
		mutex_exit(&sti->sti_plumb_lock);
		return (error);

	default:
		if (so->so_version != SOV_STREAM)
			break;

		/*
		 * The imaginary "sockmod" has been popped; act as a stream.
		 */
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}

	ASSERT(so->so_version != SOV_STREAM);

	/*
	 * Process socket-specific ioctls.
	 */
	switch (cmd) {
	case FIONBIO: {
		int32_t value;

		if (so_copyin((void *)arg, &value, sizeof (int32_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		if (value) {
			so->so_state |= SS_NDELAY;
		} else {
			so->so_state &= ~SS_NDELAY;
		}
		mutex_exit(&so->so_lock);
		return (0);
	}

	case FIOASYNC: {
		int32_t value;

		if (so_copyin((void *)arg, &value, sizeof (int32_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		/*
		 * SS_ASYNC flag not already set correctly?
		 * (!value != !(so->so_state & SS_ASYNC))
		 * but some engineers find that too hard to read.
		 */
		if ((value == 0 && (so->so_state & SS_ASYNC) != 0) ||
		    (value != 0 && (so->so_state & SS_ASYNC) == 0))
			error = so_flip_async(so, vp, mode, cr);
		mutex_exit(&so->so_lock);
		return (error);
	}

	case SIOCSPGRP:
	case FIOSETOWN: {
		pid_t pgrp;

		if (so_copyin((void *)arg, &pgrp, sizeof (pid_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp));
		/* Any change? */
		if (pgrp != so->so_pgrp)
			error = so_set_siggrp(so, vp, pgrp, mode, cr);
		mutex_exit(&so->so_lock);
		return (error);
	}
	case SIOCGPGRP:
	case FIOGETOWN:
		/* so_pgrp is copied out without taking so_lock */
		if (so_copyout(&so->so_pgrp, (void *)arg,
		    sizeof (pid_t), (mode & (int)FKIOCTL)))
			return (EFAULT);
		return (0);

	case SIOCATMARK: {
		int retval;
		uint_t so_state;

		/*
		 * strwaitmark has a finite timeout after which it
		 * returns -1 if the mark state is undetermined.
		 * In order to avoid any race between the mark state
		 * in sockfs and the mark state in the stream head this
		 * routine loops until the mark state can be determined
		 * (or the urgent data indication has been removed by some
		 * other thread).
		 */
		do {
			mutex_enter(&so->so_lock);
			so_state = so->so_state;
			mutex_exit(&so->so_lock);
			if (so_state & SS_RCVATMARK) {
				retval = 1;
			} else if (!(so_state & SS_OOBPEND)) {
				/*
				 * No SIGURG has been generated -- there is no
				 * pending or present urgent data. Thus can't
				 * possibly be at the mark.
				 */
				retval = 0;
			} else {
				/*
				 * Have the stream head wait until there is
				 * either some messages on the read queue, or
				 * STRATMARK or STRNOTATMARK gets set. The
				 * STRNOTATMARK flag is used so that the
				 * transport can send up a MSGNOTMARKNEXT
				 * M_DATA to indicate that it is not
				 * at the mark and additional data is not about
				 * to be sent upstream.
				 *
				 * If the mark state is undetermined this will
				 * return -1 and we will loop rechecking the
				 * socket state.
				 */
				retval = strwaitmark(vp);
			}
		} while (retval == -1);

		if (so_copyout(&retval, (void *)arg, sizeof (int),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);
		return (0);
	}

	case I_FDINSERT:
	case I_SENDFD:
	case I_RECVFD:
	case I_ATMARK:
	case _SIOCSOCKFALLBACK:
		/*
		 * These ioctls do not apply to sockets. I_FDINSERT can be
		 * used to send M_PROTO messages without modifying the socket
		 * state. I_SENDFD/RECVFD should not be used for socket file
		 * descriptor passing since they assume a twisted stream.
		 * SIOCATMARK must be used instead of I_ATMARK.
		 *
		 * _SIOCSOCKFALLBACK from an application should never be
		 * processed.  It is only generated by socktpi_open() or
		 * in response to I_POP or I_PUSH.
		 */
#ifdef DEBUG
		zcmn_err(getzoneid(), CE_WARN,
		    "Unsupported STREAMS ioctl 0x%x on socket. "
		    "Pid = %d\n", cmd, curproc->p_pid);
#endif /* DEBUG */
		return (EOPNOTSUPP);

	case _I_GETPEERCRED:
		/* Only honored for in-kernel callers (FKIOCTL set) */
		if ((mode & FKIOCTL) == 0)
			return (EINVAL);

		mutex_enter(&so->so_lock);
		if ((so->so_mode & SM_CONNREQUIRED) == 0) {
			error = ENOTSUP;
		} else if ((so->so_state & SS_ISCONNECTED) == 0) {
			error = ENOTCONN;
		} else if (so->so_peercred != NULL) {
			k_peercred_t *kp = (k_peercred_t *)arg;
			kp->pc_cr = so->so_peercred;
			kp->pc_cpid = so->so_cpid;
			/* The caller receives its own hold on the cred */
			crhold(so->so_peercred);
		} else {
			error = EINVAL;
		}
		mutex_exit(&so->so_lock);
		return (error);

	default:
		/*
		 * Do the higher-order bits of the ioctl cmd indicate
		 * that it is an I_* streams ioctl?
		 */
		if ((cmd & 0xffffff00U) == STR &&
		    so->so_version == SOV_SOCKBSD) {
#ifdef DEBUG
			zcmn_err(getzoneid(), CE_WARN,
			    "Unsupported STREAMS ioctl 0x%x on socket. "
			    "Pid = %d\n", cmd, curproc->p_pid);
#endif /* DEBUG */
			return (EOPNOTSUPP);
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}
}

/*
 * Handle plumbing-related ioctls.
 *
 * Must be called with sti_plumb_lock held, and only for I_PUSH, I_POP,
 * I_LIST, I_LOOK and I_FIND; any other cmd reaching the socket-mode switch
 * panics.  SOV_SOCKBSD sockets get EOPNOTSUPP.  In SOV_STREAM mode the
 * ioctl is passed to strioctl(), except that pushing "sockmod" converts the
 * stream back into a socket.  Otherwise the ioctls are emulated so that a
 * "sockmod" module appears to sit in the module list at position
 * sti_pushcnt.
 */
static int
socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	static const char sockmod_name[] = "sockmod";
	struct sonode	*so = VTOSO(vp);
	char		mname[FMNAMESZ + 1];
	int		error;
	sotpi_info_t	*sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));

	if (so->so_version == SOV_SOCKBSD)
		return (EOPNOTSUPP);

	if (so->so_version == SOV_STREAM) {
		/*
		 * The imaginary "sockmod" has been popped - act as a stream.
		 * If this is a push of sockmod then change back to a socket.
		 */
		if (cmd == I_PUSH) {
			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
			    (void *)arg, mname, sizeof (mname), NULL);

			if (error == 0 && strcmp(mname, sockmod_name) == 0) {
				dprintso(so, 0, ("socktpi_ioctl: going to "
				    "socket version\n"));
				so_stream2sock(so);
				return (0);
			}
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}

	switch (cmd) {
	case I_PUSH:
		if (sti->sti_direct) {
			/*
			 * Issue _SIOCSOCKFALLBACK with SOLOCKED held and
			 * clear sti_direct once it has succeeded.
			 */
			mutex_enter(&so->so_lock);
			so_lock_single(so);
			mutex_exit(&so->so_lock);

			error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
			    cr, rvalp);

			mutex_enter(&so->so_lock);
			if (error == 0)
				sti->sti_direct = 0;
			so_unlock_single(so, SOLOCKED);
			mutex_exit(&so->so_lock);

			if (error != 0)
				return (error);
		}

		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error == 0)
			sti->sti_pushcnt++;
		return (error);

	case I_POP:
		if (sti->sti_pushcnt == 0) {
			/* Emulate sockmod being popped */
			dprintso(so, 0,
			    ("socktpi_ioctl: going to STREAMS version\n"));
			return (so_sock2stream(so));
		}

		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error == 0)
			sti->sti_pushcnt--;
		return (error);

	case I_LIST: {
		struct str_mlist *kmlistp, *umlistp;
		struct str_list	kstrlist;
		ssize_t		kstrlistsize;
		int		i, nmods;

		STRUCT_DECL(str_list, ustrlist);
		STRUCT_INIT(ustrlist, mode);

		if (arg == 0) {
			error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
			if (error == 0)
				(*rvalp)++;	/* Add one for sockmod */
			return (error);
		}

		error = so_copyin((void *)arg, STRUCT_BUF(ustrlist),
		    STRUCT_SIZE(ustrlist), mode & FKIOCTL);
		if (error != 0)
			return (error);

		nmods = STRUCT_FGET(ustrlist, sl_nmods);
		if (nmods <= 0)
			return (EINVAL);
		/*
		 * Ceiling nmods at nstrpush to prevent someone from
		 * maliciously consuming lots of kernel memory.
		 */
		nmods = MIN(nmods, nstrpush);

		/* One extra slot is allocated for the sockmod entry */
		kstrlistsize = (nmods + 1) * sizeof (struct str_mlist);
		kstrlist.sl_nmods = nmods;
		kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP);

		error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K,
		    cr, rvalp);
		if (error != 0)
			goto done;

		/*
		 * Considering the module list as a 0-based array of sl_nmods
		 * modules, sockmod should conceptually exist at slot
		 * sti_pushcnt.  Insert sockmod at this location by sliding all
		 * of the module names after sti_pushcnt over by one.  We know
		 * that there will be room to do this since we allocated
		 * sl_modlist with an additional slot.
		 */
		for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--)
			kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1];

		(void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name);
		kstrlist.sl_nmods++;

		/*
		 * Copy all of the entries out to ustrlist.
		 */
		kmlistp = kstrlist.sl_modlist;
		umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
		for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
			error = so_copyout(kmlistp++, umlistp++,
			    sizeof (struct str_mlist), mode & FKIOCTL);
			if (error != 0)
				goto done;
		}

		/* Store the number of entries copied at the start of arg */
		error = so_copyout(&i, (void *)arg, sizeof (int32_t),
		    mode & FKIOCTL);
		if (error == 0)
			*rvalp = 0;
	done:
		kmem_free(kstrlist.sl_modlist, kstrlistsize);
		return (error);
	}
	case I_LOOK:
		if (sti->sti_pushcnt == 0) {
			/* Nothing is pushed, so sockmod is the top module */
			return (so_copyout(sockmod_name, (void *)arg,
			    sizeof (sockmod_name), mode & FKIOCTL));
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));

	case I_FIND:
		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error && error != EINVAL)
			return (error);

		/* if not found and string was sockmod return 1 */
		if (*rvalp == 0 || error == EINVAL) {
			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
			    (void *)arg, mname, sizeof (mname), NULL);
			if (error == ENAMETOOLONG)
				error = EINVAL;

			if (error == 0 && strcmp(mname, sockmod_name) == 0)
				*rvalp = 1;
		}
		return (error);

	default:
		panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
		break;
	}

	return (0);
}

/*
 * Wrapper around the streams poll routine that implements socket poll
 * semantics.
 * The sockfs never calls pollwakeup itself - the stream head takes care
 * of all pollwakeups. Since sockfs never holds so_lock when calling the
 * stream head there can never be a deadlock due to holding so_lock across
 * pollwakeup and acquiring so_lock in this routine.
 *
 * However, since the performance of VOP_POLL is critical we avoid
 * acquiring so_lock here.
 * This is based on two assumptions:
 *  - The poll implementation holds locks to serialize the VOP_POLL call
 *    and a pollwakeup for the same pollhead. This ensures that should
 *    e.g. so_state change during a socktpi_poll call the pollwakeup
 *    (which strsock_* and strrput conspire to issue) is issued after
 *    the state change. Thus the pollwakeup will block until VOP_POLL has
 *    returned and then wake up poll and have it call VOP_POLL again.
 *  - The reading of so_state without holding so_lock does not result in
 *    stale data that is older than the latest state change that has dropped
 *    so_lock. This is ensured by the mutex_exit issuing the appropriate
 *    memory barrier to force the data into the coherency domain.
 *
 * Returns 0 with the ready events stored in *reventsp, or the error
 * returned by strpoll().
 */
static int
sotpi_poll(
	struct sonode	*so,
	short		events,
	int		anyyet,
	short		*reventsp,
	struct pollhead **phpp)
{
	short origevents = events;
	struct vnode *vp = SOTOV(so);
	int error;
	int so_state = so->so_state;	/* snapshot */
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n",
	    (void *)vp, pr_state(so_state, so->so_mode), so->so_error));

	ASSERT(vp->v_type == VSOCK);
	ASSERT(vp->v_stream != NULL);

	if (so->so_version == SOV_STREAM) {
		/* The imaginary "sockmod" has been popped - act as a stream */
		return (strpoll(vp->v_stream, events, anyyet,
		    reventsp, phpp));
	}

	if (!(so_state & SS_ISCONNECTED) &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		/* Not connected yet - turn off write side events */
		events &= ~(POLLOUT|POLLWRBAND);
	}
	/*
	 * Check for errors without calling strpoll if the caller wants them.
	 * In sockets the errors are represented as input/output events
	 * and there is no need to ask the stream head for this information.
	 */
	if (so->so_error != 0 &&
	    ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
		return (0);
	}
	/*
	 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages.
	 * These messages with only an M_PROTO/M_PCPROTO part and no M_DATA
	 * will not trigger a POLLIN event with POLLRDDATA set.
	 * The handling of urgent data (causing POLLRDBAND) is done by
	 * inspecting SS_OOBPEND below.
	 */
	events |= POLLRDDATA;

	/*
	 * After shutdown(output) a stream head write error is set.
	 * However, we should not return output events.
	 */
	events |= POLLNOERR;
	error = strpoll(vp->v_stream, events, anyyet,
	    reventsp, phpp);
	if (error)
		return (error);

	ASSERT(!(*reventsp & POLLERR));

	/*
	 * Notes on T_CONN_IND handling for sockets.
	 *
	 * If strpoll() returned without events, SR_POLLIN is guaranteed
	 * to be set, ensuring any subsequent strrput() runs pollwakeup().
	 *
	 * Since the so_lock is not held, soqueueconnind() may have run
	 * and a T_CONN_IND may be waiting. We now check for any queued
	 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events
	 * to ensure poll returns.
	 *
	 * However:
	 * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
	 * when strrput() does run for an arriving M_PROTO with T_CONN_IND
	 * the following actions will occur; taken together they ensure the
	 * syscall will return.
	 *
	 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if
	 *    the accept() was run on a non-blocking socket sowaitconnind()
	 *    may have already returned EWOULDBLOCK, so not be waiting to
	 *    process the message. Additionally socktpi_poll() has probably
	 *    proceeded past the sti_conn_ind_head check below.
	 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
	 *    this thread, however that could occur before poll_common()
	 *    has entered cv_wait.
	 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
	 *
	 * Before proceeding to cv_wait() in poll_common() for an event,
	 * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
	 * and if set, re-calls strpoll() to ensure the late arriving
	 * T_CONN_IND is recognized, and pollsys() returns.
	 */

	if (sti->sti_conn_ind_head != NULL)
		*reventsp |= (POLLIN|POLLRDNORM) & events;

	/* These checks re-read so_state rather than using the snapshot */
	if (so->so_state & SS_CANTRCVMORE) {
		*reventsp |= POLLRDHUP & events;

		if (so->so_state & SS_CANTSENDMORE)
			*reventsp |= POLLHUP;
	}

	if (so->so_state & SS_OOBPEND)
		*reventsp |= POLLRDBAND & events;

	return (0);
}

/*
 * kmem cache constructor for sotpi_sonode_t: constructs the embedded
 * sonode, then the embedded TPI info, and points so_priv at the latter.
 * If the TPI info constructor fails the sonode is destructed again and
 * the error is returned.
 */
/*ARGSUSED*/
static int
socktpi_constructor(void *buf, void *cdrarg, int kmflags)
{
	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
	int error = 0;

	error = sonode_constructor(buf, cdrarg, kmflags);
	if (error != 0)
		return (error);

	error = i_sotpi_info_constructor(&st->st_info);
	if (error != 0)
		sonode_destructor(buf, cdrarg);

	/* NOTE(review): so_priv is assigned even on the failure path */
	st->st_sonode.so_priv = &st->st_info;

	return (error);
}

/*
 * kmem cache destructor for sotpi_sonode_t: detaches the embedded TPI
 * info from the sonode and destructs both.
 */
/*ARGSUSED1*/
static void
socktpi_destructor(void *buf, void *cdrarg)
{
	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;

	ASSERT(st->st_sonode.so_priv == &st->st_info);
	st->st_sonode.so_priv = NULL;

	i_sotpi_info_destructor(&st->st_info);
	sonode_destructor(buf, cdrarg);
}

/*
 * Constructor for the AF_UNIX cache: as socktpi_constructor(), and on
 * success the sonode is also inserted at the head of the doubly linked
 * socklist under sl_lock.
 */
static int
socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags)
{
	int retval;

	if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) {
		struct sonode *so = (struct sonode *)buf;
		sotpi_info_t *sti = SOTOTPI(so);

		mutex_enter(&socklist.sl_lock);

		sti->sti_next_so = socklist.sl_list;
		sti->sti_prev_so = NULL;
		if (sti->sti_next_so != NULL)
			SOTOTPI(sti->sti_next_so)->sti_prev_so = so;
		socklist.sl_list = so;

		mutex_exit(&socklist.sl_lock);

	}
	return (retval);
}

/*
 * Destructor for the AF_UNIX cache: unlinks the sonode from socklist
 * under sl_lock, then performs the common destruction.
 */
static void
socktpi_unix_destructor(void *buf, void *cdrarg)
{
	struct sonode	*so = (struct sonode *)buf;
	sotpi_info_t	*sti = SOTOTPI(so);

	mutex_enter(&socklist.sl_lock);

	if (sti->sti_next_so != NULL)
		SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so;
	if (sti->sti_prev_so != NULL)
		SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so;
	else
		socklist.sl_list = sti->sti_next_so;

	mutex_exit(&socklist.sl_lock);

	socktpi_destructor(buf, cdrarg);
}

/*
 * Create the two sotpi_sonode kmem caches.  Always returns 0.
 */
int
socktpi_init(void)
{
	/*
	 * Create sonode caches.  We create a special one for AF_UNIX so
	 * that we can track them for netstat(8).
	 */
	socktpi_cache = kmem_cache_create("socktpi_cache",
	    sizeof (struct sotpi_sonode), 0, socktpi_constructor,
	    socktpi_destructor, NULL, NULL, NULL, 0);

	socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache",
	    sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor,
	    socktpi_unix_destructor, NULL, NULL, NULL, 0);

	return (0);
}

/*
 * Given a non-TPI sonode, allocate and prep it to be ready for TPI.
 *
 * Caller must still update state and mode using sotpi_update_state().
 */
/*
 * On success *qp is set to the read side of the queue at the bottom of
 * the new stream and *direct tells whether sti_direct was set by
 * sotpi_init().  On failure the TPI info is torn down again and the error
 * from sotpi_init() is returned with *qp == NULL and *direct == B_FALSE.
 * NOTE(review): so_sockparams is left pointing at newsp on failure;
 * presumably the caller restores it - confirm.
 */
int
sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp,
    boolean_t *direct, queue_t **qp, struct cred *cr)
{
	sotpi_info_t *sti;
	struct sockparams *origsp = so->so_sockparams;
	sock_lower_handle_t handle = so->so_proto_handle;
	struct stdata *stp;
	struct vnode *vp;
	queue_t *q;
	int error = 0;

	ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
	    SS_FALLBACK_PENDING);
	ASSERT(SOCK_IS_NONSTR(so));

	*qp = NULL;
	*direct = B_FALSE;
	so->so_sockparams = newsp;
	/*
	 * Allocate and initialize fields required by TPI.
	 */
	(void) sotpi_info_create(so, KM_SLEEP);
	sotpi_info_init(so);

	if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) {
		sotpi_info_fini(so);
		sotpi_info_destroy(so);
		return (error);
	}
	ASSERT(handle == so->so_proto_handle);
	sti = SOTOTPI(so);
	if (sti->sti_direct != 0)
		*direct = B_TRUE;

	/*
	 * Keep the original sp around so we can properly dispose of the
	 * sonode when the socket is being closed.
	 */
	sti->sti_orig_sp = origsp;

	so_basic_strinit(so);	/* skips the T_CAPABILITY_REQ */
	so_alloc_addr(so, so->so_max_addr_len);

	/*
	 * If the application has done a SIOCSPGRP, make sure the
	 * STREAM head is aware. This needs to take place before
	 * the protocol starts sending up messages. Otherwise we
	 * might fail to generate SIGPOLL.
	 *
	 * It is possible that the application will receive duplicate
	 * signals if some were already generated for either data or
	 * connection indications.
	 */
	if (so->so_pgrp != 0) {
		if (so_set_events(so, so->so_vnode, cr) != 0)
			so->so_pgrp = 0;
	}

	/*
	 * Determine which queue to use.
	 */
	vp = SOTOV(so);
	stp = vp->v_stream;
	ASSERT(stp != NULL);
	q = stp->sd_wrq->q_next;

	/*
	 * Skip any modules that may have been auto pushed when the device
	 * was opened
	 */
	while (q->q_next != NULL)
		q = q->q_next;
	*qp = _RD(q);

	/* This is now a STREAMS socket */
	so->so_not_str = B_FALSE;

	return (error);
}

/*
 * Revert a TPI sonode. It is only allowed to revert the sonode during
 * the fallback process.
 */
void
sotpi_revert_sonode(struct sonode *so, struct cred *cr)
{
	vnode_t *vp = SOTOV(so);

	ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
	    SS_FALLBACK_PENDING);
	ASSERT(!SOCK_IS_NONSTR(so));
	ASSERT(vp->v_stream != NULL);

	strclean(vp);
	(void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr);

	/*
	 * Restore the original sockparams. The caller is responsible for
	 * dropping the ref to the new sp.
	 */
	so->so_sockparams = SOTOTPI(so)->sti_orig_sp;

	sotpi_info_fini(so);
	sotpi_info_destroy(so);

	/* This is no longer a STREAMS socket */
	so->so_not_str = B_TRUE;
}

/*
 * Process the given T_capability_ack, add opts to so_options and copy any
 * supplied local/foreign address into the address cache.  An address is
 * only copied when its length is non-zero; it is marked valid according to
 * SS_ISBOUND (local) or SS_ISCONNECTED (foreign).
 */
void
sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap,
    struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr,
    socklen_t faddrlen, short opts)
{
	sotpi_info_t *sti = SOTOTPI(so);

	so_proc_tcapability_ack(so, tcap);

	so->so_options |= opts;

	/*
	 * Determine whether the foreign and local address are valid
	 */
	if (laddrlen != 0) {
		ASSERT(laddrlen <= sti->sti_laddr_maxlen);
		sti->sti_laddr_len = laddrlen;
		bcopy(laddr, sti->sti_laddr_sa, laddrlen);
		sti->sti_laddr_valid = (so->so_state & SS_ISBOUND);
	}

	if (faddrlen != 0) {
		ASSERT(faddrlen <= sti->sti_faddr_maxlen);
		sti->sti_faddr_len = faddrlen;
		bcopy(faddr, sti->sti_faddr_sa, faddrlen);
		sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED);
	}

}

/*
 * Allocate enough space to cache the local and foreign addresses.
 *
 * Both caches share a single allocation of twice the rounded-up maxlen;
 * the foreign address occupies the second half.  so_max_addr_len is
 * updated to the rounded-up size.
 */
void
so_alloc_addr(struct sonode *so, t_uscalar_t maxlen)
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
	ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0);
	sti->sti_laddr_maxlen = sti->sti_faddr_maxlen =
	    P2ROUNDUP(maxlen, KMEM_ALIGN);
	so->so_max_addr_len = sti->sti_laddr_maxlen;
	sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP);
	sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa
	    + sti->sti_laddr_maxlen);

	if (so->so_family == AF_UNIX) {
		/*
		 * Initialize AF_UNIX related fields.
		 */
		bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr));
		bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr));
	}
}


/*
 * Return the TPI info hung off so_priv, asserting that it is present and
 * carries the expected magic number.
 */
sotpi_info_t *
sotpi_sototpi(struct sonode *so)
{
	sotpi_info_t *sti;

	ASSERT(so != NULL);

	sti = (sotpi_info_t *)so->so_priv;

	ASSERT(sti != NULL);
	ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);

	return (sti);
}

/*
 * Put a sotpi_info_t into its constructed state: magic set, message and
 * address pointers NULL, plumb lock and ack cv initialized.
 * Always returns 0.
 */
static int
i_sotpi_info_constructor(sotpi_info_t *sti)
{
	sti->sti_magic		= SOTPI_INFO_MAGIC;
	sti->sti_ack_mp		= NULL;
	sti->sti_discon_ind_mp	= NULL;
	sti->sti_ux_bound_vp	= NULL;
	sti->sti_unbind_mp	= NULL;

	sti->sti_conn_ind_head	= NULL;
	sti->sti_conn_ind_tail	= NULL;

	sti->sti_laddr_sa	= NULL;
	sti->sti_faddr_sa	= NULL;

	mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL);

	return (0);
}

/*
 * Counterpart of i_sotpi_info_constructor(): asserts that the structure
 * is back in its constructed state and destroys the lock and cv.
 */
static void
i_sotpi_info_destructor(sotpi_info_t *sti)
{
	ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);
	ASSERT(sti->sti_ack_mp == NULL);
	ASSERT(sti->sti_discon_ind_mp == NULL);
	ASSERT(sti->sti_ux_bound_vp == NULL);
	ASSERT(sti->sti_unbind_mp == NULL);

	ASSERT(sti->sti_conn_ind_head == NULL);
	ASSERT(sti->sti_conn_ind_tail == NULL);

	ASSERT(sti->sti_laddr_sa == NULL);
	ASSERT(sti->sti_faddr_sa == NULL);

	mutex_destroy(&sti->sti_plumb_lock);
	cv_destroy(&sti->sti_ack_cv);
}

/*
 * Creates and attaches TPI information to the given sonode
 *
 * Returns B_TRUE on success, B_FALSE if the allocation (made with the
 * given kmflags) or the constructor fails.
 */
static boolean_t
sotpi_info_create(struct sonode *so, int kmflags)
{
	sotpi_info_t *sti;

	ASSERT(so->so_priv == NULL);

	if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL)
		return (B_FALSE);

	if (i_sotpi_info_constructor(sti) != 0) {
		kmem_free(sti, sizeof (*sti));
		return (B_FALSE);
	}

	so->so_priv = (void *)sti;
	return (B_TRUE);
}

/*
 * Initializes the TPI information.
 *
 * The device number is taken from the sockparams' device vnode and is
 * also stored in the socket vnode's v_rdev; the remaining fields are
 * reset and the three timestamps are set to the current time.
 */
static void
sotpi_info_init(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	sotpi_info_t *sti = SOTOTPI(so);
	time_t now;

	sti->sti_dev	= so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev;
	vp->v_rdev	= sti->sti_dev;

	sti->sti_orig_sp = NULL;

	sti->sti_pushcnt = 0;

	now = gethrestime_sec();
	sti->sti_atime	= now;
	sti->sti_mtime	= now;
	sti->sti_ctime	= now;

	sti->sti_eaddr_mp = NULL;
	sti->sti_delayed_error = 0;

	sti->sti_provinfo = NULL;

	sti->sti_oobcnt = 0;
	sti->sti_oobsigcnt = 0;

	ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);

	sti->sti_laddr_sa	= 0;
	sti->sti_faddr_sa	= 0;
	sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0;
	sti->sti_laddr_len = sti->sti_faddr_len = 0;

	sti->sti_laddr_valid = 0;
	sti->sti_faddr_valid = 0;
	sti->sti_faddr_noxlate = 0;

	sti->sti_direct = 0;

	ASSERT(sti->sti_ack_mp == NULL);
	ASSERT(sti->sti_ux_bound_vp == NULL);
	ASSERT(sti->sti_unbind_mp == NULL);

	ASSERT(sti->sti_conn_ind_head == NULL);
	ASSERT(sti->sti_conn_ind_tail == NULL);
}

/*
 * Given a sonode, grab the TPI info and free any data.
 *
 * Frees the queued connection indications, the shared address cache and
 * the eaddr, ack and unbind messages, leaving the pointers NULL.
 */
static void
sotpi_info_fini(struct sonode *so)
{
	sotpi_info_t *sti = SOTOTPI(so);
	mblk_t *mp;

	ASSERT(sti->sti_discon_ind_mp == NULL);

	if ((mp = sti->sti_conn_ind_head) != NULL) {
		mblk_t *mp1;

		/* Walk the b_next chain, freeing each message */
		while (mp) {
			mp1 = mp->b_next;
			mp->b_next = NULL;
			freemsg(mp);
			mp = mp1;
		}
		sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL;
	}

	/*
	 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely
	 * indirect them.  It also uses so_count as a validity test.
	 */
	mutex_enter(&so->so_lock);

	if (sti->sti_laddr_sa) {
		ASSERT((caddr_t)sti->sti_faddr_sa ==
		    (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen);
		ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen);
		sti->sti_laddr_valid = 0;
		sti->sti_faddr_valid = 0;
		/* Both address caches live in this one allocation */
		kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2);
		sti->sti_laddr_sa = NULL;
		sti->sti_laddr_len = sti->sti_laddr_maxlen = 0;
		sti->sti_faddr_sa = NULL;
		sti->sti_faddr_len = sti->sti_faddr_maxlen = 0;
	}

	mutex_exit(&so->so_lock);

	if ((mp = sti->sti_eaddr_mp) != NULL) {
		freemsg(mp);
		sti->sti_eaddr_mp = NULL;
		sti->sti_delayed_error = 0;
	}

	if ((mp = sti->sti_ack_mp) != NULL) {
		freemsg(mp);
		sti->sti_ack_mp = NULL;
	}

	ASSERT(sti->sti_ux_bound_vp == NULL);
	if ((mp = sti->sti_unbind_mp) != NULL) {
		freemsg(mp);
		sti->sti_unbind_mp = NULL;
	}
}

/*
 * Destroys the TPI information attached to a sonode.
 */
static void
sotpi_info_destroy(struct sonode *so)
{
	sotpi_info_t *sti = SOTOTPI(so);

	i_sotpi_info_destructor(sti);
	kmem_free(sti, sizeof (*sti));

	so->so_priv = NULL;
}

/*
 * Create the global sotpi socket module entry. It will never be freed.
 */
smod_info_t *
sotpi_smod_create(void)
{
	smod_info_t *smodp;

	smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP);
	smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP);
	(void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME);
	/*
	 * Initialize the smod_refcnt to 1 so it will never be freed.
	 */
	smodp->smod_refcnt = 1;
	smodp->smod_uc_version = SOCK_UC_VERSION;
	smodp->smod_dc_version = SOCK_DC_VERSION;
	smodp->smod_sock_create_func = &sotpi_create;
	smodp->smod_sock_destroy_func = &sotpi_destroy;
	return (smodp);
}
