1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2015, Joyent, Inc. 25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
26 */ 27 28 #include <sys/types.h> 29 #include <sys/t_lock.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/buf.h> 33 #include <sys/conf.h> 34 #include <sys/cred.h> 35 #include <sys/kmem.h> 36 #include <sys/kmem_impl.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/suntpi.h> 51 #include <sys/ddi.h> 52 #include <sys/esunddi.h> 53 #include <sys/flock.h> 54 #include <sys/modctl.h> 55 #include <sys/vtrace.h> 56 #include <sys/cmn_err.h> 57 #include <sys/pathname.h> 58 59 #include <sys/socket.h> 60 #include <sys/socketvar.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <sys/un.h> 64 #include <sys/strsun.h> 65 66 #include <sys/tiuser.h> 67 #define _SUN_TPI_VERSION 2 68 #include <sys/tihdr.h> 69 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 70 71 #include <c2/audit.h> 72 73 #include <inet/common.h> 74 #include <inet/ip.h> 75 #include <inet/ip6.h> 76 #include <inet/tcp.h> 77 #include <inet/udp_impl.h> 78 79 #include <sys/zone.h> 80 81 #include <fs/sockfs/nl7c.h> 82 #include <fs/sockfs/nl7curi.h> 83 84 #include <fs/sockfs/sockcommon.h> 85 #include <fs/sockfs/socktpi.h> 86 #include <fs/sockfs/socktpi_impl.h> 87 88 /* 89 * Possible failures when memory can't be allocated. 
The documented behavior: 90 * 91 * 5.5: 4.X: XNET: 92 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 93 * EINTR 94 * (4.X does not document EINTR but returns it) 95 * bind: ENOSR - ENOBUFS/ENOSR 96 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 97 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 98 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 99 * (4.X getpeername and getsockname do not fail in practice) 100 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 101 * listen: - - ENOBUFS 102 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 103 * EINTR 104 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 105 * EINTR 106 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 107 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 108 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 109 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 110 * 111 * Resolution. When allocation fails: 112 * recv: return EINTR 113 * send: return EINTR 114 * connect, accept: EINTR 115 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 116 * socket, socketpair: ENOBUFS 117 * getpeername, getsockname: sleep 118 * getsockopt, setsockopt: sleep 119 */ 120 121 #ifdef SOCK_TEST 122 /* 123 * Variables that make sockfs do something other than the standard TPI 124 * for the AF_INET transports. 125 * 126 * solisten_tpi_tcp: 127 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 128 * the transport is already bound. This is needed to avoid loosing the 129 * port number should listen() do a T_UNBIND_REQ followed by a 130 * O_T_BIND_REQ. 131 * 132 * soconnect_tpi_udp: 133 * UDP and ICMP can handle a T_CONN_REQ. 134 * This is needed to make the sequence of connect(), getsockname() 135 * return the local IP address used to send packets to the connected to 136 * destination. 137 * 138 * soconnect_tpi_tcp: 139 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 
140 * Set this to non-zero to send TPI conformant messages to TCP in this 141 * respect. This is a performance optimization. 142 * 143 * soaccept_tpi_tcp: 144 * TCP can handle a T_CONN_REQ without the acceptor being bound. 145 * This is a performance optimization that has been picked up in XTI. 146 * 147 * soaccept_tpi_multioptions: 148 * When inheriting SOL_SOCKET options from the listener to the accepting 149 * socket send them as a single message for AF_INET{,6}. 150 */ 151 int solisten_tpi_tcp = 0; 152 int soconnect_tpi_udp = 0; 153 int soconnect_tpi_tcp = 0; 154 int soaccept_tpi_tcp = 0; 155 int soaccept_tpi_multioptions = 1; 156 #else /* SOCK_TEST */ 157 #define soconnect_tpi_tcp 0 158 #define soconnect_tpi_udp 0 159 #define solisten_tpi_tcp 0 160 #define soaccept_tpi_tcp 0 161 #define soaccept_tpi_multioptions 1 162 #endif /* SOCK_TEST */ 163 164 #ifdef SOCK_TEST 165 extern int do_useracc; 166 extern clock_t sock_test_timelimit; 167 #endif /* SOCK_TEST */ 168 169 /* 170 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 171 * applications working. Turn on this flag to disable these checks. 172 */ 173 int xnet_skip_checks = 0; 174 int xnet_check_print = 0; 175 int xnet_truncate_print = 0; 176 177 static void sotpi_destroy(struct sonode *); 178 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int, 179 int, int *, cred_t *cr); 180 181 static boolean_t sotpi_info_create(struct sonode *, int); 182 static void sotpi_info_init(struct sonode *); 183 static void sotpi_info_fini(struct sonode *); 184 static void sotpi_info_destroy(struct sonode *); 185 186 /* 187 * Do direct function call to the transport layer below; this would 188 * also allow the transport to utilize read-side synchronous stream 189 * interface if necessary. This is a /etc/system tunable that must 190 * not be modified on a running system. By default this is enabled 191 * for performance reasons and may be disabled for debugging purposes. 
192 */ 193 boolean_t socktpi_direct = B_TRUE; 194 195 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 196 197 extern void sigintr(k_sigset_t *, int); 198 extern void sigunintr(k_sigset_t *); 199 200 static int sotpi_unbind(struct sonode *, int); 201 202 /* TPI sockfs sonode operations */ 203 int sotpi_init(struct sonode *, struct sonode *, struct cred *, 204 int); 205 static int sotpi_accept(struct sonode *, int, struct cred *, 206 struct sonode **); 207 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 208 int, struct cred *); 209 static int sotpi_listen(struct sonode *, int, struct cred *); 210 static int sotpi_connect(struct sonode *, struct sockaddr *, 211 socklen_t, int, int, struct cred *); 212 extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 213 struct uio *, struct cred *); 214 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 215 struct uio *, struct cred *); 216 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 217 struct cred *, mblk_t **); 218 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 219 struct uio *, void *, t_uscalar_t, int); 220 static int sodgram_direct(struct sonode *, struct sockaddr *, 221 socklen_t, struct uio *, int); 222 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 223 socklen_t *, boolean_t, struct cred *); 224 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 225 socklen_t *, struct cred *); 226 static int sotpi_shutdown(struct sonode *, int, struct cred *); 227 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 228 socklen_t *, int, struct cred *); 229 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 230 socklen_t, struct cred *); 231 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 232 int32_t *); 233 static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int, 234 struct cred *, int32_t *); 235 static int sotpi_poll(struct sonode *, short, 
int, short *, 236 struct pollhead **); 237 static int sotpi_close(struct sonode *, int, struct cred *); 238 239 static int i_sotpi_info_constructor(sotpi_info_t *); 240 static void i_sotpi_info_destructor(sotpi_info_t *); 241 242 sonodeops_t sotpi_sonodeops = { 243 sotpi_init, /* sop_init */ 244 sotpi_accept, /* sop_accept */ 245 sotpi_bind, /* sop_bind */ 246 sotpi_listen, /* sop_listen */ 247 sotpi_connect, /* sop_connect */ 248 sotpi_recvmsg, /* sop_recvmsg */ 249 sotpi_sendmsg, /* sop_sendmsg */ 250 sotpi_sendmblk, /* sop_sendmblk */ 251 sotpi_getpeername, /* sop_getpeername */ 252 sotpi_getsockname, /* sop_getsockname */ 253 sotpi_shutdown, /* sop_shutdown */ 254 sotpi_getsockopt, /* sop_getsockopt */ 255 sotpi_setsockopt, /* sop_setsockopt */ 256 sotpi_ioctl, /* sop_ioctl */ 257 sotpi_poll, /* sop_poll */ 258 sotpi_close, /* sop_close */ 259 }; 260 261 /* 262 * Return a TPI socket vnode. 263 * 264 * Note that sockets assume that the driver will clone (either itself 265 * or by using the clone driver) i.e. a socket() call will always 266 * result in a new vnode being created. 267 */ 268 269 /* 270 * Common create code for socket and accept. If tso is set the values 271 * from that node is used instead of issuing a T_INFO_REQ. 272 */ 273 274 /* ARGSUSED */ 275 static struct sonode * 276 sotpi_create(struct sockparams *sp, int family, int type, int protocol, 277 int version, int sflags, int *errorp, cred_t *cr) 278 { 279 struct sonode *so; 280 kmem_cache_t *cp; 281 int sfamily = family; 282 283 ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 284 285 if (family == AF_NCA) { 286 /* 287 * The request is for an NCA socket so for NL7C use the 288 * INET domain instead and mark NL7C_AF_NCA below. 289 */ 290 family = AF_INET; 291 /* 292 * NL7C is not supported in the non-global zone, 293 * we enforce this restriction here. 
294 */ 295 if (getzoneid() != GLOBAL_ZONEID) { 296 *errorp = ENOTSUP; 297 return (NULL); 298 } 299 } 300 301 /* 302 * to be compatible with old tpi socket implementation ignore 303 * sleep flag (sflags) passed in 304 */ 305 cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 306 so = kmem_cache_alloc(cp, KM_SLEEP); 307 if (so == NULL) { 308 *errorp = ENOMEM; 309 return (NULL); 310 } 311 312 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 313 sotpi_info_init(so); 314 315 if (sfamily == AF_NCA) { 316 SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA; 317 } 318 319 if (version == SOV_DEFAULT) 320 version = so_default_version; 321 322 so->so_version = (short)version; 323 *errorp = 0; 324 325 return (so); 326 } 327 328 static void 329 sotpi_destroy(struct sonode *so) 330 { 331 kmem_cache_t *cp; 332 struct sockparams *origsp; 333 334 /* 335 * If there is a new dealloc function (ie. smod_destroy_func), 336 * then it should check the correctness of the ops. 337 */ 338 339 ASSERT(so->so_ops == &sotpi_sonodeops); 340 341 origsp = SOTOTPI(so)->sti_orig_sp; 342 343 sotpi_info_fini(so); 344 345 if (so->so_state & SS_FALLBACK_COMP) { 346 /* 347 * A fallback happend, which means that a sotpi_info_t struct 348 * was allocated (as opposed to being allocated from the TPI 349 * sonode cache. Therefore we explicitly free the struct 350 * here. 351 */ 352 sotpi_info_destroy(so); 353 ASSERT(origsp != NULL); 354 355 origsp->sp_smod_info->smod_sock_destroy_func(so); 356 SOCKPARAMS_DEC_REF(origsp); 357 } else { 358 sonode_fini(so); 359 cp = (so->so_family == AF_UNIX) ? 
socktpi_unix_cache : 360 socktpi_cache; 361 kmem_cache_free(cp, so); 362 } 363 } 364 365 /* ARGSUSED1 */ 366 int 367 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 368 { 369 major_t maj; 370 dev_t newdev; 371 struct vnode *vp; 372 int error = 0; 373 struct stdata *stp; 374 375 sotpi_info_t *sti = SOTOTPI(so); 376 377 dprint(1, ("sotpi_init()\n")); 378 379 /* 380 * over write the sleep flag passed in but that is ok 381 * as tpi socket does not honor sleep flag. 382 */ 383 flags |= FREAD|FWRITE; 384 385 /* 386 * Record in so_flag that it is a clone. 387 */ 388 if (getmajor(sti->sti_dev) == clone_major) 389 so->so_flag |= SOCLONE; 390 391 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 392 (so->so_family == AF_INET || so->so_family == AF_INET6) && 393 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 394 so->so_protocol == IPPROTO_IP)) { 395 /* Tell tcp or udp that it's talking to sockets */ 396 flags |= SO_SOCKSTR; 397 398 /* 399 * Here we indicate to socktpi_open() our attempt to 400 * make direct calls between sockfs and transport. 401 * The final decision is left to socktpi_open(). 402 */ 403 sti->sti_direct = 1; 404 405 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 406 if (so->so_type == SOCK_STREAM && tso != NULL) { 407 if (SOTOTPI(tso)->sti_direct) { 408 /* 409 * Inherit sti_direct from listener and pass 410 * SO_ACCEPTOR open flag to tcp, indicating 411 * that this is an accept fast-path instance. 412 */ 413 flags |= SO_ACCEPTOR; 414 } else { 415 /* 416 * sti_direct is not set on listener, meaning 417 * that the listener has been converted from 418 * a socket to a stream. Ensure that the 419 * acceptor inherits these settings. 420 */ 421 sti->sti_direct = 0; 422 flags &= ~SO_SOCKSTR; 423 } 424 } 425 } 426 427 /* 428 * Tell local transport that it is talking to sockets. 
429 */ 430 if (so->so_family == AF_UNIX) { 431 flags |= SO_SOCKSTR; 432 } 433 434 vp = SOTOV(so); 435 newdev = vp->v_rdev; 436 maj = getmajor(newdev); 437 ASSERT(STREAMSTAB(maj)); 438 439 error = stropen(vp, &newdev, flags, cr); 440 441 stp = vp->v_stream; 442 if (error == 0) { 443 if (so->so_flag & SOCLONE) 444 ASSERT(newdev != vp->v_rdev); 445 mutex_enter(&so->so_lock); 446 sti->sti_dev = newdev; 447 vp->v_rdev = newdev; 448 mutex_exit(&so->so_lock); 449 450 if (stp->sd_flag & STRISTTY) { 451 /* 452 * this is a post SVR4 tty driver - a socket can not 453 * be a controlling terminal. Fail the open. 454 */ 455 (void) sotpi_close(so, flags, cr); 456 return (ENOTTY); /* XXX */ 457 } 458 459 ASSERT(stp->sd_wrq != NULL); 460 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 461 462 /* 463 * If caller is interested in doing direct function call 464 * interface to/from transport module, probe the module 465 * directly beneath the streamhead to see if it qualifies. 466 * 467 * We turn off the direct interface when qualifications fail. 468 * In the acceptor case, we simply turn off the sti_direct 469 * flag on the socket. We do the fallback after the accept 470 * has completed, before the new socket is returned to the 471 * application. 472 */ 473 if (sti->sti_direct) { 474 queue_t *tq = stp->sd_wrq->q_next; 475 476 /* 477 * sti_direct is currently supported and tested 478 * only for tcp/udp; this is the main reason to 479 * have the following assertions. 480 */ 481 ASSERT(so->so_family == AF_INET || 482 so->so_family == AF_INET6); 483 ASSERT(so->so_protocol == IPPROTO_UDP || 484 so->so_protocol == IPPROTO_TCP || 485 so->so_protocol == IPPROTO_IP); 486 ASSERT(so->so_type == SOCK_DGRAM || 487 so->so_type == SOCK_STREAM); 488 489 /* 490 * Abort direct call interface if the module directly 491 * underneath the stream head is not defined with the 492 * _D_DIRECT flag. 
This could happen in the tcp or 493 * udp case, when some other module is autopushed 494 * above it, or for some reasons the expected module 495 * isn't purely D_MP (which is the main requirement). 496 */ 497 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 498 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 499 int rval; 500 501 /* Continue on without direct calls */ 502 sti->sti_direct = 0; 503 504 /* 505 * Cannot issue ioctl on fallback socket since 506 * there is no conn associated with the queue. 507 * The fallback downcall will notify the proto 508 * of the change. 509 */ 510 if (!(flags & SO_ACCEPTOR) && 511 !(flags & SO_FALLBACK)) { 512 if ((error = strioctl(vp, 513 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 514 cr, &rval)) != 0) { 515 (void) sotpi_close(so, flags, 516 cr); 517 return (error); 518 } 519 } 520 } 521 } 522 523 if (flags & SO_FALLBACK) { 524 /* 525 * The stream created does not have a conn. 526 * do stream set up after conn has been assigned 527 */ 528 return (error); 529 } 530 if (error = so_strinit(so, tso)) { 531 (void) sotpi_close(so, flags, cr); 532 return (error); 533 } 534 535 /* Wildcard */ 536 if (so->so_protocol != so->so_sockparams->sp_protocol) { 537 int protocol = so->so_protocol; 538 /* 539 * Issue SO_PROTOTYPE setsockopt. 540 */ 541 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 542 &protocol, (t_uscalar_t)sizeof (protocol), cr); 543 if (error != 0) { 544 (void) sotpi_close(so, flags, cr); 545 /* 546 * Setsockopt often fails with ENOPROTOOPT but 547 * socket() should fail with 548 * EPROTONOSUPPORT/EPROTOTYPE. 549 */ 550 return (EPROTONOSUPPORT); 551 } 552 } 553 554 } else { 555 /* 556 * While the same socket can not be reopened (unlike specfs) 557 * the stream head sets STREOPENFAIL when the autopush fails. 558 */ 559 if ((stp != NULL) && 560 (stp->sd_flag & STREOPENFAIL)) { 561 /* 562 * Open failed part way through. 
563 */ 564 mutex_enter(&stp->sd_lock); 565 stp->sd_flag &= ~STREOPENFAIL; 566 mutex_exit(&stp->sd_lock); 567 (void) sotpi_close(so, flags, cr); 568 return (error); 569 /*NOTREACHED*/ 570 } 571 ASSERT(stp == NULL); 572 } 573 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 574 "sockfs open:maj %d vp %p so %p error %d", 575 maj, vp, so, error); 576 return (error); 577 } 578 579 /* 580 * Bind the socket to an unspecified address in sockfs only. 581 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 582 * required in all cases. 583 */ 584 static void 585 so_automatic_bind(struct sonode *so) 586 { 587 sotpi_info_t *sti = SOTOTPI(so); 588 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 589 590 ASSERT(MUTEX_HELD(&so->so_lock)); 591 ASSERT(!(so->so_state & SS_ISBOUND)); 592 ASSERT(sti->sti_unbind_mp); 593 594 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 595 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 596 sti->sti_laddr_sa->sa_family = so->so_family; 597 so->so_state |= SS_ISBOUND; 598 } 599 600 601 /* 602 * bind the socket. 603 * 604 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 605 * are passed in we allow rebinding. Note that for backwards compatibility 606 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 607 * Thus the rebinding code is currently not executed. 608 * 609 * The constraints for rebinding are: 610 * - it is a SOCK_DGRAM, or 611 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 612 * and no listen() has been done. 613 * This rebinding code was added based on some language in the XNET book 614 * about not returning EINVAL it the protocol allows rebinding. However, 615 * this language is not present in the Posix socket draft. Thus maybe the 616 * rebinding logic should be deleted from the source. 
617 * 618 * A null "name" can be used to unbind the socket if: 619 * - it is a SOCK_DGRAM, or 620 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 621 * and no listen() has been done. 622 */ 623 /* ARGSUSED */ 624 static int 625 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 626 socklen_t namelen, int backlog, int flags, struct cred *cr) 627 { 628 struct T_bind_req bind_req; 629 struct T_bind_ack *bind_ack; 630 int error = 0; 631 mblk_t *mp; 632 void *addr; 633 t_uscalar_t addrlen; 634 int unbind_on_err = 1; 635 boolean_t clear_acceptconn_on_err = B_FALSE; 636 boolean_t restore_backlog_on_err = B_FALSE; 637 int save_so_backlog; 638 t_scalar_t PRIM_type = O_T_BIND_REQ; 639 boolean_t tcp_udp_xport; 640 void *nl7c = NULL; 641 sotpi_info_t *sti = SOTOTPI(so); 642 643 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 644 (void *)so, (void *)name, namelen, backlog, flags, 645 pr_state(so->so_state, so->so_mode))); 646 647 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 648 649 if (!(flags & _SOBIND_LOCK_HELD)) { 650 mutex_enter(&so->so_lock); 651 so_lock_single(so); /* Set SOLOCKED */ 652 } else { 653 ASSERT(MUTEX_HELD(&so->so_lock)); 654 ASSERT(so->so_flag & SOLOCKED); 655 } 656 657 /* 658 * Make sure that there is a preallocated unbind_req message 659 * before binding. This message allocated when the socket is 660 * created but it might be have been consumed. 661 */ 662 if (sti->sti_unbind_mp == NULL) { 663 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 664 /* NOTE: holding so_lock while sleeping */ 665 sti->sti_unbind_mp = 666 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 667 cr); 668 } 669 670 if (flags & _SOBIND_REBIND) { 671 /* 672 * Called from solisten after doing an sotpi_unbind() or 673 * potentially without the unbind (latter for AF_INET{,6}). 
674 */ 675 ASSERT(name == NULL && namelen == 0); 676 677 if (so->so_family == AF_UNIX) { 678 ASSERT(sti->sti_ux_bound_vp); 679 addr = &sti->sti_ux_laddr; 680 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 681 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 682 "addr 0x%p, vp %p\n", 683 addrlen, 684 (void *)((struct so_ux_addr *)addr)->soua_vp, 685 (void *)sti->sti_ux_bound_vp)); 686 } else { 687 addr = sti->sti_laddr_sa; 688 addrlen = (t_uscalar_t)sti->sti_laddr_len; 689 } 690 } else if (flags & _SOBIND_UNSPEC) { 691 ASSERT(name == NULL && namelen == 0); 692 693 /* 694 * The caller checked SS_ISBOUND but not necessarily 695 * under so_lock 696 */ 697 if (so->so_state & SS_ISBOUND) { 698 /* No error */ 699 goto done; 700 } 701 702 /* Set an initial local address */ 703 switch (so->so_family) { 704 case AF_UNIX: 705 /* 706 * Use an address with same size as struct sockaddr 707 * just like BSD. 708 */ 709 sti->sti_laddr_len = 710 (socklen_t)sizeof (struct sockaddr); 711 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 712 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 713 sti->sti_laddr_sa->sa_family = so->so_family; 714 715 /* 716 * Pass down an address with the implicit bind 717 * magic number and the rest all zeros. 718 * The transport will return a unique address. 719 */ 720 sti->sti_ux_laddr.soua_vp = NULL; 721 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 722 addr = &sti->sti_ux_laddr; 723 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 724 break; 725 726 case AF_INET: 727 case AF_INET6: 728 /* 729 * An unspecified bind in TPI has a NULL address. 730 * Set the address in sockfs to have the sa_family. 731 */ 732 sti->sti_laddr_len = (so->so_family == AF_INET) ? 
733 (socklen_t)sizeof (sin_t) : 734 (socklen_t)sizeof (sin6_t); 735 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 736 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 737 sti->sti_laddr_sa->sa_family = so->so_family; 738 addr = NULL; 739 addrlen = 0; 740 break; 741 742 default: 743 /* 744 * An unspecified bind in TPI has a NULL address. 745 * Set the address in sockfs to be zero length. 746 * 747 * Can not assume there is a sa_family for all 748 * protocol families. For example, AF_X25 does not 749 * have a family field. 750 */ 751 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 752 sti->sti_laddr_len = 0; /* XXX correct? */ 753 addr = NULL; 754 addrlen = 0; 755 break; 756 } 757 758 } else { 759 if (so->so_state & SS_ISBOUND) { 760 /* 761 * If it is ok to rebind the socket, first unbind 762 * with the transport. A rebind to the NULL address 763 * is interpreted as an unbind. 764 * Note that a bind to NULL in BSD does unbind the 765 * socket but it fails with EINVAL. 766 * Note that regular sockets set SOV_SOCKBSD i.e. 767 * _SOBIND_SOCKBSD gets set here hence no type of 768 * socket does currently allow rebinding. 769 * 770 * If the name is NULL just do an unbind. 
771 */ 772 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 773 name != NULL) { 774 error = EINVAL; 775 unbind_on_err = 0; 776 eprintsoline(so, error); 777 goto done; 778 } 779 if ((so->so_mode & SM_CONNREQUIRED) && 780 (so->so_state & SS_CANTREBIND)) { 781 error = EINVAL; 782 unbind_on_err = 0; 783 eprintsoline(so, error); 784 goto done; 785 } 786 error = sotpi_unbind(so, 0); 787 if (error) { 788 eprintsoline(so, error); 789 goto done; 790 } 791 ASSERT(!(so->so_state & SS_ISBOUND)); 792 if (name == NULL) { 793 so->so_state &= 794 ~(SS_ISCONNECTED|SS_ISCONNECTING); 795 goto done; 796 } 797 } 798 799 /* X/Open requires this check */ 800 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 801 if (xnet_check_print) { 802 printf("sockfs: X/Open bind state check " 803 "caused EINVAL\n"); 804 } 805 error = EINVAL; 806 goto done; 807 } 808 809 switch (so->so_family) { 810 case AF_UNIX: 811 /* 812 * All AF_UNIX addresses are nul terminated 813 * when copied (copyin_name) in so the minimum 814 * length is 3 bytes. 815 */ 816 if (name == NULL || 817 (ssize_t)namelen <= sizeof (short) + 1) { 818 error = EISDIR; 819 eprintsoline(so, error); 820 goto done; 821 } 822 /* 823 * Verify so_family matches the bound family. 824 * BSD does not check this for AF_UNIX resulting 825 * in funny mknods. 826 */ 827 if (name->sa_family != so->so_family) { 828 error = EAFNOSUPPORT; 829 goto done; 830 } 831 break; 832 case AF_INET: 833 if (name == NULL) { 834 error = EINVAL; 835 eprintsoline(so, error); 836 goto done; 837 } 838 if ((size_t)namelen != sizeof (sin_t)) { 839 error = name->sa_family != so->so_family ? 840 EAFNOSUPPORT : EINVAL; 841 eprintsoline(so, error); 842 goto done; 843 } 844 if ((flags & _SOBIND_XPG4_2) && 845 (name->sa_family != so->so_family)) { 846 /* 847 * This check has to be made for X/Open 848 * sockets however application failures have 849 * been observed when it is applied to 850 * all sockets. 
851 */ 852 error = EAFNOSUPPORT; 853 eprintsoline(so, error); 854 goto done; 855 } 856 /* 857 * Force a zero sa_family to match so_family. 858 * 859 * Some programs like inetd(1M) don't set the 860 * family field. Other programs leave 861 * sin_family set to garbage - SunOS 4.X does 862 * not check the family field on a bind. 863 * We use the family field that 864 * was passed in to the socket() call. 865 */ 866 name->sa_family = so->so_family; 867 break; 868 869 case AF_INET6: { 870 #ifdef DEBUG 871 sin6_t *sin6 = (sin6_t *)name; 872 #endif /* DEBUG */ 873 874 if (name == NULL) { 875 error = EINVAL; 876 eprintsoline(so, error); 877 goto done; 878 } 879 if ((size_t)namelen != sizeof (sin6_t)) { 880 error = name->sa_family != so->so_family ? 881 EAFNOSUPPORT : EINVAL; 882 eprintsoline(so, error); 883 goto done; 884 } 885 if (name->sa_family != so->so_family) { 886 /* 887 * With IPv6 we require the family to match 888 * unlike in IPv4. 889 */ 890 error = EAFNOSUPPORT; 891 eprintsoline(so, error); 892 goto done; 893 } 894 #ifdef DEBUG 895 /* 896 * Verify that apps don't forget to clear 897 * sin6_scope_id etc 898 */ 899 if (sin6->sin6_scope_id != 0 && 900 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 901 zcmn_err(getzoneid(), CE_WARN, 902 "bind with uninitialized sin6_scope_id " 903 "(%d) on socket. Pid = %d\n", 904 (int)sin6->sin6_scope_id, 905 (int)curproc->p_pid); 906 } 907 if (sin6->__sin6_src_id != 0) { 908 zcmn_err(getzoneid(), CE_WARN, 909 "bind with uninitialized __sin6_src_id " 910 "(%d) on socket. Pid = %d\n", 911 (int)sin6->__sin6_src_id, 912 (int)curproc->p_pid); 913 } 914 #endif /* DEBUG */ 915 break; 916 } 917 default: 918 /* 919 * Don't do any length or sa_family check to allow 920 * non-sockaddr style addresses. 
921 */ 922 if (name == NULL) { 923 error = EINVAL; 924 eprintsoline(so, error); 925 goto done; 926 } 927 break; 928 } 929 930 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 931 error = ENAMETOOLONG; 932 eprintsoline(so, error); 933 goto done; 934 } 935 /* 936 * Save local address. 937 */ 938 sti->sti_laddr_len = (socklen_t)namelen; 939 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 940 bcopy(name, sti->sti_laddr_sa, namelen); 941 942 addr = sti->sti_laddr_sa; 943 addrlen = (t_uscalar_t)sti->sti_laddr_len; 944 switch (so->so_family) { 945 case AF_INET6: 946 case AF_INET: 947 break; 948 case AF_UNIX: { 949 struct sockaddr_un *soun = 950 (struct sockaddr_un *)sti->sti_laddr_sa; 951 struct vnode *vp, *rvp; 952 struct vattr vattr; 953 954 ASSERT(sti->sti_ux_bound_vp == NULL); 955 /* 956 * Create vnode for the specified path name. 957 * Keep vnode held with a reference in sti_ux_bound_vp. 958 * Use the vnode pointer as the address used in the 959 * bind with the transport. 960 * 961 * Use the same mode as in BSD. In particular this does 962 * not observe the umask. 963 */ 964 /* MAXPATHLEN + soun_family + nul termination */ 965 if (sti->sti_laddr_len > 966 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 967 error = ENAMETOOLONG; 968 eprintsoline(so, error); 969 goto done; 970 } 971 vattr.va_type = VSOCK; 972 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 973 vattr.va_mask = AT_TYPE|AT_MODE; 974 /* NOTE: holding so_lock */ 975 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 976 EXCL, 0, &vp, CRMKNOD, 0, 0); 977 if (error) { 978 if (error == EEXIST) 979 error = EADDRINUSE; 980 eprintsoline(so, error); 981 goto done; 982 } 983 /* 984 * Establish pointer from the underlying filesystem 985 * vnode to the socket node. 986 * sti_ux_bound_vp and v_stream->sd_vnode form the 987 * cross-linkage between the underlying filesystem 988 * node and the socket node. 
989 */ 990 991 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 992 VN_HOLD(rvp); 993 VN_RELE(vp); 994 vp = rvp; 995 } 996 997 ASSERT(SOTOV(so)->v_stream); 998 mutex_enter(&vp->v_lock); 999 vp->v_stream = SOTOV(so)->v_stream; 1000 sti->sti_ux_bound_vp = vp; 1001 mutex_exit(&vp->v_lock); 1002 1003 /* 1004 * Use the vnode pointer value as a unique address 1005 * (together with the magic number to avoid conflicts 1006 * with implicit binds) in the transport provider. 1007 */ 1008 sti->sti_ux_laddr.soua_vp = 1009 (void *)sti->sti_ux_bound_vp; 1010 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1011 addr = &sti->sti_ux_laddr; 1012 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1013 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1014 addrlen, 1015 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1016 break; 1017 } 1018 } /* end switch (so->so_family) */ 1019 } 1020 1021 /* 1022 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1023 * the transport can start passing up T_CONN_IND messages 1024 * as soon as it receives the bind req and strsock_proto() 1025 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1026 */ 1027 if (flags & _SOBIND_LISTEN) { 1028 if ((so->so_state & SS_ACCEPTCONN) == 0) 1029 clear_acceptconn_on_err = B_TRUE; 1030 save_so_backlog = so->so_backlog; 1031 restore_backlog_on_err = B_TRUE; 1032 so->so_state |= SS_ACCEPTCONN; 1033 so->so_backlog = backlog; 1034 } 1035 1036 /* 1037 * If NL7C addr(s) have been configured check for addr/port match, 1038 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 1039 * 1040 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 1041 * family sockets only. If match mark as such. 
1042 */ 1043 if (nl7c_enabled && ((addr != NULL && 1044 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1045 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 1046 sti->sti_nl7c_flags == NL7C_AF_NCA)) { 1047 /* 1048 * NL7C is not supported in non-global zones, 1049 * we enforce this restriction here. 1050 */ 1051 if (so->so_zoneid == GLOBAL_ZONEID) { 1052 /* An NL7C socket, mark it */ 1053 sti->sti_nl7c_flags |= NL7C_ENABLED; 1054 if (nl7c == NULL) { 1055 /* 1056 * Was an AF_NCA bind() so add it to the 1057 * addr list for reporting purposes. 1058 */ 1059 nl7c = nl7c_add_addr(addr, addrlen); 1060 } 1061 } else 1062 nl7c = NULL; 1063 } 1064 1065 /* 1066 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1067 * for other transports we will send in a O_T_BIND_REQ. 1068 */ 1069 if (tcp_udp_xport && 1070 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1071 PRIM_type = T_BIND_REQ; 1072 1073 bind_req.PRIM_type = PRIM_type; 1074 bind_req.ADDR_length = addrlen; 1075 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1076 bind_req.CONIND_number = backlog; 1077 /* NOTE: holding so_lock while sleeping */ 1078 mp = soallocproto2(&bind_req, sizeof (bind_req), 1079 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1080 sti->sti_laddr_valid = 0; 1081 1082 /* Done using sti_laddr_sa - can drop the lock */ 1083 mutex_exit(&so->so_lock); 1084 1085 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1086 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1087 if (error) { 1088 eprintsoline(so, error); 1089 mutex_enter(&so->so_lock); 1090 goto done; 1091 } 1092 1093 mutex_enter(&so->so_lock); 1094 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1095 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1096 if (error) { 1097 eprintsoline(so, error); 1098 goto done; 1099 } 1100 ASSERT(mp); 1101 /* 1102 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1103 * strsock_proto while the lock was dropped above, the bind 1104 * is allowed to complete. 1105 */ 1106 1107 /* Mark as bound. 
This will be undone if we detect errors below. */ 1108 if (flags & _SOBIND_NOXLATE) { 1109 ASSERT(so->so_family == AF_UNIX); 1110 sti->sti_faddr_noxlate = 1; 1111 } 1112 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1113 so->so_state |= SS_ISBOUND; 1114 ASSERT(sti->sti_unbind_mp); 1115 1116 /* note that we've already set SS_ACCEPTCONN above */ 1117 1118 /* 1119 * Recompute addrlen - an unspecied bind sent down an 1120 * address of length zero but we expect the appropriate length 1121 * in return. 1122 */ 1123 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 1124 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1125 1126 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1127 /* 1128 * The alignment restriction is really too strict but 1129 * we want enough alignment to inspect the fields of 1130 * a sockaddr_in. 1131 */ 1132 addr = sogetoff(mp, bind_ack->ADDR_offset, 1133 bind_ack->ADDR_length, 1134 __TPI_ALIGN_SIZE); 1135 if (addr == NULL) { 1136 freemsg(mp); 1137 error = EPROTO; 1138 eprintsoline(so, error); 1139 goto done; 1140 } 1141 if (!(flags & _SOBIND_UNSPEC)) { 1142 /* 1143 * Verify that the transport didn't return something we 1144 * did not want e.g. an address other than what we asked for. 1145 * 1146 * NOTE: These checks would go away if/when we switch to 1147 * using the new TPI (in which the transport would fail 1148 * the request instead of assigning a different address). 1149 * 1150 * NOTE2: For protocols that we don't know (i.e. any 1151 * other than AF_INET6, AF_INET and AF_UNIX), we 1152 * cannot know if the transport should be expected to 1153 * return the same address as that requested. 1154 * 1155 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1156 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 
1157 * 1158 * For example, in the case of netatalk it may be 1159 * inappropriate for the transport to return the 1160 * requested address (as it may have allocated a local 1161 * port number in behaviour similar to that of an 1162 * AF_INET bind request with a port number of zero). 1163 * 1164 * Given the definition of O_T_BIND_REQ, where the 1165 * transport may bind to an address other than the 1166 * requested address, it's not possible to determine 1167 * whether a returned address that differs from the 1168 * requested address is a reason to fail (because the 1169 * requested address was not available) or succeed 1170 * (because the transport allocated an appropriate 1171 * address and/or port). 1172 * 1173 * sockfs currently requires that the transport return 1174 * the requested address in the T_BIND_ACK, unless 1175 * there is code here to allow for any discrepancy. 1176 * Such code exists for AF_INET and AF_INET6. 1177 * 1178 * Netatalk chooses to return the requested address 1179 * rather than the (correct) allocated address. This 1180 * means that netatalk violates the TPI specification 1181 * (and would not function correctly if used from a 1182 * TLI application), but it does mean that it works 1183 * with sockfs. 1184 * 1185 * As noted above, using the newer XTI bind primitive 1186 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1187 * allow sockfs to be more sure about whether or not 1188 * the bind request had succeeded (as transports are 1189 * not permitted to bind to a different address than 1190 * that requested - they must return failure). 1191 * Unfortunately, support for T_BIND_REQ may not be 1192 * present in all transport implementations (netatalk, 1193 * for example, doesn't have it), making the 1194 * transition difficult. 
1195 */ 1196 if (bind_ack->ADDR_length != addrlen) { 1197 /* Assumes that the requested address was in use */ 1198 freemsg(mp); 1199 error = EADDRINUSE; 1200 eprintsoline(so, error); 1201 goto done; 1202 } 1203 1204 switch (so->so_family) { 1205 case AF_INET6: 1206 case AF_INET: { 1207 sin_t *rname, *aname; 1208 1209 rname = (sin_t *)addr; 1210 aname = (sin_t *)sti->sti_laddr_sa; 1211 1212 /* 1213 * Take advantage of the alignment 1214 * of sin_port and sin6_port which fall 1215 * in the same place in their data structures. 1216 * Just use sin_port for either address family. 1217 * 1218 * This may become a problem if (heaven forbid) 1219 * there's a separate ipv6port_reserved... :-P 1220 * 1221 * Binding to port 0 has the semantics of letting 1222 * the transport bind to any port. 1223 * 1224 * If the transport is TCP or UDP since we had sent 1225 * a T_BIND_REQ we would not get a port other than 1226 * what we asked for. 1227 */ 1228 if (tcp_udp_xport) { 1229 /* 1230 * Pick up the new port number if we bound to 1231 * port 0. 1232 */ 1233 if (aname->sin_port == 0) 1234 aname->sin_port = rname->sin_port; 1235 sti->sti_laddr_valid = 1; 1236 break; 1237 } 1238 if (aname->sin_port != 0 && 1239 aname->sin_port != rname->sin_port) { 1240 freemsg(mp); 1241 error = EADDRINUSE; 1242 eprintsoline(so, error); 1243 goto done; 1244 } 1245 /* 1246 * Pick up the new port number if we bound to port 0. 1247 */ 1248 aname->sin_port = rname->sin_port; 1249 1250 /* 1251 * Unfortunately, addresses aren't _quite_ the same. 
1252 */ 1253 if (so->so_family == AF_INET) { 1254 if (aname->sin_addr.s_addr != 1255 rname->sin_addr.s_addr) { 1256 freemsg(mp); 1257 error = EADDRNOTAVAIL; 1258 eprintsoline(so, error); 1259 goto done; 1260 } 1261 } else { 1262 sin6_t *rname6 = (sin6_t *)rname; 1263 sin6_t *aname6 = (sin6_t *)aname; 1264 1265 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1266 &rname6->sin6_addr)) { 1267 freemsg(mp); 1268 error = EADDRNOTAVAIL; 1269 eprintsoline(so, error); 1270 goto done; 1271 } 1272 } 1273 break; 1274 } 1275 case AF_UNIX: 1276 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1277 freemsg(mp); 1278 error = EADDRINUSE; 1279 eprintsoline(so, error); 1280 eprintso(so, 1281 ("addrlen %d, addr 0x%x, vp %p\n", 1282 addrlen, *((int *)addr), 1283 (void *)sti->sti_ux_bound_vp)); 1284 goto done; 1285 } 1286 sti->sti_laddr_valid = 1; 1287 break; 1288 default: 1289 /* 1290 * NOTE: This assumes that addresses can be 1291 * byte-compared for equivalence. 1292 */ 1293 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1294 freemsg(mp); 1295 error = EADDRINUSE; 1296 eprintsoline(so, error); 1297 goto done; 1298 } 1299 /* 1300 * Don't mark sti_laddr_valid, as we cannot be 1301 * sure that the returned address is the real 1302 * bound address when talking to an unknown 1303 * transport. 1304 */ 1305 break; 1306 } 1307 } else { 1308 /* 1309 * Save for returned address for getsockname. 1310 * Needed for unspecific bind unless transport supports 1311 * the TI_GETMYNAME ioctl. 1312 * Do this for AF_INET{,6} even though they do, as 1313 * caching info here is much better performance than 1314 * a TPI/STREAMS trip to the transport for getsockname. 1315 * Any which can't for some reason _must_ _not_ set 1316 * sti_laddr_valid here for the caching version of 1317 * getsockname to not break; 1318 */ 1319 switch (so->so_family) { 1320 case AF_UNIX: 1321 /* 1322 * Record the address bound with the transport 1323 * for use by socketpair. 
1324 */ 1325 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1326 sti->sti_laddr_valid = 1; 1327 break; 1328 case AF_INET: 1329 case AF_INET6: 1330 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1331 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1332 sti->sti_laddr_valid = 1; 1333 break; 1334 default: 1335 /* 1336 * Don't mark sti_laddr_valid, as we cannot be 1337 * sure that the returned address is the real 1338 * bound address when talking to an unknown 1339 * transport. 1340 */ 1341 break; 1342 } 1343 } 1344 1345 if (nl7c != NULL) { 1346 /* Register listen()er sonode pointer with NL7C */ 1347 nl7c_listener_addr(nl7c, so); 1348 } 1349 1350 freemsg(mp); 1351 1352 done: 1353 if (error) { 1354 /* reset state & backlog to values held on entry */ 1355 if (clear_acceptconn_on_err == B_TRUE) 1356 so->so_state &= ~SS_ACCEPTCONN; 1357 if (restore_backlog_on_err == B_TRUE) 1358 so->so_backlog = save_so_backlog; 1359 1360 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1361 int err; 1362 1363 err = sotpi_unbind(so, 0); 1364 /* LINTED - statement has no consequent: if */ 1365 if (err) { 1366 eprintsoline(so, error); 1367 } else { 1368 ASSERT(!(so->so_state & SS_ISBOUND)); 1369 } 1370 } 1371 } 1372 if (!(flags & _SOBIND_LOCK_HELD)) { 1373 so_unlock_single(so, SOLOCKED); 1374 mutex_exit(&so->so_lock); 1375 } else { 1376 ASSERT(MUTEX_HELD(&so->so_lock)); 1377 ASSERT(so->so_flag & SOLOCKED); 1378 } 1379 return (error); 1380 } 1381 1382 /* bind the socket */ 1383 static int 1384 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1385 int flags, struct cred *cr) 1386 { 1387 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1388 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1389 1390 flags &= ~_SOBIND_SOCKETPAIR; 1391 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1392 } 1393 1394 /* 1395 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1396 * address, or when listen needs to unbind and bind. 
1397 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1398 * so that a sobind can pick them up. 1399 */ 1400 static int 1401 sotpi_unbind(struct sonode *so, int flags) 1402 { 1403 struct T_unbind_req unbind_req; 1404 int error = 0; 1405 mblk_t *mp; 1406 sotpi_info_t *sti = SOTOTPI(so); 1407 1408 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1409 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1410 1411 ASSERT(MUTEX_HELD(&so->so_lock)); 1412 ASSERT(so->so_flag & SOLOCKED); 1413 1414 if (!(so->so_state & SS_ISBOUND)) { 1415 error = EINVAL; 1416 eprintsoline(so, error); 1417 goto done; 1418 } 1419 1420 mutex_exit(&so->so_lock); 1421 1422 /* 1423 * Flush the read and write side (except stream head read queue) 1424 * and send down T_UNBIND_REQ. 1425 */ 1426 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1427 1428 unbind_req.PRIM_type = T_UNBIND_REQ; 1429 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1430 0, _ALLOC_SLEEP, CRED()); 1431 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1432 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1433 mutex_enter(&so->so_lock); 1434 if (error) { 1435 eprintsoline(so, error); 1436 goto done; 1437 } 1438 1439 error = sowaitokack(so, T_UNBIND_REQ); 1440 if (error) { 1441 eprintsoline(so, error); 1442 goto done; 1443 } 1444 1445 /* 1446 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1447 * strsock_proto while the lock was dropped above, the unbind 1448 * is allowed to complete. 1449 */ 1450 if (!(flags & _SOUNBIND_REBIND)) { 1451 /* 1452 * Clear out bound address. 
1453 */ 1454 vnode_t *vp; 1455 1456 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1457 sti->sti_ux_bound_vp = NULL; 1458 vn_rele_stream(vp); 1459 } 1460 /* Clear out address */ 1461 sti->sti_laddr_len = 0; 1462 } 1463 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1464 sti->sti_laddr_valid = 0; 1465 1466 done: 1467 1468 /* If the caller held the lock don't release it here */ 1469 ASSERT(MUTEX_HELD(&so->so_lock)); 1470 ASSERT(so->so_flag & SOLOCKED); 1471 1472 return (error); 1473 } 1474 1475 /* 1476 * listen on the socket. 1477 * For TPI conforming transports this has to first unbind with the transport 1478 * and then bind again using the new backlog. 1479 */ 1480 /* ARGSUSED */ 1481 int 1482 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1483 { 1484 int error = 0; 1485 sotpi_info_t *sti = SOTOTPI(so); 1486 1487 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1488 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1489 1490 if (sti->sti_serv_type == T_CLTS) 1491 return (EOPNOTSUPP); 1492 1493 /* 1494 * If the socket is ready to accept connections already, then 1495 * return without doing anything. This avoids a problem where 1496 * a second listen() call fails if a connection is pending and 1497 * leaves the socket unbound. Only when we are not unbinding 1498 * with the transport can we safely increase the backlog. 1499 */ 1500 if (so->so_state & SS_ACCEPTCONN && 1501 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1502 /*CONSTCOND*/ 1503 !solisten_tpi_tcp)) 1504 return (0); 1505 1506 if (so->so_state & SS_ISCONNECTED) 1507 return (EINVAL); 1508 1509 mutex_enter(&so->so_lock); 1510 so_lock_single(so); /* Set SOLOCKED */ 1511 1512 /* 1513 * If the listen doesn't change the backlog we do nothing. 1514 * This avoids an EPROTO error from the transport. 
1515 */ 1516 if ((so->so_state & SS_ACCEPTCONN) && 1517 so->so_backlog == backlog) 1518 goto done; 1519 1520 if (!(so->so_state & SS_ISBOUND)) { 1521 /* 1522 * Must have been explicitly bound in the UNIX domain. 1523 */ 1524 if (so->so_family == AF_UNIX) { 1525 error = EINVAL; 1526 goto done; 1527 } 1528 error = sotpi_bindlisten(so, NULL, 0, backlog, 1529 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1530 } else if (backlog > 0) { 1531 /* 1532 * AF_INET{,6} hack to avoid losing the port. 1533 * Assumes that all AF_INET{,6} transports can handle a 1534 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1535 * has already bound thus it is possible to avoid the unbind. 1536 */ 1537 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1538 /*CONSTCOND*/ 1539 !solisten_tpi_tcp)) { 1540 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1541 if (error) 1542 goto done; 1543 } 1544 error = sotpi_bindlisten(so, NULL, 0, backlog, 1545 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1546 } else { 1547 so->so_state |= SS_ACCEPTCONN; 1548 so->so_backlog = backlog; 1549 } 1550 if (error) 1551 goto done; 1552 ASSERT(so->so_state & SS_ACCEPTCONN); 1553 done: 1554 so_unlock_single(so, SOLOCKED); 1555 mutex_exit(&so->so_lock); 1556 return (error); 1557 } 1558 1559 /* 1560 * Disconnect either a specified seqno or all (-1). 1561 * The former is used on listening sockets only. 1562 * 1563 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1564 * the current use of sodisconnect(seqno == -1) is only for shutdown 1565 * so there is no point (and potentially incorrect) to unbind. 
1566 */ 1567 static int 1568 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1569 { 1570 struct T_discon_req discon_req; 1571 int error = 0; 1572 mblk_t *mp; 1573 1574 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1575 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1576 1577 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1578 mutex_enter(&so->so_lock); 1579 so_lock_single(so); /* Set SOLOCKED */ 1580 } else { 1581 ASSERT(MUTEX_HELD(&so->so_lock)); 1582 ASSERT(so->so_flag & SOLOCKED); 1583 } 1584 1585 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1586 error = EINVAL; 1587 eprintsoline(so, error); 1588 goto done; 1589 } 1590 1591 mutex_exit(&so->so_lock); 1592 /* 1593 * Flush the write side (unless this is a listener) 1594 * and then send down a T_DISCON_REQ. 1595 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1596 * and other messages.) 1597 */ 1598 if (!(so->so_state & SS_ACCEPTCONN)) 1599 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1600 1601 discon_req.PRIM_type = T_DISCON_REQ; 1602 discon_req.SEQ_number = seqno; 1603 mp = soallocproto1(&discon_req, sizeof (discon_req), 1604 0, _ALLOC_SLEEP, CRED()); 1605 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1606 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1607 mutex_enter(&so->so_lock); 1608 if (error) { 1609 eprintsoline(so, error); 1610 goto done; 1611 } 1612 1613 error = sowaitokack(so, T_DISCON_REQ); 1614 if (error) { 1615 eprintsoline(so, error); 1616 goto done; 1617 } 1618 /* 1619 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1620 * strsock_proto while the lock was dropped above, the disconnect 1621 * is allowed to complete. However, it is not possible to 1622 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 
1623 */ 1624 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1625 SOTOTPI(so)->sti_laddr_valid = 0; 1626 SOTOTPI(so)->sti_faddr_valid = 0; 1627 done: 1628 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1629 so_unlock_single(so, SOLOCKED); 1630 mutex_exit(&so->so_lock); 1631 } else { 1632 /* If the caller held the lock don't release it here */ 1633 ASSERT(MUTEX_HELD(&so->so_lock)); 1634 ASSERT(so->so_flag & SOLOCKED); 1635 } 1636 return (error); 1637 } 1638 1639 /* ARGSUSED */ 1640 int 1641 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1642 struct sonode **nsop) 1643 { 1644 struct T_conn_ind *conn_ind; 1645 struct T_conn_res *conn_res; 1646 int error = 0; 1647 mblk_t *mp, *ack_mp; 1648 struct sonode *nso; 1649 vnode_t *nvp; 1650 void *src; 1651 t_uscalar_t srclen; 1652 void *opt; 1653 t_uscalar_t optlen; 1654 t_scalar_t PRIM_type; 1655 t_scalar_t SEQ_number; 1656 size_t sinlen; 1657 sotpi_info_t *sti = SOTOTPI(so); 1658 sotpi_info_t *nsti; 1659 1660 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1661 (void *)so, fflag, (void *)nsop, 1662 pr_state(so->so_state, so->so_mode))); 1663 1664 /* 1665 * Defer single-threading the accepting socket until 1666 * the T_CONN_IND has been received and parsed and the 1667 * new sonode has been opened. 1668 */ 1669 1670 /* Check that we are not already connected */ 1671 if ((so->so_state & SS_ACCEPTCONN) == 0) 1672 goto conn_bad; 1673 again: 1674 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1675 goto e_bad; 1676 1677 ASSERT(mp != NULL); 1678 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1679 1680 /* 1681 * Save SEQ_number for error paths. 
1682 */ 1683 SEQ_number = conn_ind->SEQ_number; 1684 1685 srclen = conn_ind->SRC_length; 1686 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1687 if (src == NULL) { 1688 error = EPROTO; 1689 freemsg(mp); 1690 eprintsoline(so, error); 1691 goto disconnect_unlocked; 1692 } 1693 optlen = conn_ind->OPT_length; 1694 switch (so->so_family) { 1695 case AF_INET: 1696 case AF_INET6: 1697 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1698 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1699 &opt, conn_ind->OPT_length); 1700 } else { 1701 /* 1702 * The transport (in this case TCP) hasn't sent up 1703 * a pointer to an instance for the accept fast-path. 1704 * Disable fast-path completely because the call to 1705 * sotpi_create() below would otherwise create an 1706 * incomplete TCP instance, which would lead to 1707 * problems when sockfs sends a normal T_CONN_RES 1708 * message down the new stream. 1709 */ 1710 if (sti->sti_direct) { 1711 int rval; 1712 /* 1713 * For consistency we inform tcp to disable 1714 * direct interface on the listener, though 1715 * we can certainly live without doing this 1716 * because no data will ever travel upstream 1717 * on the listening socket. 1718 */ 1719 sti->sti_direct = 0; 1720 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1721 0, 0, K_TO_K, cr, &rval); 1722 } 1723 opt = NULL; 1724 optlen = 0; 1725 } 1726 break; 1727 case AF_UNIX: 1728 default: 1729 if (optlen != 0) { 1730 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1731 __TPI_ALIGN_SIZE); 1732 if (opt == NULL) { 1733 error = EPROTO; 1734 freemsg(mp); 1735 eprintsoline(so, error); 1736 goto disconnect_unlocked; 1737 } 1738 } 1739 if (so->so_family == AF_UNIX) { 1740 if (!sti->sti_faddr_noxlate) { 1741 src = NULL; 1742 srclen = 0; 1743 } 1744 /* Extract src address from options */ 1745 if (optlen != 0) 1746 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1747 } 1748 break; 1749 } 1750 1751 /* 1752 * Create the new socket. 
1753 */ 1754 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1755 if (nso == NULL) { 1756 ASSERT(error != 0); 1757 /* 1758 * Accept can not fail with ENOBUFS. sotpi_create 1759 * sleeps waiting for memory until a signal is caught 1760 * so return EINTR. 1761 */ 1762 freemsg(mp); 1763 if (error == ENOBUFS) 1764 error = EINTR; 1765 goto e_disc_unl; 1766 } 1767 nvp = SOTOV(nso); 1768 nsti = SOTOTPI(nso); 1769 1770 #ifdef DEBUG 1771 /* 1772 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1773 * it's inherited early to allow debugging of the accept code itself. 1774 */ 1775 nso->so_options |= so->so_options & SO_DEBUG; 1776 #endif /* DEBUG */ 1777 1778 /* 1779 * Save the SRC address from the T_CONN_IND 1780 * for getpeername to work on AF_UNIX and on transports that do not 1781 * support TI_GETPEERNAME. 1782 * 1783 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1784 * copyin_name(). 1785 */ 1786 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1787 error = EINVAL; 1788 freemsg(mp); 1789 eprintsoline(so, error); 1790 goto disconnect_vp_unlocked; 1791 } 1792 nsti->sti_faddr_len = (socklen_t)srclen; 1793 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1794 bcopy(src, nsti->sti_faddr_sa, srclen); 1795 nsti->sti_faddr_valid = 1; 1796 1797 /* 1798 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1799 */ 1800 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1801 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1802 cred_t *cr; 1803 pid_t cpid; 1804 1805 cr = msg_getcred(mp, &cpid); 1806 if (cr != NULL) { 1807 crhold(cr); 1808 nso->so_peercred = cr; 1809 nso->so_cpid = cpid; 1810 } 1811 freemsg(mp); 1812 1813 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1814 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1815 if (mp == NULL) { 1816 /* 1817 * Accept can not fail with ENOBUFS. 1818 * A signal was caught so return EINTR. 
1819 */ 1820 error = EINTR; 1821 eprintsoline(so, error); 1822 goto disconnect_vp_unlocked; 1823 } 1824 conn_res = (struct T_conn_res *)mp->b_rptr; 1825 } else { 1826 /* 1827 * For efficency reasons we use msg_extractcred; no crhold 1828 * needed since db_credp is cleared (i.e., we move the cred 1829 * from the message to so_peercred. 1830 */ 1831 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1832 1833 mp->b_rptr = DB_BASE(mp); 1834 conn_res = (struct T_conn_res *)mp->b_rptr; 1835 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1836 1837 mblk_setcred(mp, cr, curproc->p_pid); 1838 } 1839 1840 /* 1841 * New socket must be bound at least in sockfs and, except for AF_INET, 1842 * (or AF_INET6) it also has to be bound in the transport provider. 1843 * We set the local address in the sonode from the T_OK_ACK of the 1844 * T_CONN_RES. For this reason the address we bind to here isn't 1845 * important. 1846 */ 1847 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1848 /*CONSTCOND*/ 1849 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1850 /* 1851 * Optimization for AF_INET{,6} transports 1852 * that can handle a T_CONN_RES without being bound. 1853 */ 1854 mutex_enter(&nso->so_lock); 1855 so_automatic_bind(nso); 1856 mutex_exit(&nso->so_lock); 1857 } else { 1858 /* Perform NULL bind with the transport provider. */ 1859 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1860 cr)) != 0) { 1861 ASSERT(error != ENOBUFS); 1862 freemsg(mp); 1863 eprintsoline(nso, error); 1864 goto disconnect_vp_unlocked; 1865 } 1866 } 1867 1868 /* 1869 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1870 * so that any data arriving on the new socket will cause the 1871 * appropriate signals to be delivered for the new socket. 1872 * 1873 * No other thread (except strsock_proto and strsock_misc) 1874 * can access the new socket thus we relax the locking. 
1875 */ 1876 nso->so_pgrp = so->so_pgrp; 1877 nso->so_state |= so->so_state & SS_ASYNC; 1878 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1879 1880 if (nso->so_pgrp != 0) { 1881 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1882 eprintsoline(nso, error); 1883 error = 0; 1884 nso->so_pgrp = 0; 1885 } 1886 } 1887 1888 /* 1889 * Make note of the socket level options. TCP and IP level options 1890 * are already inherited. We could do all this after accept is 1891 * successful but doing it here simplifies code and no harm done 1892 * for error case. 1893 */ 1894 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1895 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1896 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1897 nso->so_sndbuf = so->so_sndbuf; 1898 nso->so_rcvbuf = so->so_rcvbuf; 1899 if (nso->so_options & SO_LINGER) 1900 nso->so_linger = so->so_linger; 1901 1902 /* 1903 * Note that the following sti_direct code path should be 1904 * removed once we are confident that the direct sockets 1905 * do not result in any degradation. 
1906 */ 1907 if (sti->sti_direct) { 1908 1909 ASSERT(opt != NULL); 1910 1911 conn_res->OPT_length = optlen; 1912 conn_res->OPT_offset = MBLKL(mp); 1913 bcopy(&opt, mp->b_wptr, optlen); 1914 mp->b_wptr += optlen; 1915 conn_res->PRIM_type = T_CONN_RES; 1916 conn_res->ACCEPTOR_id = 0; 1917 PRIM_type = T_CONN_RES; 1918 1919 /* Send down the T_CONN_RES on acceptor STREAM */ 1920 error = kstrputmsg(SOTOV(nso), mp, NULL, 1921 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1922 if (error) { 1923 mutex_enter(&so->so_lock); 1924 so_lock_single(so); 1925 eprintsoline(so, error); 1926 goto disconnect_vp; 1927 } 1928 mutex_enter(&nso->so_lock); 1929 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1930 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1931 if (error) { 1932 mutex_exit(&nso->so_lock); 1933 mutex_enter(&so->so_lock); 1934 so_lock_single(so); 1935 eprintsoline(so, error); 1936 goto disconnect_vp; 1937 } 1938 if (nso->so_family == AF_INET) { 1939 sin_t *sin; 1940 1941 sin = (sin_t *)(ack_mp->b_rptr + 1942 sizeof (struct T_ok_ack)); 1943 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 1944 nsti->sti_laddr_len = sizeof (sin_t); 1945 } else { 1946 sin6_t *sin6; 1947 1948 sin6 = (sin6_t *)(ack_mp->b_rptr + 1949 sizeof (struct T_ok_ack)); 1950 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 1951 nsti->sti_laddr_len = sizeof (sin6_t); 1952 } 1953 freemsg(ack_mp); 1954 1955 nso->so_state |= SS_ISCONNECTED; 1956 nso->so_proto_handle = (sock_lower_handle_t)opt; 1957 nsti->sti_laddr_valid = 1; 1958 1959 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 1960 /* 1961 * A NL7C marked listen()er so the new socket 1962 * inherits the listen()er's NL7C state, except 1963 * for NL7C_POLLIN. 1964 * 1965 * Only call NL7C to process the new socket if 1966 * the listen socket allows blocking i/o. 
1967 */ 1968 nsti->sti_nl7c_flags = 1969 sti->sti_nl7c_flags & (~NL7C_POLLIN); 1970 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1971 /* 1972 * Nonblocking accept() just make it 1973 * persist to defer processing to the 1974 * read-side syscall (e.g. read). 1975 */ 1976 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 1977 } else if (nl7c_process(nso, B_FALSE)) { 1978 /* 1979 * NL7C has completed processing on the 1980 * socket, close the socket and back to 1981 * the top to await the next T_CONN_IND. 1982 */ 1983 mutex_exit(&nso->so_lock); 1984 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1985 cr, NULL); 1986 VN_RELE(nvp); 1987 goto again; 1988 } 1989 /* Pass the new socket out */ 1990 } 1991 1992 mutex_exit(&nso->so_lock); 1993 1994 /* 1995 * It's possible, through the use of autopush for example, 1996 * that the acceptor stream may not support sti_direct 1997 * semantics. If the new socket does not support sti_direct 1998 * we issue a _SIOCSOCKFALLBACK to inform the transport 1999 * as we would in the I_PUSH case. 2000 */ 2001 if (nsti->sti_direct == 0) { 2002 int rval; 2003 2004 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2005 0, 0, K_TO_K, cr, &rval)) != 0) { 2006 mutex_enter(&so->so_lock); 2007 so_lock_single(so); 2008 eprintsoline(so, error); 2009 goto disconnect_vp; 2010 } 2011 } 2012 2013 /* 2014 * Pass out new socket. 2015 */ 2016 if (nsop != NULL) 2017 *nsop = nso; 2018 2019 return (0); 2020 } 2021 2022 /* 2023 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2024 * which don't support the FireEngine accept fast-path. It is also 2025 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2026 * again. Neither sockfs nor TCP attempt to find out if some other 2027 * random module has been inserted in between (in which case we 2028 * should follow TLI accept behaviour). We blindly assume the worst 2029 * case and revert back to old behaviour i.e. 
TCP will not send us 2030 * any option (eager) and the accept should happen on the listener 2031 * queue. Any queued T_conn_ind have already got their options removed 2032 * by so_sock2_stream() when "sockmod" was I_POP'd. 2033 */ 2034 /* 2035 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2036 */ 2037 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2038 #ifdef _ILP32 2039 queue_t *q; 2040 2041 /* 2042 * Find read queue in driver 2043 * Can safely do this since we "own" nso/nvp. 2044 */ 2045 q = strvp2wq(nvp)->q_next; 2046 while (SAMESTR(q)) 2047 q = q->q_next; 2048 q = RD(q); 2049 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2050 #else 2051 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2052 #endif /* _ILP32 */ 2053 conn_res->PRIM_type = O_T_CONN_RES; 2054 PRIM_type = O_T_CONN_RES; 2055 } else { 2056 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2057 conn_res->PRIM_type = T_CONN_RES; 2058 PRIM_type = T_CONN_RES; 2059 } 2060 conn_res->SEQ_number = SEQ_number; 2061 conn_res->OPT_length = 0; 2062 conn_res->OPT_offset = 0; 2063 2064 mutex_enter(&so->so_lock); 2065 so_lock_single(so); /* Set SOLOCKED */ 2066 mutex_exit(&so->so_lock); 2067 2068 error = kstrputmsg(SOTOV(so), mp, NULL, 2069 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2070 mutex_enter(&so->so_lock); 2071 if (error) { 2072 eprintsoline(so, error); 2073 goto disconnect_vp; 2074 } 2075 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2076 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2077 if (error) { 2078 eprintsoline(so, error); 2079 goto disconnect_vp; 2080 } 2081 mutex_exit(&so->so_lock); 2082 /* 2083 * If there is a sin/sin6 appended onto the T_OK_ACK use 2084 * that to set the local address. If this is not present 2085 * then we zero out the address and don't set the 2086 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2087 * the pathname from the listening socket. 
2088 * In the case where this is TCP or an AF_UNIX socket the 2089 * client side may have queued data or a T_ORDREL in the 2090 * transport. Having now sent the T_CONN_RES we may receive 2091 * those queued messages at any time. Hold the acceptor 2092 * so_lock until its state and laddr are finalized. 2093 */ 2094 mutex_enter(&nso->so_lock); 2095 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 2096 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 2097 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2098 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2099 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2100 nsti->sti_laddr_len = sinlen; 2101 nsti->sti_laddr_valid = 1; 2102 } else if (nso->so_family == AF_UNIX) { 2103 ASSERT(so->so_family == AF_UNIX); 2104 nsti->sti_laddr_len = sti->sti_laddr_len; 2105 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2106 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2107 nsti->sti_laddr_len); 2108 nsti->sti_laddr_valid = 1; 2109 } else { 2110 nsti->sti_laddr_len = sti->sti_laddr_len; 2111 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2112 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2113 nsti->sti_laddr_sa->sa_family = nso->so_family; 2114 } 2115 nso->so_state |= SS_ISCONNECTED; 2116 mutex_exit(&nso->so_lock); 2117 2118 freemsg(ack_mp); 2119 2120 mutex_enter(&so->so_lock); 2121 so_unlock_single(so, SOLOCKED); 2122 mutex_exit(&so->so_lock); 2123 2124 /* 2125 * Pass out new socket. 
2126 */ 2127 if (nsop != NULL) 2128 *nsop = nso; 2129 2130 return (0); 2131 2132 2133 eproto_disc_unl: 2134 error = EPROTO; 2135 e_disc_unl: 2136 eprintsoline(so, error); 2137 goto disconnect_unlocked; 2138 2139 pr_disc_vp_unl: 2140 eprintsoline(so, error); 2141 disconnect_vp_unlocked: 2142 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2143 VN_RELE(nvp); 2144 disconnect_unlocked: 2145 (void) sodisconnect(so, SEQ_number, 0); 2146 return (error); 2147 2148 pr_disc_vp: 2149 eprintsoline(so, error); 2150 disconnect_vp: 2151 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2152 so_unlock_single(so, SOLOCKED); 2153 mutex_exit(&so->so_lock); 2154 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2155 VN_RELE(nvp); 2156 return (error); 2157 2158 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2159 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2160 ? EOPNOTSUPP : EINVAL; 2161 e_bad: 2162 eprintsoline(so, error); 2163 return (error); 2164 } 2165 2166 /* 2167 * connect a socket. 2168 * 2169 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2170 * unconnect (by specifying a null address). 2171 */ 2172 int 2173 sotpi_connect(struct sonode *so, 2174 struct sockaddr *name, 2175 socklen_t namelen, 2176 int fflag, 2177 int flags, 2178 struct cred *cr) 2179 { 2180 struct T_conn_req conn_req; 2181 int error = 0; 2182 mblk_t *mp; 2183 void *src; 2184 socklen_t srclen; 2185 void *addr; 2186 socklen_t addrlen; 2187 boolean_t need_unlock; 2188 sotpi_info_t *sti = SOTOTPI(so); 2189 2190 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2191 (void *)so, (void *)name, namelen, fflag, flags, 2192 pr_state(so->so_state, so->so_mode))); 2193 2194 /* 2195 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2196 * avoid sleeping for memory with SOLOCKED held. 2197 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2198 * + sizeof (struct T_opthdr). 
2199 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2200 * exceed sti_faddr_maxlen). 2201 */ 2202 mp = soallocproto(sizeof (struct T_conn_req) + 2203 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 2204 cr); 2205 if (mp == NULL) { 2206 /* 2207 * Connect can not fail with ENOBUFS. A signal was 2208 * caught so return EINTR. 2209 */ 2210 error = EINTR; 2211 eprintsoline(so, error); 2212 return (error); 2213 } 2214 2215 mutex_enter(&so->so_lock); 2216 /* 2217 * Make sure there is a preallocated T_unbind_req message 2218 * before any binding. This message is allocated when the 2219 * socket is created. Since another thread can consume 2220 * so_unbind_mp by the time we return from so_lock_single(), 2221 * we should check the availability of so_unbind_mp after 2222 * we return from so_lock_single(). 2223 */ 2224 2225 so_lock_single(so); /* Set SOLOCKED */ 2226 need_unlock = B_TRUE; 2227 2228 if (sti->sti_unbind_mp == NULL) { 2229 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2230 /* NOTE: holding so_lock while sleeping */ 2231 sti->sti_unbind_mp = 2232 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2233 if (sti->sti_unbind_mp == NULL) { 2234 error = EINTR; 2235 goto done; 2236 } 2237 } 2238 2239 /* 2240 * Can't have done a listen before connecting. 2241 */ 2242 if (so->so_state & SS_ACCEPTCONN) { 2243 error = EOPNOTSUPP; 2244 goto done; 2245 } 2246 2247 /* 2248 * Must be bound with the transport 2249 */ 2250 if (!(so->so_state & SS_ISBOUND)) { 2251 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2252 /*CONSTCOND*/ 2253 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2254 /* 2255 * Optimization for AF_INET{,6} transports 2256 * that can handle a T_CONN_REQ without being bound. 
2257 */ 2258 so_automatic_bind(so); 2259 } else { 2260 error = sotpi_bind(so, NULL, 0, 2261 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2262 if (error) 2263 goto done; 2264 } 2265 ASSERT(so->so_state & SS_ISBOUND); 2266 flags |= _SOCONNECT_DID_BIND; 2267 } 2268 2269 /* 2270 * Handle a connect to a name parameter of type AF_UNSPEC like a 2271 * connect to a null address. This is the portable method to 2272 * unconnect a socket. 2273 */ 2274 if ((namelen >= sizeof (sa_family_t)) && 2275 (name->sa_family == AF_UNSPEC)) { 2276 name = NULL; 2277 namelen = 0; 2278 } 2279 2280 /* 2281 * Check that we are not already connected. 2282 * A connection-oriented socket cannot be reconnected. 2283 * A connected connection-less socket can be 2284 * - connected to a different address by a subsequent connect 2285 * - "unconnected" by a connect to the NULL address 2286 */ 2287 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2288 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2289 if (so->so_mode & SM_CONNREQUIRED) { 2290 /* Connection-oriented socket */ 2291 error = so->so_state & SS_ISCONNECTED ? 2292 EISCONN : EALREADY; 2293 goto done; 2294 } 2295 /* Connection-less socket */ 2296 if (name == NULL) { 2297 /* 2298 * Remove the connected state and clear SO_DGRAM_ERRIND 2299 * since it was set when the socket was connected. 2300 * If this is UDP also send down a T_DISCON_REQ. 2301 */ 2302 int val; 2303 2304 if ((so->so_family == AF_INET || 2305 so->so_family == AF_INET6) && 2306 (so->so_type == SOCK_DGRAM || 2307 so->so_type == SOCK_RAW) && 2308 /*CONSTCOND*/ 2309 !soconnect_tpi_udp) { 2310 /* XXX What about implicitly unbinding here? 
*/ 2311 error = sodisconnect(so, -1, 2312 _SODISCONNECT_LOCK_HELD); 2313 } else { 2314 so->so_state &= 2315 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2316 sti->sti_faddr_valid = 0; 2317 sti->sti_faddr_len = 0; 2318 } 2319 2320 /* Remove SOLOCKED since setsockopt will grab it */ 2321 so_unlock_single(so, SOLOCKED); 2322 mutex_exit(&so->so_lock); 2323 2324 val = 0; 2325 (void) sotpi_setsockopt(so, SOL_SOCKET, 2326 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2327 cr); 2328 2329 mutex_enter(&so->so_lock); 2330 so_lock_single(so); /* Set SOLOCKED */ 2331 goto done; 2332 } 2333 } 2334 ASSERT(so->so_state & SS_ISBOUND); 2335 2336 if (name == NULL || namelen == 0) { 2337 error = EINVAL; 2338 goto done; 2339 } 2340 /* 2341 * Mark the socket if sti_faddr_sa represents the transport level 2342 * address. 2343 */ 2344 if (flags & _SOCONNECT_NOXLATE) { 2345 struct sockaddr_ux *soaddr_ux; 2346 2347 ASSERT(so->so_family == AF_UNIX); 2348 if (namelen != sizeof (struct sockaddr_ux)) { 2349 error = EINVAL; 2350 goto done; 2351 } 2352 soaddr_ux = (struct sockaddr_ux *)name; 2353 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2354 namelen = sizeof (soaddr_ux->sou_addr); 2355 sti->sti_faddr_noxlate = 1; 2356 } 2357 2358 /* 2359 * Length and family checks. 2360 */ 2361 error = so_addr_verify(so, name, namelen); 2362 if (error) 2363 goto bad; 2364 2365 /* 2366 * Save foreign address. Needed for AF_UNIX as well as 2367 * transport providers that do not support TI_GETPEERNAME. 2368 * Also used for cached foreign address for TCP and UDP. 2369 */ 2370 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2371 error = EINVAL; 2372 goto done; 2373 } 2374 sti->sti_faddr_len = (socklen_t)namelen; 2375 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2376 bcopy(name, sti->sti_faddr_sa, namelen); 2377 sti->sti_faddr_valid = 1; 2378 2379 if (so->so_family == AF_UNIX) { 2380 if (sti->sti_faddr_noxlate) { 2381 /* 2382 * sti_faddr is a transport-level address, so 2383 * don't pass it as an option. 
Do save it in 2384 * sti_ux_faddr, used for connected DG send. 2385 */ 2386 src = NULL; 2387 srclen = 0; 2388 addr = sti->sti_faddr_sa; 2389 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2390 bcopy(addr, &sti->sti_ux_faddr, 2391 sizeof (sti->sti_ux_faddr)); 2392 } else { 2393 /* 2394 * Pass the sockaddr_un source address as an option 2395 * and translate the remote address. 2396 * Holding so_lock thus sti_laddr_sa can not change. 2397 */ 2398 src = sti->sti_laddr_sa; 2399 srclen = (t_uscalar_t)sti->sti_laddr_len; 2400 dprintso(so, 1, 2401 ("sotpi_connect UNIX: srclen %d, src %p\n", 2402 srclen, src)); 2403 /* 2404 * Translate the destination address into our 2405 * internal form, and save it in sti_ux_faddr. 2406 * After this call, addr==&sti->sti_ux_taddr, 2407 * and we copy that to sti->sti_ux_faddr so 2408 * we save the connected peer address. 2409 */ 2410 error = so_ux_addr_xlate(so, 2411 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2412 (flags & _SOCONNECT_XPG4_2), 2413 &addr, &addrlen); 2414 if (error) 2415 goto bad; 2416 bcopy(&sti->sti_ux_taddr, &sti->sti_ux_faddr, 2417 sizeof (sti->sti_ux_faddr)); 2418 } 2419 } else { 2420 addr = sti->sti_faddr_sa; 2421 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2422 src = NULL; 2423 srclen = 0; 2424 } 2425 /* 2426 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2427 * option which asks the transport provider to send T_UDERR_IND 2428 * messages. These T_UDERR_IND messages are used to return connected 2429 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2430 * 2431 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2432 * we send down a T_CONN_REQ. This is needed to let the 2433 * transport assign a local address that is consistent with 2434 * the remote address. Applications depend on a getsockname() 2435 * after a connect() to retrieve the "source" IP address for 2436 * the connected socket. 
Invalidate the cached local address 2437 * to force getsockname() to enquire of the transport. 2438 */ 2439 if (!(so->so_mode & SM_CONNREQUIRED)) { 2440 /* 2441 * Datagram socket. 2442 */ 2443 int32_t val; 2444 2445 so_unlock_single(so, SOLOCKED); 2446 mutex_exit(&so->so_lock); 2447 2448 val = 1; 2449 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2450 &val, (t_uscalar_t)sizeof (val), cr); 2451 2452 mutex_enter(&so->so_lock); 2453 so_lock_single(so); /* Set SOLOCKED */ 2454 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2455 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2456 soconnect_tpi_udp) { 2457 soisconnected(so); 2458 goto done; 2459 } 2460 /* 2461 * Send down T_CONN_REQ etc. 2462 * Clear fflag to avoid returning EWOULDBLOCK. 2463 */ 2464 fflag = 0; 2465 ASSERT(so->so_family != AF_UNIX); 2466 sti->sti_laddr_valid = 0; 2467 } else if (sti->sti_laddr_len != 0) { 2468 /* 2469 * If the local address or port was "any" then it may be 2470 * changed by the transport as a result of the 2471 * connect. Invalidate the cached version if we have one. 2472 */ 2473 switch (so->so_family) { 2474 case AF_INET: 2475 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2476 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2477 INADDR_ANY || 2478 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2479 sti->sti_laddr_valid = 0; 2480 break; 2481 2482 case AF_INET6: 2483 ASSERT(sti->sti_laddr_len == 2484 (socklen_t)sizeof (sin6_t)); 2485 if (IN6_IS_ADDR_UNSPECIFIED( 2486 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2487 IN6_IS_ADDR_V4MAPPED_ANY( 2488 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2489 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2490 sti->sti_laddr_valid = 0; 2491 break; 2492 2493 default: 2494 break; 2495 } 2496 } 2497 2498 /* 2499 * Check for failure of an earlier call 2500 */ 2501 if (so->so_error != 0) 2502 goto so_bad; 2503 2504 /* 2505 * Send down T_CONN_REQ. Message was allocated above. 
2506 */ 2507 conn_req.PRIM_type = T_CONN_REQ; 2508 conn_req.DEST_length = addrlen; 2509 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2510 if (srclen == 0) { 2511 conn_req.OPT_length = 0; 2512 conn_req.OPT_offset = 0; 2513 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2514 soappendmsg(mp, addr, addrlen); 2515 } else { 2516 /* 2517 * There is a AF_UNIX sockaddr_un to include as a source 2518 * address option. 2519 */ 2520 struct T_opthdr toh; 2521 2522 toh.level = SOL_SOCKET; 2523 toh.name = SO_SRCADDR; 2524 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2525 toh.status = 0; 2526 conn_req.OPT_length = 2527 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2528 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2529 _TPI_ALIGN_TOPT(addrlen)); 2530 2531 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2532 soappendmsg(mp, addr, addrlen); 2533 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2534 soappendmsg(mp, &toh, sizeof (toh)); 2535 soappendmsg(mp, src, srclen); 2536 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2537 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2538 } 2539 /* 2540 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2541 * in order to have the right state when the T_CONN_CON shows up. 
2542 */ 2543 soisconnecting(so); 2544 mutex_exit(&so->so_lock); 2545 2546 if (AU_AUDITING()) 2547 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2548 2549 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2550 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2551 mp = NULL; 2552 mutex_enter(&so->so_lock); 2553 if (error != 0) 2554 goto bad; 2555 2556 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2557 goto bad; 2558 2559 /* Allow other threads to access the socket */ 2560 so_unlock_single(so, SOLOCKED); 2561 need_unlock = B_FALSE; 2562 2563 /* 2564 * Wait until we get a T_CONN_CON or an error 2565 */ 2566 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2567 so_lock_single(so); /* Set SOLOCKED */ 2568 need_unlock = B_TRUE; 2569 } 2570 2571 done: 2572 freemsg(mp); 2573 switch (error) { 2574 case EINPROGRESS: 2575 case EALREADY: 2576 case EISCONN: 2577 case EINTR: 2578 /* Non-fatal errors */ 2579 sti->sti_laddr_valid = 0; 2580 /* FALLTHRU */ 2581 case 0: 2582 break; 2583 default: 2584 ASSERT(need_unlock); 2585 /* 2586 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2587 * and invalidate local-address cache 2588 */ 2589 so->so_state &= ~SS_ISCONNECTING; 2590 sti->sti_laddr_valid = 0; 2591 /* A discon_ind might have already unbound us */ 2592 if ((flags & _SOCONNECT_DID_BIND) && 2593 (so->so_state & SS_ISBOUND)) { 2594 int err; 2595 2596 err = sotpi_unbind(so, 0); 2597 /* LINTED - statement has no conseq */ 2598 if (err) { 2599 eprintsoline(so, err); 2600 } 2601 } 2602 break; 2603 } 2604 if (need_unlock) 2605 so_unlock_single(so, SOLOCKED); 2606 mutex_exit(&so->so_lock); 2607 return (error); 2608 2609 so_bad: error = sogeterr(so, B_TRUE); 2610 bad: eprintsoline(so, error); 2611 goto done; 2612 } 2613 2614 /* ARGSUSED */ 2615 int 2616 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2617 { 2618 struct T_ordrel_req ordrel_req; 2619 mblk_t *mp; 2620 uint_t old_state, state_change; 2621 int error = 0; 2622 sotpi_info_t *sti = SOTOTPI(so); 2623 2624 
dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2625 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2626 2627 mutex_enter(&so->so_lock); 2628 so_lock_single(so); /* Set SOLOCKED */ 2629 2630 /* 2631 * SunOS 4.X has no check for datagram sockets. 2632 * 5.X checks that it is connected (ENOTCONN) 2633 * X/Open requires that we check the connected state. 2634 */ 2635 if (!(so->so_state & SS_ISCONNECTED)) { 2636 if (!xnet_skip_checks) { 2637 error = ENOTCONN; 2638 if (xnet_check_print) { 2639 printf("sockfs: X/Open shutdown check " 2640 "caused ENOTCONN\n"); 2641 } 2642 } 2643 goto done; 2644 } 2645 /* 2646 * Record the current state and then perform any state changes. 2647 * Then use the difference between the old and new states to 2648 * determine which messages need to be sent. 2649 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2650 * duplicate calls to shutdown(). 2651 */ 2652 old_state = so->so_state; 2653 2654 switch (how) { 2655 case 0: 2656 socantrcvmore(so); 2657 break; 2658 case 1: 2659 socantsendmore(so); 2660 break; 2661 case 2: 2662 socantsendmore(so); 2663 socantrcvmore(so); 2664 break; 2665 default: 2666 error = EINVAL; 2667 goto done; 2668 } 2669 2670 /* 2671 * Assumes that the SS_CANT* flags are never cleared in the above code. 2672 */ 2673 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2674 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2675 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2676 2677 switch (state_change) { 2678 case 0: 2679 dprintso(so, 1, 2680 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2681 so->so_state)); 2682 goto done; 2683 2684 case SS_CANTRCVMORE: 2685 mutex_exit(&so->so_lock); 2686 strseteof(SOTOV(so), 1); 2687 /* 2688 * strseteof takes care of read side wakeups, 2689 * pollwakeups, and signals. 2690 */ 2691 /* 2692 * Get the read lock before flushing data to avoid problems 2693 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2694 */ 2695 mutex_enter(&so->so_lock); 2696 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2697 mutex_exit(&so->so_lock); 2698 2699 /* Flush read side queue */ 2700 strflushrq(SOTOV(so), FLUSHALL); 2701 2702 mutex_enter(&so->so_lock); 2703 so_unlock_read(so); /* Clear SOREADLOCKED */ 2704 break; 2705 2706 case SS_CANTSENDMORE: 2707 mutex_exit(&so->so_lock); 2708 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2709 mutex_enter(&so->so_lock); 2710 break; 2711 2712 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2713 mutex_exit(&so->so_lock); 2714 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2715 strseteof(SOTOV(so), 1); 2716 /* 2717 * strseteof takes care of read side wakeups, 2718 * pollwakeups, and signals. 2719 */ 2720 /* 2721 * Get the read lock before flushing data to avoid problems 2722 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2723 */ 2724 mutex_enter(&so->so_lock); 2725 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2726 mutex_exit(&so->so_lock); 2727 2728 /* Flush read side queue */ 2729 strflushrq(SOTOV(so), FLUSHALL); 2730 2731 mutex_enter(&so->so_lock); 2732 so_unlock_read(so); /* Clear SOREADLOCKED */ 2733 break; 2734 } 2735 2736 ASSERT(MUTEX_HELD(&so->so_lock)); 2737 2738 /* 2739 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2740 * was set due to this call and the new state has both of them set: 2741 * Send the AF_UNIX close indication 2742 * For T_COTS send a discon_ind 2743 * 2744 * If cantsend was set due to this call: 2745 * For T_COTSORD send an ordrel_ind 2746 * 2747 * Note that for T_CLTS there is no message sent here. 2748 */ 2749 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2750 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2751 /* 2752 * For SunOS 4.X compatibility we tell the other end 2753 * that we are unable to receive at this point. 
2754 */ 2755 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2756 so_unix_close(so); 2757 2758 if (sti->sti_serv_type == T_COTS) 2759 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2760 } 2761 if ((state_change & SS_CANTSENDMORE) && 2762 (sti->sti_serv_type == T_COTS_ORD)) { 2763 /* Send an orderly release */ 2764 ordrel_req.PRIM_type = T_ORDREL_REQ; 2765 2766 mutex_exit(&so->so_lock); 2767 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2768 0, _ALLOC_SLEEP, cr); 2769 /* 2770 * Send down the T_ORDREL_REQ even if there is flow control. 2771 * This prevents shutdown from blocking. 2772 * Note that there is no T_OK_ACK for ordrel_req. 2773 */ 2774 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2775 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2776 mutex_enter(&so->so_lock); 2777 if (error) { 2778 eprintsoline(so, error); 2779 goto done; 2780 } 2781 } 2782 2783 done: 2784 so_unlock_single(so, SOLOCKED); 2785 mutex_exit(&so->so_lock); 2786 return (error); 2787 } 2788 2789 /* 2790 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2791 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2792 * that we have closed. 2793 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2794 * T_UNITDATA_REQ containing the same option. 2795 * 2796 * For SOCK_DGRAM half-connections (somebody connected to this end 2797 * but this end is not connect) we don't know where to send any 2798 * SO_UNIX_CLOSE. 2799 * 2800 * We have to ignore stream head errors just in case there has been 2801 * a shutdown(output). 2802 * Ignore any flow control to try to get the message more quickly to the peer. 2803 * While locally ignoring flow control solves the problem when there 2804 * is only the loopback transport on the stream it would not provide 2805 * the correct AF_UNIX socket semantics when one or more modules have 2806 * been pushed. 
2807 */ 2808 void 2809 so_unix_close(struct sonode *so) 2810 { 2811 struct T_opthdr toh; 2812 mblk_t *mp; 2813 sotpi_info_t *sti = SOTOTPI(so); 2814 2815 ASSERT(MUTEX_HELD(&so->so_lock)); 2816 2817 ASSERT(so->so_family == AF_UNIX); 2818 2819 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2820 (SS_ISCONNECTED|SS_ISBOUND)) 2821 return; 2822 2823 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2824 (void *)so, pr_state(so->so_state, so->so_mode))); 2825 2826 toh.level = SOL_SOCKET; 2827 toh.name = SO_UNIX_CLOSE; 2828 2829 /* zero length + header */ 2830 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2831 toh.status = 0; 2832 2833 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2834 struct T_optdata_req tdr; 2835 2836 tdr.PRIM_type = T_OPTDATA_REQ; 2837 tdr.DATA_flag = 0; 2838 2839 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2840 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2841 2842 /* NOTE: holding so_lock while sleeping */ 2843 mp = soallocproto2(&tdr, sizeof (tdr), 2844 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2845 } else { 2846 struct T_unitdata_req tudr; 2847 void *addr; 2848 socklen_t addrlen; 2849 void *src; 2850 socklen_t srclen; 2851 struct T_opthdr toh2; 2852 t_scalar_t size; 2853 2854 /* 2855 * We know this is an AF_UNIX connected DGRAM socket. 2856 * We therefore already have the destination address 2857 * in the internal form needed for this send. This is 2858 * similar to the sosend_dgram call later in this file 2859 * when there's no user-specified destination address. 2860 */ 2861 if (sti->sti_faddr_noxlate) { 2862 /* 2863 * Already have a transport internal address. Do not 2864 * pass any (transport internal) source address. 2865 */ 2866 addr = sti->sti_faddr_sa; 2867 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2868 src = NULL; 2869 srclen = 0; 2870 } else { 2871 /* 2872 * Pass the sockaddr_un source address as an option 2873 * and translate the remote address. 2874 * Holding so_lock thus sti_laddr_sa can not change. 
2875 */ 2876 src = sti->sti_laddr_sa; 2877 srclen = (socklen_t)sti->sti_laddr_len; 2878 dprintso(so, 1, 2879 ("so_ux_close: srclen %d, src %p\n", 2880 srclen, src)); 2881 /* 2882 * Use the destination address saved in connect. 2883 */ 2884 addr = &sti->sti_ux_faddr; 2885 addrlen = sizeof (sti->sti_ux_faddr); 2886 } 2887 tudr.PRIM_type = T_UNITDATA_REQ; 2888 tudr.DEST_length = addrlen; 2889 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2890 if (srclen == 0) { 2891 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2892 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2893 _TPI_ALIGN_TOPT(addrlen)); 2894 2895 size = tudr.OPT_offset + tudr.OPT_length; 2896 /* NOTE: holding so_lock while sleeping */ 2897 mp = soallocproto2(&tudr, sizeof (tudr), 2898 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2899 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2900 soappendmsg(mp, &toh, sizeof (toh)); 2901 } else { 2902 /* 2903 * There is a AF_UNIX sockaddr_un to include as a 2904 * source address option. 2905 */ 2906 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2907 _TPI_ALIGN_TOPT(srclen)); 2908 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2909 _TPI_ALIGN_TOPT(addrlen)); 2910 2911 toh2.level = SOL_SOCKET; 2912 toh2.name = SO_SRCADDR; 2913 toh2.len = (t_uscalar_t)(srclen + 2914 sizeof (struct T_opthdr)); 2915 toh2.status = 0; 2916 2917 size = tudr.OPT_offset + tudr.OPT_length; 2918 2919 /* NOTE: holding so_lock while sleeping */ 2920 mp = soallocproto2(&tudr, sizeof (tudr), 2921 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2922 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2923 soappendmsg(mp, &toh, sizeof (toh)); 2924 soappendmsg(mp, &toh2, sizeof (toh2)); 2925 soappendmsg(mp, src, srclen); 2926 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2927 } 2928 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2929 } 2930 mutex_exit(&so->so_lock); 2931 (void) kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2932 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2933 mutex_enter(&so->so_lock); 2934 } 
2935 2936 /* 2937 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2938 * In addition, the caller typically verifies that there is some 2939 * potential state to clear by checking 2940 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2941 * before calling this routine. 2942 * Note that such a check can be made without holding so_lock since 2943 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2944 * decrements sti_oobsigcnt. 2945 * 2946 * When data is read *after* the point that all pending 2947 * oob data has been consumed the oob indication is cleared. 2948 * 2949 * This logic keeps select/poll returning POLLRDBAND and 2950 * SIOCATMARK returning true until we have read past 2951 * the mark. 2952 */ 2953 static void 2954 sorecv_update_oobstate(struct sonode *so) 2955 { 2956 sotpi_info_t *sti = SOTOTPI(so); 2957 2958 mutex_enter(&so->so_lock); 2959 ASSERT(so_verify_oobstate(so)); 2960 dprintso(so, 1, 2961 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2962 sti->sti_oobsigcnt, 2963 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 2964 if (sti->sti_oobsigcnt == 0) { 2965 /* No more pending oob indications */ 2966 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2967 freemsg(so->so_oobmsg); 2968 so->so_oobmsg = NULL; 2969 } 2970 ASSERT(so_verify_oobstate(so)); 2971 mutex_exit(&so->so_lock); 2972 } 2973 2974 /* 2975 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2976 */ 2977 static int 2978 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2979 { 2980 sotpi_info_t *sti = SOTOTPI(so); 2981 int error = 0; 2982 mblk_t *tmp = NULL; 2983 mblk_t *pmp = NULL; 2984 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 2985 2986 ASSERT(nmp != NULL); 2987 2988 while (nmp != NULL && uiop->uio_resid > 0) { 2989 ssize_t n; 2990 2991 if (DB_TYPE(nmp) == M_DATA) { 2992 /* 2993 * We have some data, uiomove up to resid bytes. 
2994 */ 2995 n = MIN(MBLKL(nmp), uiop->uio_resid); 2996 if (n > 0) 2997 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 2998 nmp->b_rptr += n; 2999 if (nmp->b_rptr == nmp->b_wptr) { 3000 pmp = nmp; 3001 nmp = nmp->b_cont; 3002 } 3003 if (error) 3004 break; 3005 } else { 3006 /* 3007 * We only handle data, save for caller to handle. 3008 */ 3009 if (pmp != NULL) { 3010 pmp->b_cont = nmp->b_cont; 3011 } 3012 nmp->b_cont = NULL; 3013 if (*rmp == NULL) { 3014 *rmp = nmp; 3015 } else { 3016 tmp->b_cont = nmp; 3017 } 3018 nmp = nmp->b_cont; 3019 tmp = nmp; 3020 } 3021 } 3022 if (pmp != NULL) { 3023 /* Free any mblk_t(s) which we have consumed */ 3024 pmp->b_cont = NULL; 3025 freemsg(sti->sti_nl7c_rcv_mp); 3026 } 3027 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3028 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3029 if (error == 0) { 3030 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3031 3032 error = p->r_v.r_v2; 3033 p->r_v.r_v2 = 0; 3034 } 3035 rp->r_vals = sti->sti_nl7c_rcv_rval; 3036 sti->sti_nl7c_rcv_rval = 0; 3037 } else { 3038 /* More mblk_t(s) to process so no rval to return */ 3039 rp->r_vals = 0; 3040 } 3041 return (error); 3042 } 3043 /* 3044 * Receive the next message on the queue. 3045 * If msg_controllen is non-zero when called the caller is interested in 3046 * any received control info (options). 3047 * If msg_namelen is non-zero when called the caller is interested in 3048 * any received source address. 3049 * The routine returns with msg_control and msg_name pointing to 3050 * kmem_alloc'ed memory which the caller has to free. 
3051 */ 3052 /* ARGSUSED */ 3053 int 3054 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3055 struct cred *cr) 3056 { 3057 union T_primitives *tpr; 3058 mblk_t *mp; 3059 uchar_t pri; 3060 int pflag, opflag; 3061 void *control; 3062 t_uscalar_t controllen; 3063 t_uscalar_t namelen; 3064 int so_state = so->so_state; /* Snapshot */ 3065 ssize_t saved_resid; 3066 rval_t rval; 3067 int flags; 3068 clock_t timout; 3069 int error = 0; 3070 sotpi_info_t *sti = SOTOTPI(so); 3071 3072 flags = msg->msg_flags; 3073 msg->msg_flags = 0; 3074 3075 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3076 (void *)so, (void *)msg, flags, 3077 pr_state(so->so_state, so->so_mode), so->so_error)); 3078 3079 if (so->so_version == SOV_STREAM) { 3080 so_update_attrs(so, SOACC); 3081 /* The imaginary "sockmod" has been popped - act as a stream */ 3082 return (strread(SOTOV(so), uiop, cr)); 3083 } 3084 3085 /* 3086 * If we are not connected because we have never been connected 3087 * we return ENOTCONN. If we have been connected (but are no longer 3088 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3089 * the EOF. 3090 * 3091 * An alternative would be to post an ENOTCONN error in stream head 3092 * (read+write) and clear it when we're connected. However, that error 3093 * would cause incorrect poll/select behavior! 3094 */ 3095 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3096 (so->so_mode & SM_CONNREQUIRED)) { 3097 return (ENOTCONN); 3098 } 3099 3100 /* 3101 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3102 * after checking that the read queue is empty) and returns zero. 3103 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3104 * is zero. 
3105 */ 3106 3107 if (flags & MSG_OOB) { 3108 /* Check that the transport supports OOB */ 3109 if (!(so->so_mode & SM_EXDATA)) 3110 return (EOPNOTSUPP); 3111 so_update_attrs(so, SOACC); 3112 return (sorecvoob(so, msg, uiop, flags, 3113 (so->so_options & SO_OOBINLINE))); 3114 } 3115 3116 so_update_attrs(so, SOACC); 3117 3118 /* 3119 * Set msg_controllen and msg_namelen to zero here to make it 3120 * simpler in the cases that no control or name is returned. 3121 */ 3122 controllen = msg->msg_controllen; 3123 namelen = msg->msg_namelen; 3124 msg->msg_controllen = 0; 3125 msg->msg_namelen = 0; 3126 3127 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3128 namelen, controllen)); 3129 3130 mutex_enter(&so->so_lock); 3131 /* 3132 * If an NL7C enabled socket and not waiting for write data. 3133 */ 3134 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3135 NL7C_ENABLED) { 3136 if (sti->sti_nl7c_uri) { 3137 /* Close uri processing for a previous request */ 3138 nl7c_close(so); 3139 } 3140 if ((so_state & SS_CANTRCVMORE) && 3141 sti->sti_nl7c_rcv_mp == NULL) { 3142 /* Nothing to process, EOF */ 3143 mutex_exit(&so->so_lock); 3144 return (0); 3145 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3146 /* Persistent NL7C socket, try to process request */ 3147 boolean_t ret; 3148 3149 ret = nl7c_process(so, 3150 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3151 rval.r_vals = sti->sti_nl7c_rcv_rval; 3152 error = rval.r_v.r_v2; 3153 if (error) { 3154 /* Error of some sort, return it */ 3155 mutex_exit(&so->so_lock); 3156 return (error); 3157 } 3158 if (sti->sti_nl7c_flags && 3159 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3160 /* 3161 * Still an NL7C socket and no data 3162 * to pass up to the caller. 3163 */ 3164 mutex_exit(&so->so_lock); 3165 if (ret) { 3166 /* EOF */ 3167 return (0); 3168 } else { 3169 /* Need more data */ 3170 return (EAGAIN); 3171 } 3172 } 3173 } else { 3174 /* 3175 * Not persistent so no further NL7C processing. 
3176 */ 3177 sti->sti_nl7c_flags = 0; 3178 } 3179 } 3180 /* 3181 * Only one reader is allowed at any given time. This is needed 3182 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3183 * 3184 * This is slightly different that BSD behavior in that it fails with 3185 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3186 * is single-threaded using sblock(), which is dropped while waiting 3187 * for data to appear. The difference shows up e.g. if one 3188 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3189 * does use nonblocking io and different threads are reading each 3190 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3191 * in this case as long as the read queue doesn't get empty. 3192 * In this implementation the thread using nonblocking io can 3193 * get an EWOULDBLOCK error due to the blocking thread executing 3194 * e.g. in the uiomove in kstrgetmsg. 3195 * This difference is not believed to be significant. 3196 */ 3197 /* Set SOREADLOCKED */ 3198 error = so_lock_read_intr(so, 3199 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3200 mutex_exit(&so->so_lock); 3201 if (error) 3202 return (error); 3203 3204 /* 3205 * Tell kstrgetmsg to not inspect the stream head errors until all 3206 * queued data has been consumed. 3207 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3208 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3209 * 3210 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3211 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3212 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 
3213 */ 3214 pflag = MSG_ANY | MSG_DELAYERROR; 3215 if (flags & MSG_PEEK) { 3216 pflag |= MSG_IPEEK; 3217 flags &= ~MSG_WAITALL; 3218 } 3219 if (so->so_mode & SM_ATOMIC) 3220 pflag |= MSG_DISCARDTAIL; 3221 3222 if (flags & MSG_DONTWAIT) 3223 timout = 0; 3224 else if (so->so_rcvtimeo != 0) 3225 timout = TICK_TO_MSEC(so->so_rcvtimeo); 3226 else 3227 timout = -1; 3228 opflag = pflag; 3229 retry: 3230 saved_resid = uiop->uio_resid; 3231 pri = 0; 3232 mp = NULL; 3233 if (sti->sti_nl7c_rcv_mp != NULL) { 3234 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3235 error = nl7c_sorecv(so, &mp, uiop, &rval); 3236 } else { 3237 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3238 timout, &rval); 3239 } 3240 if (error != 0) { 3241 /* kstrgetmsg returns ETIME when timeout expires */ 3242 if (error == ETIME) 3243 error = EWOULDBLOCK; 3244 goto out; 3245 } 3246 /* 3247 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3248 * For non-datagrams MOREDATA is used to set MSG_EOR. 3249 */ 3250 ASSERT(!(rval.r_val1 & MORECTL)); 3251 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3252 msg->msg_flags |= MSG_TRUNC; 3253 3254 if (mp == NULL) { 3255 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3256 /* 3257 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3258 * The draft Posix socket spec states that the mark should 3259 * not be cleared when peeking. We follow the latter. 3260 */ 3261 if ((so->so_state & 3262 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3263 (uiop->uio_resid != saved_resid) && 3264 !(flags & MSG_PEEK)) { 3265 sorecv_update_oobstate(so); 3266 } 3267 3268 mutex_enter(&so->so_lock); 3269 /* Set MSG_EOR based on MOREDATA */ 3270 if (!(rval.r_val1 & MOREDATA)) { 3271 if (so->so_state & SS_SAVEDEOR) { 3272 msg->msg_flags |= MSG_EOR; 3273 so->so_state &= ~SS_SAVEDEOR; 3274 } 3275 } 3276 /* 3277 * If some data was received (i.e. not EOF) and the 3278 * read/recv* has not been satisfied wait for some more. 
3279 */ 3280 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3281 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3282 mutex_exit(&so->so_lock); 3283 pflag = opflag | MSG_NOMARK; 3284 goto retry; 3285 } 3286 goto out_locked; 3287 } 3288 3289 /* strsock_proto has already verified length and alignment */ 3290 tpr = (union T_primitives *)mp->b_rptr; 3291 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3292 3293 switch (tpr->type) { 3294 case T_DATA_IND: { 3295 if ((so->so_state & 3296 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3297 (uiop->uio_resid != saved_resid) && 3298 !(flags & MSG_PEEK)) { 3299 sorecv_update_oobstate(so); 3300 } 3301 3302 /* 3303 * Set msg_flags to MSG_EOR based on 3304 * MORE_flag and MOREDATA. 3305 */ 3306 mutex_enter(&so->so_lock); 3307 so->so_state &= ~SS_SAVEDEOR; 3308 if (!(tpr->data_ind.MORE_flag & 1)) { 3309 if (!(rval.r_val1 & MOREDATA)) 3310 msg->msg_flags |= MSG_EOR; 3311 else 3312 so->so_state |= SS_SAVEDEOR; 3313 } 3314 freemsg(mp); 3315 /* 3316 * If some data was received (i.e. not EOF) and the 3317 * read/recv* has not been satisfied wait for some more. 
3318 */ 3319 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3320 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3321 mutex_exit(&so->so_lock); 3322 pflag = opflag | MSG_NOMARK; 3323 goto retry; 3324 } 3325 goto out_locked; 3326 } 3327 case T_UNITDATA_IND: { 3328 void *addr; 3329 t_uscalar_t addrlen; 3330 void *abuf; 3331 t_uscalar_t optlen; 3332 void *opt; 3333 3334 if ((so->so_state & 3335 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3336 (uiop->uio_resid != saved_resid) && 3337 !(flags & MSG_PEEK)) { 3338 sorecv_update_oobstate(so); 3339 } 3340 3341 if (namelen != 0) { 3342 /* Caller wants source address */ 3343 addrlen = tpr->unitdata_ind.SRC_length; 3344 addr = sogetoff(mp, 3345 tpr->unitdata_ind.SRC_offset, 3346 addrlen, 1); 3347 if (addr == NULL) { 3348 freemsg(mp); 3349 error = EPROTO; 3350 eprintsoline(so, error); 3351 goto out; 3352 } 3353 if (so->so_family == AF_UNIX) { 3354 /* 3355 * Can not use the transport level address. 3356 * If there is a SO_SRCADDR option carrying 3357 * the socket level address it will be 3358 * extracted below. 3359 */ 3360 addr = NULL; 3361 addrlen = 0; 3362 } 3363 } 3364 optlen = tpr->unitdata_ind.OPT_length; 3365 if (optlen != 0) { 3366 t_uscalar_t ncontrollen; 3367 3368 /* 3369 * Extract any source address option. 3370 * Determine how large cmsg buffer is needed. 3371 */ 3372 opt = sogetoff(mp, 3373 tpr->unitdata_ind.OPT_offset, 3374 optlen, __TPI_ALIGN_SIZE); 3375 3376 if (opt == NULL) { 3377 freemsg(mp); 3378 error = EPROTO; 3379 eprintsoline(so, error); 3380 goto out; 3381 } 3382 if (so->so_family == AF_UNIX) 3383 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3384 ncontrollen = so_cmsglen(mp, opt, optlen, 3385 !(flags & MSG_XPG4_2)); 3386 if (controllen != 0) 3387 controllen = ncontrollen; 3388 else if (ncontrollen != 0) 3389 msg->msg_flags |= MSG_CTRUNC; 3390 } else { 3391 controllen = 0; 3392 } 3393 3394 if (namelen != 0) { 3395 /* 3396 * Return address to caller. 
3397 * Caller handles truncation if length 3398 * exceeds msg_namelen. 3399 * NOTE: AF_UNIX NUL termination is ensured by 3400 * the sender's copyin_name(). 3401 */ 3402 abuf = kmem_alloc(addrlen, KM_SLEEP); 3403 3404 bcopy(addr, abuf, addrlen); 3405 msg->msg_name = abuf; 3406 msg->msg_namelen = addrlen; 3407 } 3408 3409 if (controllen != 0) { 3410 /* 3411 * Return control msg to caller. 3412 * Caller handles truncation if length 3413 * exceeds msg_controllen. 3414 */ 3415 control = kmem_zalloc(controllen, KM_SLEEP); 3416 3417 error = so_opt2cmsg(mp, opt, optlen, 3418 !(flags & MSG_XPG4_2), 3419 control, controllen); 3420 if (error) { 3421 freemsg(mp); 3422 if (msg->msg_namelen != 0) 3423 kmem_free(msg->msg_name, 3424 msg->msg_namelen); 3425 kmem_free(control, controllen); 3426 eprintsoline(so, error); 3427 goto out; 3428 } 3429 msg->msg_control = control; 3430 msg->msg_controllen = controllen; 3431 } 3432 3433 freemsg(mp); 3434 goto out; 3435 } 3436 case T_OPTDATA_IND: { 3437 struct T_optdata_req *tdr; 3438 void *opt; 3439 t_uscalar_t optlen; 3440 3441 if ((so->so_state & 3442 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3443 (uiop->uio_resid != saved_resid) && 3444 !(flags & MSG_PEEK)) { 3445 sorecv_update_oobstate(so); 3446 } 3447 3448 tdr = (struct T_optdata_req *)mp->b_rptr; 3449 optlen = tdr->OPT_length; 3450 if (optlen != 0) { 3451 t_uscalar_t ncontrollen; 3452 /* 3453 * Determine how large cmsg buffer is needed. 3454 */ 3455 opt = sogetoff(mp, 3456 tpr->optdata_ind.OPT_offset, 3457 optlen, __TPI_ALIGN_SIZE); 3458 3459 if (opt == NULL) { 3460 freemsg(mp); 3461 error = EPROTO; 3462 eprintsoline(so, error); 3463 goto out; 3464 } 3465 3466 ncontrollen = so_cmsglen(mp, opt, optlen, 3467 !(flags & MSG_XPG4_2)); 3468 if (controllen != 0) 3469 controllen = ncontrollen; 3470 else if (ncontrollen != 0) 3471 msg->msg_flags |= MSG_CTRUNC; 3472 } else { 3473 controllen = 0; 3474 } 3475 3476 if (controllen != 0) { 3477 /* 3478 * Return control msg to caller. 
3479 * Caller handles truncation if length 3480 * exceeds msg_controllen. 3481 */ 3482 control = kmem_zalloc(controllen, KM_SLEEP); 3483 3484 error = so_opt2cmsg(mp, opt, optlen, 3485 !(flags & MSG_XPG4_2), 3486 control, controllen); 3487 if (error) { 3488 freemsg(mp); 3489 kmem_free(control, controllen); 3490 eprintsoline(so, error); 3491 goto out; 3492 } 3493 msg->msg_control = control; 3494 msg->msg_controllen = controllen; 3495 } 3496 3497 /* 3498 * Set msg_flags to MSG_EOR based on 3499 * DATA_flag and MOREDATA. 3500 */ 3501 mutex_enter(&so->so_lock); 3502 so->so_state &= ~SS_SAVEDEOR; 3503 if (!(tpr->data_ind.MORE_flag & 1)) { 3504 if (!(rval.r_val1 & MOREDATA)) 3505 msg->msg_flags |= MSG_EOR; 3506 else 3507 so->so_state |= SS_SAVEDEOR; 3508 } 3509 freemsg(mp); 3510 /* 3511 * If some data was received (i.e. not EOF) and the 3512 * read/recv* has not been satisfied wait for some more. 3513 * Not possible to wait if control info was received. 3514 */ 3515 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3516 controllen == 0 && 3517 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3518 mutex_exit(&so->so_lock); 3519 pflag = opflag | MSG_NOMARK; 3520 goto retry; 3521 } 3522 goto out_locked; 3523 } 3524 case T_EXDATA_IND: { 3525 dprintso(so, 1, 3526 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3527 "state %s\n", 3528 sti->sti_oobsigcnt, sti->sti_oobcnt, 3529 saved_resid - uiop->uio_resid, 3530 pr_state(so->so_state, so->so_mode))); 3531 /* 3532 * kstrgetmsg handles MSGMARK so there is nothing to 3533 * inspect in the T_EXDATA_IND. 3534 * strsock_proto makes the stream head queue the T_EXDATA_IND 3535 * as a separate message with no M_DATA component. Furthermore, 3536 * the stream head does not consolidate M_DATA messages onto 3537 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3538 * remains a message by itself. This is needed since MSGMARK 3539 * marks both the whole message as well as the last byte 3540 * of the message. 
3541 */ 3542 freemsg(mp); 3543 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3544 if (flags & MSG_PEEK) { 3545 /* 3546 * Even though we are peeking we consume the 3547 * T_EXDATA_IND thereby moving the mark information 3548 * to SS_RCVATMARK. Then the oob code below will 3549 * retry the peeking kstrgetmsg. 3550 * Note that the stream head read queue is 3551 * never flushed without holding SOREADLOCKED 3552 * thus the T_EXDATA_IND can not disappear 3553 * underneath us. 3554 */ 3555 dprintso(so, 1, 3556 ("sotpi_recvmsg: consume EXDATA_IND " 3557 "counts %d/%d state %s\n", 3558 sti->sti_oobsigcnt, 3559 sti->sti_oobcnt, 3560 pr_state(so->so_state, so->so_mode))); 3561 3562 pflag = MSG_ANY | MSG_DELAYERROR; 3563 if (so->so_mode & SM_ATOMIC) 3564 pflag |= MSG_DISCARDTAIL; 3565 3566 pri = 0; 3567 mp = NULL; 3568 3569 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3570 &pri, &pflag, (clock_t)-1, &rval); 3571 ASSERT(uiop->uio_resid == saved_resid); 3572 3573 if (error) { 3574 #ifdef SOCK_DEBUG 3575 if (error != EWOULDBLOCK && error != EINTR) { 3576 eprintsoline(so, error); 3577 } 3578 #endif /* SOCK_DEBUG */ 3579 goto out; 3580 } 3581 ASSERT(mp); 3582 tpr = (union T_primitives *)mp->b_rptr; 3583 ASSERT(tpr->type == T_EXDATA_IND); 3584 freemsg(mp); 3585 } /* end "if (flags & MSG_PEEK)" */ 3586 3587 /* 3588 * Decrement the number of queued and pending oob. 3589 * 3590 * SS_RCVATMARK is cleared when we read past a mark. 3591 * SS_HAVEOOBDATA is cleared when we've read past the 3592 * last mark. 3593 * SS_OOBPEND is cleared if we've read past the last 3594 * mark and no (new) SIGURG has been posted. 
3595 */ 3596 mutex_enter(&so->so_lock); 3597 ASSERT(so_verify_oobstate(so)); 3598 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3599 ASSERT(sti->sti_oobsigcnt > 0); 3600 sti->sti_oobsigcnt--; 3601 ASSERT(sti->sti_oobcnt > 0); 3602 sti->sti_oobcnt--; 3603 /* 3604 * Since the T_EXDATA_IND has been removed from the stream 3605 * head, but we have not read data past the mark, 3606 * sockfs needs to track that the socket is still at the mark. 3607 * 3608 * Since no data was received call kstrgetmsg again to wait 3609 * for data. 3610 */ 3611 so->so_state |= SS_RCVATMARK; 3612 mutex_exit(&so->so_lock); 3613 dprintso(so, 1, 3614 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3615 sti->sti_oobsigcnt, sti->sti_oobcnt, 3616 pr_state(so->so_state, so->so_mode))); 3617 pflag = opflag; 3618 goto retry; 3619 } 3620 default: 3621 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3622 (void *)so, tpr->type, (void *)mp); 3623 ASSERT(0); 3624 freemsg(mp); 3625 error = EPROTO; 3626 eprintsoline(so, error); 3627 goto out; 3628 } 3629 /* NOTREACHED */ 3630 out: 3631 mutex_enter(&so->so_lock); 3632 out_locked: 3633 so_unlock_read(so); /* Clear SOREADLOCKED */ 3634 mutex_exit(&so->so_lock); 3635 return (error); 3636 } 3637 3638 /* 3639 * Sending data with options on a datagram socket. 3640 * Assumes caller has verified that SS_ISBOUND etc. are set. 3641 * 3642 * For AF_UNIX the destination address may be already in 3643 * internal form, as indicated by sti->sti_faddr_noxlate 3644 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to 3645 * translate the destination address to internal form. 3646 * 3647 * The source address is passed as an option. If passing 3648 * file descriptors, those are passed as file pointers in 3649 * another option. 
3650 */ 3651 static int 3652 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3653 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3654 { 3655 struct T_unitdata_req tudr; 3656 mblk_t *mp; 3657 int error; 3658 void *addr; 3659 socklen_t addrlen; 3660 void *src; 3661 socklen_t srclen; 3662 ssize_t len; 3663 int size; 3664 struct T_opthdr toh; 3665 struct fdbuf *fdbuf; 3666 t_uscalar_t optlen; 3667 void *fds; 3668 int fdlen; 3669 sotpi_info_t *sti = SOTOTPI(so); 3670 3671 ASSERT(name && namelen); 3672 ASSERT(control && controllen); 3673 3674 len = uiop->uio_resid; 3675 if (len > (ssize_t)sti->sti_tidu_size) { 3676 return (EMSGSIZE); 3677 } 3678 3679 if (sti->sti_faddr_noxlate == 0 && 3680 (flags & MSG_SENDTO_NOXLATE) == 0) { 3681 /* 3682 * Length and family checks. 3683 * Don't verify internal form. 3684 */ 3685 error = so_addr_verify(so, name, namelen); 3686 if (error) { 3687 eprintsoline(so, error); 3688 return (error); 3689 } 3690 } 3691 3692 if (so->so_family == AF_UNIX) { 3693 if (sti->sti_faddr_noxlate) { 3694 /* 3695 * Already have a transport internal address. Do not 3696 * pass any (transport internal) source address. 3697 */ 3698 addr = name; 3699 addrlen = namelen; 3700 src = NULL; 3701 srclen = 0; 3702 } else if (flags & MSG_SENDTO_NOXLATE) { 3703 /* 3704 * Have an internal form dest. address. 3705 * Pass the source address as usual. 3706 */ 3707 addr = name; 3708 addrlen = namelen; 3709 src = sti->sti_laddr_sa; 3710 srclen = (socklen_t)sti->sti_laddr_len; 3711 } else { 3712 /* 3713 * Pass the sockaddr_un source address as an option 3714 * and translate the remote address. 3715 * 3716 * Note that this code does not prevent sti_laddr_sa 3717 * from changing while it is being used. Thus 3718 * if an unbind+bind occurs concurrently with this 3719 * send the peer might see a partially new and a 3720 * partially old "from" address. 
3721 */ 3722 src = sti->sti_laddr_sa; 3723 srclen = (socklen_t)sti->sti_laddr_len; 3724 dprintso(so, 1, 3725 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3726 srclen, src)); 3727 /* 3728 * The sendmsg caller specified a destination 3729 * address, which we must translate into our 3730 * internal form. addr = &sti->sti_ux_taddr 3731 */ 3732 error = so_ux_addr_xlate(so, name, namelen, 3733 (flags & MSG_XPG4_2), 3734 &addr, &addrlen); 3735 if (error) { 3736 eprintsoline(so, error); 3737 return (error); 3738 } 3739 } 3740 } else { 3741 addr = name; 3742 addrlen = namelen; 3743 src = NULL; 3744 srclen = 0; 3745 } 3746 optlen = so_optlen(control, controllen, 3747 !(flags & MSG_XPG4_2)); 3748 tudr.PRIM_type = T_UNITDATA_REQ; 3749 tudr.DEST_length = addrlen; 3750 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3751 if (srclen != 0) 3752 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3753 _TPI_ALIGN_TOPT(srclen)); 3754 else 3755 tudr.OPT_length = optlen; 3756 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3757 _TPI_ALIGN_TOPT(addrlen)); 3758 3759 size = tudr.OPT_offset + tudr.OPT_length; 3760 3761 /* 3762 * File descriptors only when SM_FDPASSING set. 3763 */ 3764 error = so_getfdopt(control, controllen, 3765 !(flags & MSG_XPG4_2), &fds, &fdlen); 3766 if (error) 3767 return (error); 3768 if (fdlen != -1) { 3769 if (!(so->so_mode & SM_FDPASSING)) 3770 return (EOPNOTSUPP); 3771 3772 error = fdbuf_create(fds, fdlen, &fdbuf); 3773 if (error) 3774 return (error); 3775 mp = fdbuf_allocmsg(size, fdbuf); 3776 } else { 3777 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3778 if (mp == NULL) { 3779 /* 3780 * Caught a signal waiting for memory. 3781 * Let send* return EINTR. 
3782 */ 3783 return (EINTR); 3784 } 3785 } 3786 soappendmsg(mp, &tudr, sizeof (tudr)); 3787 soappendmsg(mp, addr, addrlen); 3788 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3789 3790 if (fdlen != -1) { 3791 ASSERT(fdbuf != NULL); 3792 toh.level = SOL_SOCKET; 3793 toh.name = SO_FILEP; 3794 toh.len = fdbuf->fd_size + 3795 (t_uscalar_t)sizeof (struct T_opthdr); 3796 toh.status = 0; 3797 soappendmsg(mp, &toh, sizeof (toh)); 3798 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3799 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3800 } 3801 if (srclen != 0) { 3802 /* 3803 * There is a AF_UNIX sockaddr_un to include as a source 3804 * address option. 3805 */ 3806 toh.level = SOL_SOCKET; 3807 toh.name = SO_SRCADDR; 3808 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3809 toh.status = 0; 3810 soappendmsg(mp, &toh, sizeof (toh)); 3811 soappendmsg(mp, src, srclen); 3812 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3813 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3814 } 3815 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3816 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3817 /* At most 3 bytes left in the message */ 3818 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3819 ASSERT(MBLKL(mp) <= (ssize_t)size); 3820 3821 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3822 if (AU_AUDITING()) 3823 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3824 3825 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3826 #ifdef SOCK_DEBUG 3827 if (error) { 3828 eprintsoline(so, error); 3829 } 3830 #endif /* SOCK_DEBUG */ 3831 return (error); 3832 } 3833 3834 /* 3835 * Sending data with options on a connected stream socket. 3836 * Assumes caller has verified that SS_ISCONNECTED is set. 
3837 */ 3838 static int 3839 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3840 t_uscalar_t controllen, int flags) 3841 { 3842 struct T_optdata_req tdr; 3843 mblk_t *mp; 3844 int error; 3845 ssize_t iosize; 3846 int size; 3847 struct fdbuf *fdbuf; 3848 t_uscalar_t optlen; 3849 void *fds; 3850 int fdlen; 3851 struct T_opthdr toh; 3852 sotpi_info_t *sti = SOTOTPI(so); 3853 3854 dprintso(so, 1, 3855 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3856 3857 /* 3858 * Has to be bound and connected. However, since no locks are 3859 * held the state could have changed after sotpi_sendmsg checked it 3860 * thus it is not possible to ASSERT on the state. 3861 */ 3862 3863 /* Options on connection-oriented only when SM_OPTDATA set. */ 3864 if (!(so->so_mode & SM_OPTDATA)) 3865 return (EOPNOTSUPP); 3866 3867 do { 3868 /* 3869 * Set the MORE flag if uio_resid does not fit in this 3870 * message or if the caller passed in "more". 3871 * Error for transports with zero tidu_size. 3872 */ 3873 tdr.PRIM_type = T_OPTDATA_REQ; 3874 iosize = sti->sti_tidu_size; 3875 if (iosize <= 0) 3876 return (EMSGSIZE); 3877 if (uiop->uio_resid > iosize) { 3878 tdr.DATA_flag = 1; 3879 } else { 3880 if (more) 3881 tdr.DATA_flag = 1; 3882 else 3883 tdr.DATA_flag = 0; 3884 iosize = uiop->uio_resid; 3885 } 3886 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3887 tdr.DATA_flag, iosize)); 3888 3889 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3890 tdr.OPT_length = optlen; 3891 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3892 3893 size = (int)sizeof (tdr) + optlen; 3894 /* 3895 * File descriptors only when SM_FDPASSING set. 
3896 */ 3897 error = so_getfdopt(control, controllen, 3898 !(flags & MSG_XPG4_2), &fds, &fdlen); 3899 if (error) 3900 return (error); 3901 if (fdlen != -1) { 3902 if (!(so->so_mode & SM_FDPASSING)) 3903 return (EOPNOTSUPP); 3904 3905 error = fdbuf_create(fds, fdlen, &fdbuf); 3906 if (error) 3907 return (error); 3908 mp = fdbuf_allocmsg(size, fdbuf); 3909 } else { 3910 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3911 if (mp == NULL) { 3912 /* 3913 * Caught a signal waiting for memory. 3914 * Let send* return EINTR. 3915 */ 3916 return (EINTR); 3917 } 3918 } 3919 soappendmsg(mp, &tdr, sizeof (tdr)); 3920 3921 if (fdlen != -1) { 3922 ASSERT(fdbuf != NULL); 3923 toh.level = SOL_SOCKET; 3924 toh.name = SO_FILEP; 3925 toh.len = fdbuf->fd_size + 3926 (t_uscalar_t)sizeof (struct T_opthdr); 3927 toh.status = 0; 3928 soappendmsg(mp, &toh, sizeof (toh)); 3929 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3930 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3931 } 3932 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3933 /* At most 3 bytes left in the message */ 3934 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3935 ASSERT(MBLKL(mp) <= (ssize_t)size); 3936 3937 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3938 3939 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3940 0, MSG_BAND, 0); 3941 if (error) { 3942 eprintsoline(so, error); 3943 return (error); 3944 } 3945 control = NULL; 3946 if (uiop->uio_resid > 0) { 3947 /* 3948 * Recheck for fatal errors. Fail write even though 3949 * some data have been written. This is consistent 3950 * with strwrite semantics and BSD sockets semantics. 
3951 */ 3952 if (so->so_state & SS_CANTSENDMORE) { 3953 eprintsoline(so, error); 3954 return (EPIPE); 3955 } 3956 if (so->so_error != 0) { 3957 mutex_enter(&so->so_lock); 3958 error = sogeterr(so, B_TRUE); 3959 mutex_exit(&so->so_lock); 3960 if (error != 0) { 3961 eprintsoline(so, error); 3962 return (error); 3963 } 3964 } 3965 } 3966 } while (uiop->uio_resid > 0); 3967 return (0); 3968 } 3969 3970 /* 3971 * Sending data on a datagram socket. 3972 * Assumes caller has verified that SS_ISBOUND etc. are set. 3973 * 3974 * For AF_UNIX the destination address may be already in 3975 * internal form, as indicated by sti->sti_faddr_noxlate 3976 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to 3977 * translate the destination address to internal form. 3978 * 3979 * The source address is passed as an option. 3980 */ 3981 int 3982 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3983 struct uio *uiop, int flags) 3984 { 3985 struct T_unitdata_req tudr; 3986 mblk_t *mp; 3987 int error; 3988 void *addr; 3989 socklen_t addrlen; 3990 void *src; 3991 socklen_t srclen; 3992 ssize_t len; 3993 sotpi_info_t *sti = SOTOTPI(so); 3994 3995 ASSERT(name != NULL && namelen != 0); 3996 3997 len = uiop->uio_resid; 3998 if (len > sti->sti_tidu_size) { 3999 error = EMSGSIZE; 4000 goto done; 4001 } 4002 4003 if (sti->sti_faddr_noxlate == 0 && 4004 (flags & MSG_SENDTO_NOXLATE) == 0) { 4005 /* 4006 * Length and family checks. 4007 * Don't verify internal form. 4008 */ 4009 error = so_addr_verify(so, name, namelen); 4010 if (error != 0) 4011 goto done; 4012 } 4013 4014 if (sti->sti_direct) /* Never on AF_UNIX */ 4015 return (sodgram_direct(so, name, namelen, uiop, flags)); 4016 4017 if (so->so_family == AF_UNIX) { 4018 if (sti->sti_faddr_noxlate) { 4019 /* 4020 * Already have a transport internal address. Do not 4021 * pass any (transport internal) source address. 
4022 */ 4023 addr = name; 4024 addrlen = namelen; 4025 src = NULL; 4026 srclen = 0; 4027 } else if (flags & MSG_SENDTO_NOXLATE) { 4028 /* 4029 * Have an internal form dest. address. 4030 * Pass the source address as usual. 4031 */ 4032 addr = name; 4033 addrlen = namelen; 4034 src = sti->sti_laddr_sa; 4035 srclen = (socklen_t)sti->sti_laddr_len; 4036 } else { 4037 /* 4038 * Pass the sockaddr_un source address as an option 4039 * and translate the remote address. 4040 * 4041 * Note that this code does not prevent sti_laddr_sa 4042 * from changing while it is being used. Thus 4043 * if an unbind+bind occurs concurrently with this 4044 * send the peer might see a partially new and a 4045 * partially old "from" address. 4046 */ 4047 src = sti->sti_laddr_sa; 4048 srclen = (socklen_t)sti->sti_laddr_len; 4049 dprintso(so, 1, 4050 ("sosend_dgram UNIX: srclen %d, src %p\n", 4051 srclen, src)); 4052 /* 4053 * The sendmsg caller specified a destination 4054 * address, which we must translate into our 4055 * internal form. addr = &sti->sti_ux_taddr 4056 */ 4057 error = so_ux_addr_xlate(so, name, namelen, 4058 (flags & MSG_XPG4_2), 4059 &addr, &addrlen); 4060 if (error) { 4061 eprintsoline(so, error); 4062 goto done; 4063 } 4064 } 4065 } else { 4066 addr = name; 4067 addrlen = namelen; 4068 src = NULL; 4069 srclen = 0; 4070 } 4071 tudr.PRIM_type = T_UNITDATA_REQ; 4072 tudr.DEST_length = addrlen; 4073 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4074 if (srclen == 0) { 4075 tudr.OPT_length = 0; 4076 tudr.OPT_offset = 0; 4077 4078 mp = soallocproto2(&tudr, sizeof (tudr), 4079 addr, addrlen, 0, _ALLOC_INTR, CRED()); 4080 if (mp == NULL) { 4081 /* 4082 * Caught a signal waiting for memory. 4083 * Let send* return EINTR. 4084 */ 4085 error = EINTR; 4086 goto done; 4087 } 4088 } else { 4089 /* 4090 * There is a AF_UNIX sockaddr_un to include as a source 4091 * address option. 
4092 */ 4093 struct T_opthdr toh; 4094 ssize_t size; 4095 4096 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4097 _TPI_ALIGN_TOPT(srclen)); 4098 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4099 _TPI_ALIGN_TOPT(addrlen)); 4100 4101 toh.level = SOL_SOCKET; 4102 toh.name = SO_SRCADDR; 4103 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4104 toh.status = 0; 4105 4106 size = tudr.OPT_offset + tudr.OPT_length; 4107 mp = soallocproto2(&tudr, sizeof (tudr), 4108 addr, addrlen, size, _ALLOC_INTR, CRED()); 4109 if (mp == NULL) { 4110 /* 4111 * Caught a signal waiting for memory. 4112 * Let send* return EINTR. 4113 */ 4114 error = EINTR; 4115 goto done; 4116 } 4117 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4118 soappendmsg(mp, &toh, sizeof (toh)); 4119 soappendmsg(mp, src, srclen); 4120 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4121 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4122 } 4123 4124 if (AU_AUDITING()) 4125 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4126 4127 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4128 done: 4129 #ifdef SOCK_DEBUG 4130 if (error) { 4131 eprintsoline(so, error); 4132 } 4133 #endif /* SOCK_DEBUG */ 4134 return (error); 4135 } 4136 4137 /* 4138 * Sending data on a connected stream socket. 4139 * Assumes caller has verified that SS_ISCONNECTED is set. 4140 */ 4141 int 4142 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4143 int sflag) 4144 { 4145 struct T_data_req tdr; 4146 mblk_t *mp; 4147 int error; 4148 ssize_t iosize; 4149 sotpi_info_t *sti = SOTOTPI(so); 4150 4151 dprintso(so, 1, 4152 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4153 (void *)so, uiop->uio_resid, prim, sflag)); 4154 4155 /* 4156 * Has to be bound and connected. However, since no locks are 4157 * held the state could have changed after sotpi_sendmsg checked it 4158 * thus it is not possible to ASSERT on the state. 
4159 */ 4160 4161 do { 4162 /* 4163 * Set the MORE flag if uio_resid does not fit in this 4164 * message or if the caller passed in "more". 4165 * Error for transports with zero tidu_size. 4166 */ 4167 tdr.PRIM_type = prim; 4168 iosize = sti->sti_tidu_size; 4169 if (iosize <= 0) 4170 return (EMSGSIZE); 4171 if (uiop->uio_resid > iosize) { 4172 tdr.MORE_flag = 1; 4173 } else { 4174 if (more) 4175 tdr.MORE_flag = 1; 4176 else 4177 tdr.MORE_flag = 0; 4178 iosize = uiop->uio_resid; 4179 } 4180 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4181 prim, tdr.MORE_flag, iosize)); 4182 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4183 if (mp == NULL) { 4184 /* 4185 * Caught a signal waiting for memory. 4186 * Let send* return EINTR. 4187 */ 4188 return (EINTR); 4189 } 4190 4191 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4192 0, sflag | MSG_BAND, 0); 4193 if (error) { 4194 eprintsoline(so, error); 4195 return (error); 4196 } 4197 if (uiop->uio_resid > 0) { 4198 /* 4199 * Recheck for fatal errors. Fail write even though 4200 * some data have been written. This is consistent 4201 * with strwrite semantics and BSD sockets semantics. 4202 */ 4203 if (so->so_state & SS_CANTSENDMORE) { 4204 eprintsoline(so, error); 4205 return (EPIPE); 4206 } 4207 if (so->so_error != 0) { 4208 mutex_enter(&so->so_lock); 4209 error = sogeterr(so, B_TRUE); 4210 mutex_exit(&so->so_lock); 4211 if (error != 0) { 4212 eprintsoline(so, error); 4213 return (error); 4214 } 4215 } 4216 } 4217 } while (uiop->uio_resid > 0); 4218 return (0); 4219 } 4220 4221 /* 4222 * Check the state for errors and call the appropriate send function. 4223 * 4224 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4225 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4226 * after sending the message. 4227 * 4228 * The caller may optionally specify a destination address, for either 4229 * stream or datagram sockets. 
 * This table summarizes the cases:
 *
 *	Socket type	Dest. given	Connected	Result
 *	-----------	-----------	---------	--------------
 *	Stream		*		Yes		send to conn. addr.
 *	Stream		*		No		error ENOTCONN
 *	Dgram		yes		*		send to given addr.
 *	Dgram		no		yes		send to conn. addr.
 *	Dgram		no		no		error EDESTADDRREQ
 *
 * There are subtleties around the destination address when using
 * AF_UNIX datagram sockets.  When the sendmsg call specifies the
 * destination address, it's in (struct sockaddr_un) form and we
 * need to translate it to our internal form (struct so_ux_addr).
 *
 * When the sendmsg call does not specify a destination address
 * we're using the peer address saved during sotpi_connect, and
 * that address is already in internal form.  In this case, the
 * (internal only) flag MSG_SENDTO_NOXLATE is set in the flags
 * passed to sosend_dgram or sosend_dgramcmsg to indicate that
 * those functions should skip translation to internal form.
 * Avoiding that translation is not only more efficient, but it's
 * also necessary when a process does a connect on an AF_UNIX
 * datagram socket and then drops privileges.  After the process
 * has dropped privileges, it may no longer be able to look up the
 * external name in the filesystem, but it should still be
 * able to send messages on the connected socket by leaving the
 * destination name unspecified.
 *
 * Yet more subtleties arise with sockets connected by socketpair(),
 * which puts internal form addresses in the fields where normally
 * the external form is found, and sets sti_faddr_noxlate=1, which
 * (like flag MSG_SENDTO_NOXLATE) causes the sosend_dgram functions
 * to skip translation of destination addresses to internal form.
 * However, beware that the flag sti_faddr_noxlate=1 also triggers
 * different behaviour almost everywhere AF_UNIX addresses appear.
4265 */ 4266 static int 4267 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4268 struct cred *cr) 4269 { 4270 int so_state; 4271 int so_mode; 4272 int error; 4273 struct sockaddr *name; 4274 t_uscalar_t namelen; 4275 int dontroute; 4276 int flags; 4277 sotpi_info_t *sti = SOTOTPI(so); 4278 4279 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4280 (void *)so, (void *)msg, msg->msg_flags, 4281 pr_state(so->so_state, so->so_mode), so->so_error)); 4282 4283 if (so->so_version == SOV_STREAM) { 4284 /* The imaginary "sockmod" has been popped - act as a stream */ 4285 so_update_attrs(so, SOMOD); 4286 return (strwrite(SOTOV(so), uiop, cr)); 4287 } 4288 4289 mutex_enter(&so->so_lock); 4290 so_state = so->so_state; 4291 4292 if (so_state & SS_CANTSENDMORE) { 4293 mutex_exit(&so->so_lock); 4294 return (EPIPE); 4295 } 4296 4297 if (so->so_error != 0) { 4298 error = sogeterr(so, B_TRUE); 4299 if (error != 0) { 4300 mutex_exit(&so->so_lock); 4301 return (error); 4302 } 4303 } 4304 4305 name = (struct sockaddr *)msg->msg_name; 4306 namelen = msg->msg_namelen; 4307 flags = msg->msg_flags; 4308 4309 /* 4310 * Historically, this function does not validate the flags 4311 * passed in, and any errant bits are ignored. However, 4312 * we would not want any such errant flag bits accidently 4313 * being treated as one of the internal-only flags, so 4314 * clear the internal-only flag bits. 4315 */ 4316 flags &= ~MSG_SENDTO_NOXLATE; 4317 4318 so_mode = so->so_mode; 4319 4320 if (name == NULL) { 4321 if (!(so_state & SS_ISCONNECTED)) { 4322 mutex_exit(&so->so_lock); 4323 if (so_mode & SM_CONNREQUIRED) 4324 return (ENOTCONN); 4325 else 4326 return (EDESTADDRREQ); 4327 } 4328 /* 4329 * This is a connected socket. 4330 */ 4331 if (so_mode & SM_CONNREQUIRED) { 4332 /* 4333 * This is a connected STREAM socket, 4334 * destination not specified. 
4335 */ 4336 name = NULL; 4337 namelen = 0; 4338 } else { 4339 /* 4340 * Datagram send on connected socket with 4341 * the destination name not specified. 4342 * Use the peer address from connect. 4343 */ 4344 if (so->so_family == AF_UNIX) { 4345 /* 4346 * Use the (internal form) address saved 4347 * in sotpi_connect. See above. 4348 */ 4349 name = (void *)&sti->sti_ux_faddr; 4350 namelen = sizeof (sti->sti_ux_faddr); 4351 flags |= MSG_SENDTO_NOXLATE; 4352 } else { 4353 ASSERT(sti->sti_faddr_sa); 4354 name = sti->sti_faddr_sa; 4355 namelen = (t_uscalar_t)sti->sti_faddr_len; 4356 } 4357 } 4358 } else { 4359 /* 4360 * Sendmsg specifies a destination name 4361 */ 4362 if (!(so_state & SS_ISCONNECTED) && 4363 (so_mode & SM_CONNREQUIRED)) { 4364 /* i.e. TCP not connected */ 4365 mutex_exit(&so->so_lock); 4366 return (ENOTCONN); 4367 } 4368 /* 4369 * Ignore the address on connection-oriented sockets. 4370 * Just like BSD this code does not generate an error for 4371 * TCP (a CONNREQUIRED socket) when sending to an address 4372 * passed in with sendto/sendmsg. Instead the data is 4373 * delivered on the connection as if no address had been 4374 * supplied. 4375 */ 4376 if ((so_state & SS_ISCONNECTED) && 4377 !(so_mode & SM_CONNREQUIRED)) { 4378 mutex_exit(&so->so_lock); 4379 return (EISCONN); 4380 } 4381 if (!(so_state & SS_ISBOUND)) { 4382 so_lock_single(so); /* Set SOLOCKED */ 4383 error = sotpi_bind(so, NULL, 0, 4384 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4385 so_unlock_single(so, SOLOCKED); 4386 if (error) { 4387 mutex_exit(&so->so_lock); 4388 eprintsoline(so, error); 4389 return (error); 4390 } 4391 } 4392 /* 4393 * Handle delayed datagram errors. These are only queued 4394 * when the application sets SO_DGRAM_ERRIND. 4395 * Return the error if we are sending to the address 4396 * that was returned in the last T_UDERROR_IND. 4397 * If sending to some other address discard the delayed 4398 * error indication. 
4399 */ 4400 if (sti->sti_delayed_error) { 4401 struct T_uderror_ind *tudi; 4402 void *addr; 4403 t_uscalar_t addrlen; 4404 boolean_t match = B_FALSE; 4405 4406 ASSERT(sti->sti_eaddr_mp); 4407 error = sti->sti_delayed_error; 4408 sti->sti_delayed_error = 0; 4409 tudi = 4410 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4411 addrlen = tudi->DEST_length; 4412 addr = sogetoff(sti->sti_eaddr_mp, 4413 tudi->DEST_offset, addrlen, 1); 4414 ASSERT(addr); /* Checked by strsock_proto */ 4415 switch (so->so_family) { 4416 case AF_INET: { 4417 /* Compare just IP address and port */ 4418 sin_t *sin1 = (sin_t *)name; 4419 sin_t *sin2 = (sin_t *)addr; 4420 4421 if (addrlen == sizeof (sin_t) && 4422 namelen == addrlen && 4423 sin1->sin_port == sin2->sin_port && 4424 sin1->sin_addr.s_addr == 4425 sin2->sin_addr.s_addr) 4426 match = B_TRUE; 4427 break; 4428 } 4429 case AF_INET6: { 4430 /* Compare just IP address and port. Not flow */ 4431 sin6_t *sin1 = (sin6_t *)name; 4432 sin6_t *sin2 = (sin6_t *)addr; 4433 4434 if (addrlen == sizeof (sin6_t) && 4435 namelen == addrlen && 4436 sin1->sin6_port == sin2->sin6_port && 4437 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4438 &sin2->sin6_addr)) 4439 match = B_TRUE; 4440 break; 4441 } 4442 case AF_UNIX: 4443 default: 4444 if (namelen == addrlen && 4445 bcmp(name, addr, namelen) == 0) 4446 match = B_TRUE; 4447 } 4448 if (match) { 4449 freemsg(sti->sti_eaddr_mp); 4450 sti->sti_eaddr_mp = NULL; 4451 mutex_exit(&so->so_lock); 4452 #ifdef DEBUG 4453 dprintso(so, 0, 4454 ("sockfs delayed error %d for %s\n", 4455 error, 4456 pr_addr(so->so_family, name, namelen))); 4457 #endif /* DEBUG */ 4458 return (error); 4459 } 4460 freemsg(sti->sti_eaddr_mp); 4461 sti->sti_eaddr_mp = NULL; 4462 } 4463 } 4464 mutex_exit(&so->so_lock); 4465 4466 dontroute = 0; 4467 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4468 uint32_t val; 4469 4470 val = 1; 4471 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4472 &val, (t_uscalar_t)sizeof 
(val), cr); 4473 if (error) 4474 return (error); 4475 dontroute = 1; 4476 } 4477 4478 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4479 error = EOPNOTSUPP; 4480 goto done; 4481 } 4482 if (msg->msg_controllen != 0) { 4483 if (!(so_mode & SM_CONNREQUIRED)) { 4484 so_update_attrs(so, SOMOD); 4485 error = sosend_dgramcmsg(so, name, namelen, uiop, 4486 msg->msg_control, msg->msg_controllen, flags); 4487 } else { 4488 if (flags & MSG_OOB) { 4489 /* Can't generate T_EXDATA_REQ with options */ 4490 error = EOPNOTSUPP; 4491 goto done; 4492 } 4493 so_update_attrs(so, SOMOD); 4494 error = sosend_svccmsg(so, uiop, 4495 !(flags & MSG_EOR), 4496 msg->msg_control, msg->msg_controllen, 4497 flags); 4498 } 4499 goto done; 4500 } 4501 4502 so_update_attrs(so, SOMOD); 4503 if (!(so_mode & SM_CONNREQUIRED)) { 4504 /* 4505 * If there is no SO_DONTROUTE to turn off return immediately 4506 * from send_dgram. This can allow tail-call optimizations. 4507 */ 4508 if (!dontroute) { 4509 return (sosend_dgram(so, name, namelen, uiop, flags)); 4510 } 4511 error = sosend_dgram(so, name, namelen, uiop, flags); 4512 } else { 4513 t_scalar_t prim; 4514 int sflag; 4515 4516 /* Ignore msg_name in the connected state */ 4517 if (flags & MSG_OOB) { 4518 prim = T_EXDATA_REQ; 4519 /* 4520 * Send down T_EXDATA_REQ even if there is flow 4521 * control for data. 4522 */ 4523 sflag = MSG_IGNFLOW; 4524 } else { 4525 if (so_mode & SM_BYTESTREAM) { 4526 /* Byte stream transport - use write */ 4527 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4528 4529 /* Send M_DATA messages */ 4530 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4531 (error = nl7c_data(so, uiop)) >= 0) { 4532 /* NL7C consumed the data */ 4533 return (error); 4534 } 4535 /* 4536 * If there is no SO_DONTROUTE to turn off, 4537 * sti_direct is on, and there is no flow 4538 * control, we can take the fast path. 
4539 */ 4540 if (!dontroute && sti->sti_direct != 0 && 4541 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4542 return (sostream_direct(so, uiop, 4543 NULL, cr)); 4544 } 4545 error = strwrite(SOTOV(so), uiop, cr); 4546 goto done; 4547 } 4548 prim = T_DATA_REQ; 4549 sflag = 0; 4550 } 4551 /* 4552 * If there is no SO_DONTROUTE to turn off return immediately 4553 * from sosend_svc. This can allow tail-call optimizations. 4554 */ 4555 if (!dontroute) 4556 return (sosend_svc(so, uiop, prim, 4557 !(flags & MSG_EOR), sflag)); 4558 error = sosend_svc(so, uiop, prim, 4559 !(flags & MSG_EOR), sflag); 4560 } 4561 ASSERT(dontroute); 4562 done: 4563 if (dontroute) { 4564 uint32_t val; 4565 4566 val = 0; 4567 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4568 &val, (t_uscalar_t)sizeof (val), cr); 4569 } 4570 return (error); 4571 } 4572 4573 /* 4574 * kstrwritemp() has very similar semantics as that of strwrite(). 4575 * The main difference is it obtains mblks from the caller and also 4576 * does not do any copy as done in strwrite() from user buffers to 4577 * kernel buffers. 4578 * 4579 * Currently, this routine is used by sendfile to send data allocated 4580 * within the kernel without any copying. This interface does not use the 4581 * synchronous stream interface as synch. stream interface implies 4582 * copying. 4583 */ 4584 int 4585 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4586 { 4587 struct stdata *stp; 4588 struct queue *wqp; 4589 mblk_t *newmp; 4590 char waitflag; 4591 int tempmode; 4592 int error = 0; 4593 int done = 0; 4594 struct sonode *so; 4595 boolean_t direct; 4596 4597 ASSERT(vp->v_stream); 4598 stp = vp->v_stream; 4599 4600 so = VTOSO(vp); 4601 direct = _SOTOTPI(so)->sti_direct; 4602 4603 /* 4604 * This is the sockfs direct fast path. canputnext() need 4605 * not be accurate so we don't grab the sd_lock here. If 4606 * we get flow-controlled, we grab sd_lock just before the 4607 * do..while loop below to emulate what strwrite() does. 
4608 */ 4609 wqp = stp->sd_wrq; 4610 if (canputnext(wqp) && direct && 4611 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4612 return (sostream_direct(so, NULL, mp, CRED())); 4613 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4614 /* Fast check of flags before acquiring the lock */ 4615 mutex_enter(&stp->sd_lock); 4616 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4617 mutex_exit(&stp->sd_lock); 4618 if (error != 0) { 4619 if (!(stp->sd_flag & STPLEX) && 4620 (stp->sd_wput_opt & SW_SIGPIPE)) { 4621 error = EPIPE; 4622 } 4623 return (error); 4624 } 4625 } 4626 4627 waitflag = WRITEWAIT; 4628 if (stp->sd_flag & OLDNDELAY) 4629 tempmode = fmode & ~FNDELAY; 4630 else 4631 tempmode = fmode; 4632 4633 mutex_enter(&stp->sd_lock); 4634 do { 4635 if (canputnext(wqp)) { 4636 mutex_exit(&stp->sd_lock); 4637 if (stp->sd_wputdatafunc != NULL) { 4638 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4639 NULL, NULL, NULL); 4640 if (newmp == NULL) { 4641 /* The caller will free mp */ 4642 return (ECOMM); 4643 } 4644 mp = newmp; 4645 } 4646 putnext(wqp, mp); 4647 return (0); 4648 } 4649 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4650 &done); 4651 } while (error == 0 && !done); 4652 4653 mutex_exit(&stp->sd_lock); 4654 /* 4655 * EAGAIN tells the application to try again. ENOMEM 4656 * is returned only if the memory allocation size 4657 * exceeds the physical limits of the system. ENOMEM 4658 * can't be true here. 
4659 */ 4660 if (error == ENOMEM) 4661 error = EAGAIN; 4662 return (error); 4663 } 4664 4665 /* ARGSUSED */ 4666 static int 4667 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4668 struct cred *cr, mblk_t **mpp) 4669 { 4670 int error; 4671 4672 if (so->so_family != AF_INET && so->so_family != AF_INET6) 4673 return (EAFNOSUPPORT); 4674 4675 if (so->so_state & SS_CANTSENDMORE) 4676 return (EPIPE); 4677 4678 if (so->so_type != SOCK_STREAM) 4679 return (EOPNOTSUPP); 4680 4681 if ((so->so_state & SS_ISCONNECTED) == 0) 4682 return (ENOTCONN); 4683 4684 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4685 if (error == 0) 4686 *mpp = NULL; 4687 return (error); 4688 } 4689 4690 /* 4691 * Sending data on a datagram socket. 4692 * Assumes caller has verified that SS_ISBOUND etc. are set. 4693 */ 4694 /* ARGSUSED */ 4695 static int 4696 sodgram_direct(struct sonode *so, struct sockaddr *name, 4697 socklen_t namelen, struct uio *uiop, int flags) 4698 { 4699 struct T_unitdata_req tudr; 4700 mblk_t *mp = NULL; 4701 int error = 0; 4702 void *addr; 4703 socklen_t addrlen; 4704 ssize_t len; 4705 struct stdata *stp = SOTOV(so)->v_stream; 4706 int so_state; 4707 queue_t *udp_wq; 4708 boolean_t connected; 4709 mblk_t *mpdata = NULL; 4710 sotpi_info_t *sti = SOTOTPI(so); 4711 uint32_t auditing = AU_AUDITING(); 4712 4713 ASSERT(name != NULL && namelen != 0); 4714 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4715 ASSERT(!(so->so_mode & SM_EXDATA)); 4716 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4717 ASSERT(SOTOV(so)->v_type == VSOCK); 4718 4719 /* Caller checked for proper length */ 4720 len = uiop->uio_resid; 4721 ASSERT(len <= sti->sti_tidu_size); 4722 4723 /* Length and family checks have been done by caller */ 4724 ASSERT(name->sa_family == so->so_family); 4725 ASSERT(so->so_family == AF_INET || 4726 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4727 ASSERT(so->so_family == AF_INET6 || 4728 (namelen == (socklen_t)sizeof (struct 
sockaddr_in))); 4729 4730 addr = name; 4731 addrlen = namelen; 4732 4733 if (stp->sd_sidp != NULL && 4734 (error = straccess(stp, JCWRITE)) != 0) 4735 goto done; 4736 4737 so_state = so->so_state; 4738 4739 connected = so_state & SS_ISCONNECTED; 4740 if (!connected) { 4741 tudr.PRIM_type = T_UNITDATA_REQ; 4742 tudr.DEST_length = addrlen; 4743 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4744 tudr.OPT_length = 0; 4745 tudr.OPT_offset = 0; 4746 4747 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4748 _ALLOC_INTR, CRED()); 4749 if (mp == NULL) { 4750 /* 4751 * Caught a signal waiting for memory. 4752 * Let send* return EINTR. 4753 */ 4754 error = EINTR; 4755 goto done; 4756 } 4757 } 4758 4759 /* 4760 * For UDP we don't break up the copyin into smaller pieces 4761 * as in the TCP case. That means if ENOMEM is returned by 4762 * mcopyinuio() then the uio vector has not been modified at 4763 * all and we fallback to either strwrite() or kstrputmsg() 4764 * below. Note also that we never generate priority messages 4765 * from here. 4766 */ 4767 udp_wq = stp->sd_wrq->q_next; 4768 if (canput(udp_wq) && 4769 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4770 ASSERT(DB_TYPE(mpdata) == M_DATA); 4771 ASSERT(uiop->uio_resid == 0); 4772 if (!connected) 4773 linkb(mp, mpdata); 4774 else 4775 mp = mpdata; 4776 if (auditing) 4777 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4778 4779 udp_wput(udp_wq, mp); 4780 return (0); 4781 } 4782 4783 ASSERT(mpdata == NULL); 4784 if (error != 0 && error != ENOMEM) { 4785 freemsg(mp); 4786 return (error); 4787 } 4788 4789 /* 4790 * For connected, let strwrite() handle the blocking case. 4791 * Otherwise we fall thru and use kstrputmsg(). 
4792 */ 4793 if (connected) 4794 return (strwrite(SOTOV(so), uiop, CRED())); 4795 4796 if (auditing) 4797 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4798 4799 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4800 done: 4801 #ifdef SOCK_DEBUG 4802 if (error != 0) { 4803 eprintsoline(so, error); 4804 } 4805 #endif /* SOCK_DEBUG */ 4806 return (error); 4807 } 4808 4809 int 4810 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4811 { 4812 struct stdata *stp = SOTOV(so)->v_stream; 4813 ssize_t iosize, rmax, maxblk; 4814 queue_t *tcp_wq = stp->sd_wrq->q_next; 4815 mblk_t *newmp; 4816 int error = 0, wflag = 0; 4817 4818 ASSERT(so->so_mode & SM_BYTESTREAM); 4819 ASSERT(SOTOV(so)->v_type == VSOCK); 4820 4821 if (stp->sd_sidp != NULL && 4822 (error = straccess(stp, JCWRITE)) != 0) 4823 return (error); 4824 4825 if (uiop == NULL) { 4826 /* 4827 * kstrwritemp() should have checked sd_flag and 4828 * flow-control before coming here. If we end up 4829 * here it means that we can simply pass down the 4830 * data to tcp. 
4831 */ 4832 ASSERT(mp != NULL); 4833 if (stp->sd_wputdatafunc != NULL) { 4834 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4835 NULL, NULL, NULL); 4836 if (newmp == NULL) { 4837 /* The caller will free mp */ 4838 return (ECOMM); 4839 } 4840 mp = newmp; 4841 } 4842 tcp_wput(tcp_wq, mp); 4843 return (0); 4844 } 4845 4846 /* Fallback to strwrite() to do proper error handling */ 4847 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4848 return (strwrite(SOTOV(so), uiop, cr)); 4849 4850 rmax = stp->sd_qn_maxpsz; 4851 ASSERT(rmax >= 0 || rmax == INFPSZ); 4852 if (rmax == 0 || uiop->uio_resid <= 0) 4853 return (0); 4854 4855 if (rmax == INFPSZ) 4856 rmax = uiop->uio_resid; 4857 4858 maxblk = stp->sd_maxblk; 4859 4860 for (;;) { 4861 iosize = MIN(uiop->uio_resid, rmax); 4862 4863 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4864 if (mp == NULL) { 4865 /* 4866 * Fallback to strwrite() for ENOMEM; if this 4867 * is our first time in this routine and the uio 4868 * vector has not been modified, we will end up 4869 * calling strwrite() without any flag set. 4870 */ 4871 if (error == ENOMEM) 4872 goto slow_send; 4873 else 4874 return (error); 4875 } 4876 ASSERT(uiop->uio_resid >= 0); 4877 /* 4878 * If mp is non-NULL and ENOMEM is set, it means that 4879 * mcopyinuio() was able to break down some of the user 4880 * data into one or more mblks. Send the partial data 4881 * to tcp and let the rest be handled in strwrite(). 
4882 */ 4883 ASSERT(error == 0 || error == ENOMEM); 4884 if (stp->sd_wputdatafunc != NULL) { 4885 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4886 NULL, NULL, NULL); 4887 if (newmp == NULL) { 4888 /* The caller will free mp */ 4889 return (ECOMM); 4890 } 4891 mp = newmp; 4892 } 4893 tcp_wput(tcp_wq, mp); 4894 4895 wflag |= NOINTR; 4896 4897 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4898 ASSERT(error == 0); 4899 break; 4900 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4901 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4902 slow_send: 4903 /* 4904 * We were able to send down partial data using 4905 * the direct call interface, but are now relying 4906 * on strwrite() to handle the non-fastpath cases. 4907 * If the socket is blocking we will sleep in 4908 * strwaitq() until write is permitted, otherwise, 4909 * we will need to return the amount of bytes 4910 * written so far back to the app. This is the 4911 * reason why we pass NOINTR flag to strwrite() 4912 * for non-blocking socket, because we don't want 4913 * to return EAGAIN when portion of the user data 4914 * has actually been sent down. 4915 */ 4916 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4917 } 4918 } 4919 return (0); 4920 } 4921 4922 /* 4923 * Update sti_faddr by asking the transport (unless AF_UNIX). 
4924 */ 4925 /* ARGSUSED */ 4926 int 4927 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4928 boolean_t accept, struct cred *cr) 4929 { 4930 struct strbuf strbuf; 4931 int error = 0, res; 4932 void *addr; 4933 t_uscalar_t addrlen; 4934 k_sigset_t smask; 4935 sotpi_info_t *sti = SOTOTPI(so); 4936 4937 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4938 (void *)so, pr_state(so->so_state, so->so_mode))); 4939 4940 ASSERT(*namelen > 0); 4941 mutex_enter(&so->so_lock); 4942 so_lock_single(so); /* Set SOLOCKED */ 4943 4944 if (accept) { 4945 bcopy(sti->sti_faddr_sa, name, 4946 MIN(*namelen, sti->sti_faddr_len)); 4947 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4948 goto done; 4949 } 4950 4951 if (!(so->so_state & SS_ISCONNECTED)) { 4952 error = ENOTCONN; 4953 goto done; 4954 } 4955 /* Added this check for X/Open */ 4956 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4957 error = EINVAL; 4958 if (xnet_check_print) { 4959 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4960 } 4961 goto done; 4962 } 4963 4964 if (sti->sti_faddr_valid) { 4965 bcopy(sti->sti_faddr_sa, name, 4966 MIN(*namelen, sti->sti_faddr_len)); 4967 *namelen = sti->sti_faddr_noxlate ? 
0: sti->sti_faddr_len; 4968 goto done; 4969 } 4970 4971 #ifdef DEBUG 4972 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4973 pr_addr(so->so_family, sti->sti_faddr_sa, 4974 (t_uscalar_t)sti->sti_faddr_len))); 4975 #endif /* DEBUG */ 4976 4977 if (so->so_family == AF_UNIX) { 4978 /* Transport has different name space - return local info */ 4979 if (sti->sti_faddr_noxlate) 4980 *namelen = 0; 4981 error = 0; 4982 goto done; 4983 } 4984 4985 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4986 4987 ASSERT(sti->sti_faddr_sa); 4988 /* Allocate local buffer to use with ioctl */ 4989 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4990 mutex_exit(&so->so_lock); 4991 addr = kmem_alloc(addrlen, KM_SLEEP); 4992 4993 /* 4994 * Issue TI_GETPEERNAME with signals masked. 4995 * Put the result in sti_faddr_sa so that getpeername works after 4996 * a shutdown(output). 4997 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4998 * back to the socket. 4999 */ 5000 strbuf.buf = addr; 5001 strbuf.maxlen = addrlen; 5002 strbuf.len = 0; 5003 5004 sigintr(&smask, 0); 5005 res = 0; 5006 ASSERT(cr); 5007 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 5008 0, K_TO_K, cr, &res); 5009 sigunintr(&smask); 5010 5011 mutex_enter(&so->so_lock); 5012 /* 5013 * If there is an error record the error in so_error put don't fail 5014 * the getpeername. Instead fallback on the recorded 5015 * sti->sti_faddr_sa. 5016 */ 5017 if (error) { 5018 /* 5019 * Various stream head errors can be returned to the ioctl. 5020 * However, it is impossible to determine which ones of 5021 * these are really socket level errors that were incorrectly 5022 * consumed by the ioctl. Thus this code silently ignores the 5023 * error - to code explicitly does not reinstate the error 5024 * using soseterror(). 5025 * Experiments have shows that at least this set of 5026 * errors are reported and should not be reinstated on the 5027 * socket: 5028 * EINVAL E.g. 
if an I_LINK was in effect when 5029 * getpeername was called. 5030 * EPIPE The ioctl error semantics prefer the write 5031 * side error over the read side error. 5032 * ENOTCONN The transport just got disconnected but 5033 * sockfs had not yet seen the T_DISCON_IND 5034 * when issuing the ioctl. 5035 */ 5036 error = 0; 5037 } else if (res == 0 && strbuf.len > 0 && 5038 (so->so_state & SS_ISCONNECTED)) { 5039 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 5040 sti->sti_faddr_len = (socklen_t)strbuf.len; 5041 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 5042 sti->sti_faddr_valid = 1; 5043 5044 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 5045 *namelen = sti->sti_faddr_len; 5046 } 5047 kmem_free(addr, addrlen); 5048 #ifdef DEBUG 5049 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 5050 pr_addr(so->so_family, sti->sti_faddr_sa, 5051 (t_uscalar_t)sti->sti_faddr_len))); 5052 #endif /* DEBUG */ 5053 done: 5054 so_unlock_single(so, SOLOCKED); 5055 mutex_exit(&so->so_lock); 5056 return (error); 5057 } 5058 5059 /* 5060 * Update sti_laddr by asking the transport (unless AF_UNIX). 
5061 */ 5062 int 5063 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 5064 struct cred *cr) 5065 { 5066 struct strbuf strbuf; 5067 int error = 0, res; 5068 void *addr; 5069 t_uscalar_t addrlen; 5070 k_sigset_t smask; 5071 sotpi_info_t *sti = SOTOTPI(so); 5072 5073 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 5074 (void *)so, pr_state(so->so_state, so->so_mode))); 5075 5076 ASSERT(*namelen > 0); 5077 mutex_enter(&so->so_lock); 5078 so_lock_single(so); /* Set SOLOCKED */ 5079 5080 #ifdef DEBUG 5081 5082 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 5083 pr_addr(so->so_family, sti->sti_laddr_sa, 5084 (t_uscalar_t)sti->sti_laddr_len))); 5085 #endif /* DEBUG */ 5086 if (sti->sti_laddr_valid) { 5087 bcopy(sti->sti_laddr_sa, name, 5088 MIN(*namelen, sti->sti_laddr_len)); 5089 *namelen = sti->sti_laddr_len; 5090 goto done; 5091 } 5092 5093 if (so->so_family == AF_UNIX) { 5094 /* 5095 * Transport has different name space - return local info. If we 5096 * have enough space, let consumers know the family. 5097 */ 5098 if (*namelen >= sizeof (sa_family_t)) { 5099 name->sa_family = AF_UNIX; 5100 *namelen = sizeof (sa_family_t); 5101 } else { 5102 *namelen = 0; 5103 } 5104 error = 0; 5105 goto done; 5106 } 5107 if (!(so->so_state & SS_ISBOUND)) { 5108 /* If not bound, then nothing to return. */ 5109 error = 0; 5110 goto done; 5111 } 5112 5113 /* Allocate local buffer to use with ioctl */ 5114 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 5115 mutex_exit(&so->so_lock); 5116 addr = kmem_alloc(addrlen, KM_SLEEP); 5117 5118 /* 5119 * Issue TI_GETMYNAME with signals masked. 5120 * Put the result in sti_laddr_sa so that getsockname works after 5121 * a shutdown(output). 5122 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5123 * back to the socket. 
5124 */ 5125 strbuf.buf = addr; 5126 strbuf.maxlen = addrlen; 5127 strbuf.len = 0; 5128 5129 sigintr(&smask, 0); 5130 res = 0; 5131 ASSERT(cr); 5132 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5133 0, K_TO_K, cr, &res); 5134 sigunintr(&smask); 5135 5136 mutex_enter(&so->so_lock); 5137 /* 5138 * If there is an error record the error in so_error put don't fail 5139 * the getsockname. Instead fallback on the recorded 5140 * sti->sti_laddr_sa. 5141 */ 5142 if (error) { 5143 /* 5144 * Various stream head errors can be returned to the ioctl. 5145 * However, it is impossible to determine which ones of 5146 * these are really socket level errors that were incorrectly 5147 * consumed by the ioctl. Thus this code silently ignores the 5148 * error - to code explicitly does not reinstate the error 5149 * using soseterror(). 5150 * Experiments have shows that at least this set of 5151 * errors are reported and should not be reinstated on the 5152 * socket: 5153 * EINVAL E.g. if an I_LINK was in effect when 5154 * getsockname was called. 5155 * EPIPE The ioctl error semantics prefer the write 5156 * side error over the read side error. 5157 */ 5158 error = 0; 5159 } else if (res == 0 && strbuf.len > 0 && 5160 (so->so_state & SS_ISBOUND)) { 5161 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5162 sti->sti_laddr_len = (socklen_t)strbuf.len; 5163 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5164 sti->sti_laddr_valid = 1; 5165 5166 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5167 *namelen = sti->sti_laddr_len; 5168 } 5169 kmem_free(addr, addrlen); 5170 #ifdef DEBUG 5171 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5172 pr_addr(so->so_family, sti->sti_laddr_sa, 5173 (t_uscalar_t)sti->sti_laddr_len))); 5174 #endif /* DEBUG */ 5175 done: 5176 so_unlock_single(so, SOLOCKED); 5177 mutex_exit(&so->so_lock); 5178 return (error); 5179 } 5180 5181 /* 5182 * Get socket options. 
For SOL_SOCKET options some options are handled 5183 * by the sockfs while others use the value recorded in the sonode as a 5184 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5185 * 5186 * On the return most *optlenp bytes are copied to optval. 5187 */ 5188 /* ARGSUSED */ 5189 int 5190 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5191 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5192 { 5193 struct T_optmgmt_req optmgmt_req; 5194 struct T_optmgmt_ack *optmgmt_ack; 5195 struct opthdr oh; 5196 struct opthdr *opt_res; 5197 mblk_t *mp = NULL; 5198 int error = 0; 5199 void *option = NULL; /* Set if fallback value */ 5200 t_uscalar_t maxlen = *optlenp; 5201 t_uscalar_t len; 5202 uint32_t value; 5203 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5204 struct timeval32 tmo_val32; 5205 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5206 5207 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5208 (void *)so, level, option_name, optval, (void *)optlenp, 5209 pr_state(so->so_state, so->so_mode))); 5210 5211 mutex_enter(&so->so_lock); 5212 so_lock_single(so); /* Set SOLOCKED */ 5213 5214 /* 5215 * Check for SOL_SOCKET options. 5216 * Certain SOL_SOCKET options are returned directly whereas 5217 * others only provide a default (fallback) value should 5218 * the T_SVR4_OPTMGMT_REQ fail. 
5219 */ 5220 if (level == SOL_SOCKET) { 5221 /* Check parameters */ 5222 switch (option_name) { 5223 case SO_TYPE: 5224 case SO_ERROR: 5225 case SO_DEBUG: 5226 case SO_ACCEPTCONN: 5227 case SO_REUSEADDR: 5228 case SO_REUSEPORT: 5229 case SO_KEEPALIVE: 5230 case SO_DONTROUTE: 5231 case SO_BROADCAST: 5232 case SO_USELOOPBACK: 5233 case SO_OOBINLINE: 5234 case SO_SNDBUF: 5235 case SO_RCVBUF: 5236 #ifdef notyet 5237 case SO_SNDLOWAT: 5238 case SO_RCVLOWAT: 5239 #endif /* notyet */ 5240 case SO_DOMAIN: 5241 case SO_DGRAM_ERRIND: 5242 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5243 error = EINVAL; 5244 eprintsoline(so, error); 5245 goto done2; 5246 } 5247 break; 5248 case SO_RCVTIMEO: 5249 case SO_SNDTIMEO: 5250 if (get_udatamodel() == DATAMODEL_NONE || 5251 get_udatamodel() == DATAMODEL_NATIVE) { 5252 if (maxlen < sizeof (struct timeval)) { 5253 error = EINVAL; 5254 eprintsoline(so, error); 5255 goto done2; 5256 } 5257 } else { 5258 if (maxlen < sizeof (struct timeval32)) { 5259 error = EINVAL; 5260 eprintsoline(so, error); 5261 goto done2; 5262 } 5263 5264 } 5265 break; 5266 case SO_LINGER: 5267 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5268 error = EINVAL; 5269 eprintsoline(so, error); 5270 goto done2; 5271 } 5272 break; 5273 case SO_SND_BUFINFO: 5274 if (maxlen < (t_uscalar_t) 5275 sizeof (struct so_snd_bufinfo)) { 5276 error = EINVAL; 5277 eprintsoline(so, error); 5278 goto done2; 5279 } 5280 break; 5281 } 5282 5283 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5284 5285 switch (option_name) { 5286 case SO_TYPE: 5287 value = so->so_type; 5288 option = &value; 5289 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5290 5291 case SO_ERROR: 5292 value = sogeterr(so, B_TRUE); 5293 option = &value; 5294 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5295 5296 case SO_ACCEPTCONN: 5297 if (so->so_state & SS_ACCEPTCONN) 5298 value = SO_ACCEPTCONN; 5299 else 5300 value = 0; 5301 #ifdef DEBUG 5302 if (value) { 5303 dprintso(so, 1, 5304 
("sotpi_getsockopt: 0x%x is set\n", 5305 option_name)); 5306 } else { 5307 dprintso(so, 1, 5308 ("sotpi_getsockopt: 0x%x not set\n", 5309 option_name)); 5310 } 5311 #endif /* DEBUG */ 5312 option = &value; 5313 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5314 5315 case SO_DEBUG: 5316 case SO_REUSEADDR: 5317 case SO_REUSEPORT: 5318 case SO_KEEPALIVE: 5319 case SO_DONTROUTE: 5320 case SO_BROADCAST: 5321 case SO_USELOOPBACK: 5322 case SO_OOBINLINE: 5323 case SO_DGRAM_ERRIND: 5324 value = (so->so_options & option_name); 5325 #ifdef DEBUG 5326 if (value) { 5327 dprintso(so, 1, 5328 ("sotpi_getsockopt: 0x%x is set\n", 5329 option_name)); 5330 } else { 5331 dprintso(so, 1, 5332 ("sotpi_getsockopt: 0x%x not set\n", 5333 option_name)); 5334 } 5335 #endif /* DEBUG */ 5336 option = &value; 5337 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5338 5339 /* 5340 * The following options are only returned by sockfs when the 5341 * T_SVR4_OPTMGMT_REQ fails. 5342 */ 5343 case SO_LINGER: 5344 option = &so->so_linger; 5345 len = (t_uscalar_t)sizeof (struct linger); 5346 break; 5347 case SO_SNDBUF: { 5348 ssize_t lvalue; 5349 5350 /* 5351 * If the option has not been set then get a default 5352 * value from the read queue. This value is 5353 * returned if the transport fails 5354 * the T_SVR4_OPTMGMT_REQ. 5355 */ 5356 lvalue = so->so_sndbuf; 5357 if (lvalue == 0) { 5358 mutex_exit(&so->so_lock); 5359 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5360 QHIWAT, 0, &lvalue); 5361 mutex_enter(&so->so_lock); 5362 dprintso(so, 1, 5363 ("got SO_SNDBUF %ld from q\n", lvalue)); 5364 } 5365 value = (int)lvalue; 5366 option = &value; 5367 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5368 break; 5369 } 5370 case SO_RCVBUF: { 5371 ssize_t lvalue; 5372 5373 /* 5374 * If the option has not been set then get a default 5375 * value from the read queue. This value is 5376 * returned if the transport fails 5377 * the T_SVR4_OPTMGMT_REQ. 
5378 * 5379 * XXX If SO_RCVBUF has been set and this is an 5380 * XPG 4.2 application then do not ask the transport 5381 * since the transport might adjust the value and not 5382 * return exactly what was set by the application. 5383 * For non-XPG 4.2 application we return the value 5384 * that the transport is actually using. 5385 */ 5386 lvalue = so->so_rcvbuf; 5387 if (lvalue == 0) { 5388 mutex_exit(&so->so_lock); 5389 (void) strqget(RD(strvp2wq(SOTOV(so))), 5390 QHIWAT, 0, &lvalue); 5391 mutex_enter(&so->so_lock); 5392 dprintso(so, 1, 5393 ("got SO_RCVBUF %ld from q\n", lvalue)); 5394 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5395 value = (int)lvalue; 5396 option = &value; 5397 goto copyout; /* skip asking transport */ 5398 } 5399 value = (int)lvalue; 5400 option = &value; 5401 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5402 break; 5403 } 5404 case SO_DOMAIN: 5405 value = so->so_family; 5406 option = &value; 5407 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5408 5409 #ifdef notyet 5410 /* 5411 * We do not implement the semantics of these options 5412 * thus we shouldn't implement the options either. 
5413 */ 5414 case SO_SNDLOWAT: 5415 value = so->so_sndlowat; 5416 option = &value; 5417 break; 5418 case SO_RCVLOWAT: 5419 value = so->so_rcvlowat; 5420 option = &value; 5421 break; 5422 #endif /* notyet */ 5423 case SO_SNDTIMEO: 5424 case SO_RCVTIMEO: { 5425 clock_t val; 5426 5427 if (option_name == SO_RCVTIMEO) 5428 val = drv_hztousec(so->so_rcvtimeo); 5429 else 5430 val = drv_hztousec(so->so_sndtimeo); 5431 tmo_val.tv_sec = val / (1000 * 1000); 5432 tmo_val.tv_usec = val % (1000 * 1000); 5433 if (get_udatamodel() == DATAMODEL_NONE || 5434 get_udatamodel() == DATAMODEL_NATIVE) { 5435 option = &tmo_val; 5436 len = sizeof (struct timeval); 5437 } else { 5438 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5439 option = &tmo_val32; 5440 len = sizeof (struct timeval32); 5441 } 5442 break; 5443 } 5444 case SO_SND_BUFINFO: { 5445 snd_bufinfo.sbi_wroff = 5446 (so->so_proto_props).sopp_wroff; 5447 snd_bufinfo.sbi_maxblk = 5448 (so->so_proto_props).sopp_maxblk; 5449 snd_bufinfo.sbi_maxpsz = 5450 (so->so_proto_props).sopp_maxpsz; 5451 snd_bufinfo.sbi_tail = 5452 (so->so_proto_props).sopp_tail; 5453 option = &snd_bufinfo; 5454 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5455 break; 5456 } 5457 } 5458 } 5459 5460 mutex_exit(&so->so_lock); 5461 5462 /* Send request */ 5463 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5464 optmgmt_req.MGMT_flags = T_CHECK; 5465 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5466 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5467 5468 oh.level = level; 5469 oh.name = option_name; 5470 oh.len = maxlen; 5471 5472 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5473 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5474 /* Let option management work in the presence of data flow control */ 5475 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5476 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5477 mp = NULL; 5478 mutex_enter(&so->so_lock); 5479 if (error) { 5480 eprintsoline(so, error); 5481 goto done2; 
5482 } 5483 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5484 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5485 if (error) { 5486 if (option != NULL) { 5487 /* We have a fallback value */ 5488 error = 0; 5489 goto copyout; 5490 } 5491 eprintsoline(so, error); 5492 goto done2; 5493 } 5494 ASSERT(mp); 5495 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5496 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5497 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5498 if (opt_res == NULL) { 5499 if (option != NULL) { 5500 /* We have a fallback value */ 5501 error = 0; 5502 goto copyout; 5503 } 5504 error = EPROTO; 5505 eprintsoline(so, error); 5506 goto done; 5507 } 5508 option = &opt_res[1]; 5509 5510 /* check to ensure that the option is within bounds */ 5511 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5512 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5513 if (option != NULL) { 5514 /* We have a fallback value */ 5515 error = 0; 5516 goto copyout; 5517 } 5518 error = EPROTO; 5519 eprintsoline(so, error); 5520 goto done; 5521 } 5522 5523 len = opt_res->len; 5524 5525 copyout: { 5526 t_uscalar_t size = MIN(len, maxlen); 5527 bcopy(option, optval, size); 5528 bcopy(&size, optlenp, sizeof (size)); 5529 } 5530 done: 5531 freemsg(mp); 5532 done2: 5533 so_unlock_single(so, SOLOCKED); 5534 mutex_exit(&so->so_lock); 5535 5536 return (error); 5537 } 5538 5539 /* 5540 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5541 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5542 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5543 * setsockopt has to work even if the transport does not support the option. 
5544 */ 5545 /* ARGSUSED */ 5546 int 5547 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5548 const void *optval, t_uscalar_t optlen, struct cred *cr) 5549 { 5550 struct T_optmgmt_req optmgmt_req; 5551 struct opthdr oh; 5552 mblk_t *mp; 5553 int error = 0; 5554 boolean_t handled = B_FALSE; 5555 5556 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5557 (void *)so, level, option_name, optval, optlen, 5558 pr_state(so->so_state, so->so_mode))); 5559 5560 /* X/Open requires this check */ 5561 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5562 if (xnet_check_print) 5563 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5564 return (EINVAL); 5565 } 5566 5567 mutex_enter(&so->so_lock); 5568 so_lock_single(so); /* Set SOLOCKED */ 5569 mutex_exit(&so->so_lock); 5570 5571 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5572 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5573 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5574 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5575 5576 oh.level = level; 5577 oh.name = option_name; 5578 oh.len = optlen; 5579 5580 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5581 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5582 /* Let option management work in the presence of data flow control */ 5583 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5584 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5585 mp = NULL; 5586 mutex_enter(&so->so_lock); 5587 if (error) { 5588 eprintsoline(so, error); 5589 goto done2; 5590 } 5591 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5592 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5593 if (error) { 5594 eprintsoline(so, error); 5595 goto done; 5596 } 5597 ASSERT(mp); 5598 /* No need to verify T_optmgmt_ack */ 5599 freemsg(mp); 5600 done: 5601 /* 5602 * Check for SOL_SOCKET options and record their values. 
5603 * If we know about a SOL_SOCKET parameter and the transport 5604 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5605 * EPROTO) we let the setsockopt succeed. 5606 */ 5607 if (level == SOL_SOCKET) { 5608 /* Check parameters */ 5609 switch (option_name) { 5610 case SO_DEBUG: 5611 case SO_REUSEADDR: 5612 case SO_REUSEPORT: 5613 case SO_KEEPALIVE: 5614 case SO_DONTROUTE: 5615 case SO_BROADCAST: 5616 case SO_USELOOPBACK: 5617 case SO_OOBINLINE: 5618 case SO_SNDBUF: 5619 case SO_RCVBUF: 5620 #ifdef notyet 5621 case SO_SNDLOWAT: 5622 case SO_RCVLOWAT: 5623 #endif /* notyet */ 5624 case SO_DGRAM_ERRIND: 5625 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5626 error = EINVAL; 5627 eprintsoline(so, error); 5628 goto done2; 5629 } 5630 ASSERT(optval); 5631 handled = B_TRUE; 5632 break; 5633 case SO_SNDTIMEO: 5634 case SO_RCVTIMEO: 5635 if (get_udatamodel() == DATAMODEL_NONE || 5636 get_udatamodel() == DATAMODEL_NATIVE) { 5637 if (optlen != sizeof (struct timeval)) { 5638 error = EINVAL; 5639 eprintsoline(so, error); 5640 goto done2; 5641 } 5642 } else { 5643 if (optlen != sizeof (struct timeval32)) { 5644 error = EINVAL; 5645 eprintsoline(so, error); 5646 goto done2; 5647 } 5648 } 5649 ASSERT(optval); 5650 handled = B_TRUE; 5651 break; 5652 case SO_LINGER: 5653 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5654 error = EINVAL; 5655 eprintsoline(so, error); 5656 goto done2; 5657 } 5658 ASSERT(optval); 5659 handled = B_TRUE; 5660 break; 5661 } 5662 5663 #define intvalue (*(int32_t *)optval) 5664 5665 switch (option_name) { 5666 case SO_TYPE: 5667 case SO_ERROR: 5668 case SO_ACCEPTCONN: 5669 /* Can't be set */ 5670 error = ENOPROTOOPT; 5671 goto done2; 5672 case SO_LINGER: { 5673 struct linger *l = (struct linger *)optval; 5674 5675 so->so_linger.l_linger = l->l_linger; 5676 if (l->l_onoff) { 5677 so->so_linger.l_onoff = SO_LINGER; 5678 so->so_options |= SO_LINGER; 5679 } else { 5680 so->so_linger.l_onoff = 0; 5681 so->so_options &= ~SO_LINGER; 5682 } 
5683 break; 5684 } 5685 5686 case SO_DEBUG: 5687 #ifdef SOCK_TEST 5688 if (intvalue & 2) 5689 sock_test_timelimit = 10 * hz; 5690 else 5691 sock_test_timelimit = 0; 5692 5693 if (intvalue & 4) 5694 do_useracc = 0; 5695 else 5696 do_useracc = 1; 5697 #endif /* SOCK_TEST */ 5698 /* FALLTHRU */ 5699 case SO_REUSEADDR: 5700 case SO_REUSEPORT: 5701 case SO_KEEPALIVE: 5702 case SO_DONTROUTE: 5703 case SO_BROADCAST: 5704 case SO_USELOOPBACK: 5705 case SO_OOBINLINE: 5706 case SO_DGRAM_ERRIND: 5707 if (intvalue != 0) { 5708 dprintso(so, 1, 5709 ("socket_setsockopt: setting 0x%x\n", 5710 option_name)); 5711 so->so_options |= option_name; 5712 } else { 5713 dprintso(so, 1, 5714 ("socket_setsockopt: clearing 0x%x\n", 5715 option_name)); 5716 so->so_options &= ~option_name; 5717 } 5718 break; 5719 /* 5720 * The following options are only returned by us when the 5721 * transport layer fails. 5722 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5723 * since the transport might adjust the value and not 5724 * return exactly what was set by the application. 5725 */ 5726 case SO_SNDBUF: 5727 so->so_sndbuf = intvalue; 5728 break; 5729 case SO_RCVBUF: 5730 so->so_rcvbuf = intvalue; 5731 break; 5732 case SO_RCVPSH: 5733 so->so_rcv_timer_interval = intvalue; 5734 break; 5735 #ifdef notyet 5736 /* 5737 * We do not implement the semantics of these options 5738 * thus we shouldn't implement the options either. 
5739 */ 5740 case SO_SNDLOWAT: 5741 so->so_sndlowat = intvalue; 5742 break; 5743 case SO_RCVLOWAT: 5744 so->so_rcvlowat = intvalue; 5745 break; 5746 #endif /* notyet */ 5747 case SO_SNDTIMEO: 5748 case SO_RCVTIMEO: { 5749 struct timeval tl; 5750 clock_t val; 5751 5752 if (get_udatamodel() == DATAMODEL_NONE || 5753 get_udatamodel() == DATAMODEL_NATIVE) 5754 bcopy(&tl, (struct timeval *)optval, 5755 sizeof (struct timeval)); 5756 else 5757 TIMEVAL32_TO_TIMEVAL(&tl, 5758 (struct timeval32 *)optval); 5759 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5760 if (option_name == SO_RCVTIMEO) 5761 so->so_rcvtimeo = drv_usectohz(val); 5762 else 5763 so->so_sndtimeo = drv_usectohz(val); 5764 break; 5765 } 5766 } 5767 #undef intvalue 5768 5769 if (error) { 5770 if ((error == ENOPROTOOPT || error == EPROTO || 5771 error == EINVAL) && handled) { 5772 dprintso(so, 1, 5773 ("setsockopt: ignoring error %d for 0x%x\n", 5774 error, option_name)); 5775 error = 0; 5776 } 5777 } 5778 } 5779 done2: 5780 so_unlock_single(so, SOLOCKED); 5781 mutex_exit(&so->so_lock); 5782 return (error); 5783 } 5784 5785 /* 5786 * sotpi_close() is called when the last open reference goes away. 
5787 */ 5788 /* ARGSUSED */ 5789 int 5790 sotpi_close(struct sonode *so, int flag, struct cred *cr) 5791 { 5792 struct vnode *vp = SOTOV(so); 5793 dev_t dev; 5794 int error = 0; 5795 sotpi_info_t *sti = SOTOTPI(so); 5796 5797 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 5798 (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 5799 5800 dev = sti->sti_dev; 5801 5802 ASSERT(STREAMSTAB(getmajor(dev))); 5803 5804 mutex_enter(&so->so_lock); 5805 so_lock_single(so); /* Set SOLOCKED */ 5806 5807 ASSERT(so_verify_oobstate(so)); 5808 5809 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 5810 sti->sti_nl7c_flags = 0; 5811 nl7c_close(so); 5812 } 5813 5814 if (vp->v_stream != NULL) { 5815 vnode_t *ux_vp; 5816 5817 if (so->so_family == AF_UNIX) { 5818 /* Could avoid this when CANTSENDMORE for !dgram */ 5819 so_unix_close(so); 5820 } 5821 5822 mutex_exit(&so->so_lock); 5823 /* 5824 * Disassemble the linkage from the AF_UNIX underlying file 5825 * system vnode to this socket (by atomically clearing 5826 * v_stream in vn_rele_stream) before strclose clears sd_vnode 5827 * and frees the stream head. 5828 */ 5829 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 5830 ASSERT(ux_vp->v_stream); 5831 sti->sti_ux_bound_vp = NULL; 5832 vn_rele_stream(ux_vp); 5833 } 5834 error = strclose(vp, flag, cr); 5835 vp->v_stream = NULL; 5836 mutex_enter(&so->so_lock); 5837 } 5838 5839 /* 5840 * Flush the T_DISCON_IND on sti_discon_ind_mp. 5841 */ 5842 so_flush_discon_ind(so); 5843 5844 so_unlock_single(so, SOLOCKED); 5845 mutex_exit(&so->so_lock); 5846 5847 /* 5848 * Needed for STREAMs. 5849 * Decrement the device driver's reference count for streams 5850 * opened via the clone dip. The driver was held in clone_open(). 5851 * The absence of clone_close() forces this asymmetry. 
5852 */ 5853 if (so->so_flag & SOCLONE) 5854 ddi_rele_driver(getmajor(dev)); 5855 5856 return (error); 5857 } 5858 5859 static int 5860 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 5861 struct cred *cr, int32_t *rvalp) 5862 { 5863 struct vnode *vp = SOTOV(so); 5864 sotpi_info_t *sti = SOTOTPI(so); 5865 int error = 0; 5866 5867 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 5868 cmd, arg, pr_state(so->so_state, so->so_mode))); 5869 5870 switch (cmd) { 5871 case SIOCSQPTR: 5872 /* 5873 * SIOCSQPTR is valid only when helper stream is created 5874 * by the protocol. 5875 */ 5876 case _I_INSERT: 5877 case _I_REMOVE: 5878 /* 5879 * Since there's no compelling reason to support these ioctls 5880 * on sockets, and doing so would increase the complexity 5881 * markedly, prevent it. 5882 */ 5883 return (EOPNOTSUPP); 5884 5885 case I_FIND: 5886 case I_LIST: 5887 case I_LOOK: 5888 case I_POP: 5889 case I_PUSH: 5890 /* 5891 * To prevent races and inconsistencies between the actual 5892 * state of the stream and the state according to the sonode, 5893 * we serialize all operations which modify or operate on the 5894 * list of modules on the socket's stream. 5895 */ 5896 mutex_enter(&sti->sti_plumb_lock); 5897 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 5898 mutex_exit(&sti->sti_plumb_lock); 5899 return (error); 5900 5901 default: 5902 if (so->so_version != SOV_STREAM) 5903 break; 5904 5905 /* 5906 * The imaginary "sockmod" has been popped; act as a stream. 5907 */ 5908 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5909 } 5910 5911 ASSERT(so->so_version != SOV_STREAM); 5912 5913 /* 5914 * Process socket-specific ioctls. 
5915 */ 5916 switch (cmd) { 5917 case FIONBIO: { 5918 int32_t value; 5919 5920 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5921 (mode & (int)FKIOCTL))) 5922 return (EFAULT); 5923 5924 mutex_enter(&so->so_lock); 5925 if (value) { 5926 so->so_state |= SS_NDELAY; 5927 } else { 5928 so->so_state &= ~SS_NDELAY; 5929 } 5930 mutex_exit(&so->so_lock); 5931 return (0); 5932 } 5933 5934 case FIOASYNC: { 5935 int32_t value; 5936 5937 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5938 (mode & (int)FKIOCTL))) 5939 return (EFAULT); 5940 5941 mutex_enter(&so->so_lock); 5942 /* 5943 * SS_ASYNC flag not already set correctly? 5944 * (!value != !(so->so_state & SS_ASYNC)) 5945 * but some engineers find that too hard to read. 5946 */ 5947 if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 5948 value != 0 && (so->so_state & SS_ASYNC) == 0) 5949 error = so_flip_async(so, vp, mode, cr); 5950 mutex_exit(&so->so_lock); 5951 return (error); 5952 } 5953 5954 case SIOCSPGRP: 5955 case FIOSETOWN: { 5956 pid_t pgrp; 5957 5958 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 5959 (mode & (int)FKIOCTL))) 5960 return (EFAULT); 5961 5962 mutex_enter(&so->so_lock); 5963 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 5964 /* Any change? */ 5965 if (pgrp != so->so_pgrp) 5966 error = so_set_siggrp(so, vp, pgrp, mode, cr); 5967 mutex_exit(&so->so_lock); 5968 return (error); 5969 } 5970 case SIOCGPGRP: 5971 case FIOGETOWN: 5972 if (so_copyout(&so->so_pgrp, (void *)arg, 5973 sizeof (pid_t), (mode & (int)FKIOCTL))) 5974 return (EFAULT); 5975 return (0); 5976 5977 case SIOCATMARK: { 5978 int retval; 5979 uint_t so_state; 5980 5981 /* 5982 * strwaitmark has a finite timeout after which it 5983 * returns -1 if the mark state is undetermined. 
5984 * In order to avoid any race between the mark state 5985 * in sockfs and the mark state in the stream head this 5986 * routine loops until the mark state can be determined 5987 * (or the urgent data indication has been removed by some 5988 * other thread). 5989 */ 5990 do { 5991 mutex_enter(&so->so_lock); 5992 so_state = so->so_state; 5993 mutex_exit(&so->so_lock); 5994 if (so_state & SS_RCVATMARK) { 5995 retval = 1; 5996 } else if (!(so_state & SS_OOBPEND)) { 5997 /* 5998 * No SIGURG has been generated -- there is no 5999 * pending or present urgent data. Thus can't 6000 * possibly be at the mark. 6001 */ 6002 retval = 0; 6003 } else { 6004 /* 6005 * Have the stream head wait until there is 6006 * either some messages on the read queue, or 6007 * STRATMARK or STRNOTATMARK gets set. The 6008 * STRNOTATMARK flag is used so that the 6009 * transport can send up a MSGNOTMARKNEXT 6010 * M_DATA to indicate that it is not 6011 * at the mark and additional data is not about 6012 * to be send upstream. 6013 * 6014 * If the mark state is undetermined this will 6015 * return -1 and we will loop rechecking the 6016 * socket state. 6017 */ 6018 retval = strwaitmark(vp); 6019 } 6020 } while (retval == -1); 6021 6022 if (so_copyout(&retval, (void *)arg, sizeof (int), 6023 (mode & (int)FKIOCTL))) 6024 return (EFAULT); 6025 return (0); 6026 } 6027 6028 case I_FDINSERT: 6029 case I_SENDFD: 6030 case I_RECVFD: 6031 case I_ATMARK: 6032 case _SIOCSOCKFALLBACK: 6033 /* 6034 * These ioctls do not apply to sockets. I_FDINSERT can be 6035 * used to send M_PROTO messages without modifying the socket 6036 * state. I_SENDFD/RECVFD should not be used for socket file 6037 * descriptor passing since they assume a twisted stream. 6038 * SIOCATMARK must be used instead of I_ATMARK. 6039 * 6040 * _SIOCSOCKFALLBACK from an application should never be 6041 * processed. It is only generated by socktpi_open() or 6042 * in response to I_POP or I_PUSH. 
6043 */ 6044 #ifdef DEBUG 6045 zcmn_err(getzoneid(), CE_WARN, 6046 "Unsupported STREAMS ioctl 0x%x on socket. " 6047 "Pid = %d\n", cmd, curproc->p_pid); 6048 #endif /* DEBUG */ 6049 return (EOPNOTSUPP); 6050 6051 case _I_GETPEERCRED: 6052 if ((mode & FKIOCTL) == 0) 6053 return (EINVAL); 6054 6055 mutex_enter(&so->so_lock); 6056 if ((so->so_mode & SM_CONNREQUIRED) == 0) { 6057 error = ENOTSUP; 6058 } else if ((so->so_state & SS_ISCONNECTED) == 0) { 6059 error = ENOTCONN; 6060 } else if (so->so_peercred != NULL) { 6061 k_peercred_t *kp = (k_peercred_t *)arg; 6062 kp->pc_cr = so->so_peercred; 6063 kp->pc_cpid = so->so_cpid; 6064 crhold(so->so_peercred); 6065 } else { 6066 error = EINVAL; 6067 } 6068 mutex_exit(&so->so_lock); 6069 return (error); 6070 6071 default: 6072 /* 6073 * Do the higher-order bits of the ioctl cmd indicate 6074 * that it is an I_* streams ioctl? 6075 */ 6076 if ((cmd & 0xffffff00U) == STR && 6077 so->so_version == SOV_SOCKBSD) { 6078 #ifdef DEBUG 6079 zcmn_err(getzoneid(), CE_WARN, 6080 "Unsupported STREAMS ioctl 0x%x on socket. " 6081 "Pid = %d\n", cmd, curproc->p_pid); 6082 #endif /* DEBUG */ 6083 return (EOPNOTSUPP); 6084 } 6085 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6086 } 6087 } 6088 6089 /* 6090 * Handle plumbing-related ioctls. 6091 */ 6092 static int 6093 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 6094 struct cred *cr, int32_t *rvalp) 6095 { 6096 static const char sockmod_name[] = "sockmod"; 6097 struct sonode *so = VTOSO(vp); 6098 char mname[FMNAMESZ + 1]; 6099 int error; 6100 sotpi_info_t *sti = SOTOTPI(so); 6101 6102 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 6103 6104 if (so->so_version == SOV_SOCKBSD) 6105 return (EOPNOTSUPP); 6106 6107 if (so->so_version == SOV_STREAM) { 6108 /* 6109 * The imaginary "sockmod" has been popped - act as a stream. 6110 * If this is a push of sockmod then change back to a socket. 6111 */ 6112 if (cmd == I_PUSH) { 6113 error = ((mode & FKIOCTL) ? 
copystr : copyinstr)( 6114 (void *)arg, mname, sizeof (mname), NULL); 6115 6116 if (error == 0 && strcmp(mname, sockmod_name) == 0) { 6117 dprintso(so, 0, ("socktpi_ioctl: going to " 6118 "socket version\n")); 6119 so_stream2sock(so); 6120 return (0); 6121 } 6122 } 6123 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6124 } 6125 6126 switch (cmd) { 6127 case I_PUSH: 6128 if (sti->sti_direct) { 6129 mutex_enter(&so->so_lock); 6130 so_lock_single(so); 6131 mutex_exit(&so->so_lock); 6132 6133 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 6134 cr, rvalp); 6135 6136 mutex_enter(&so->so_lock); 6137 if (error == 0) 6138 sti->sti_direct = 0; 6139 so_unlock_single(so, SOLOCKED); 6140 mutex_exit(&so->so_lock); 6141 6142 if (error != 0) 6143 return (error); 6144 } 6145 6146 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6147 if (error == 0) 6148 sti->sti_pushcnt++; 6149 return (error); 6150 6151 case I_POP: 6152 if (sti->sti_pushcnt == 0) { 6153 /* Emulate sockmod being popped */ 6154 dprintso(so, 0, 6155 ("socktpi_ioctl: going to STREAMS version\n")); 6156 return (so_sock2stream(so)); 6157 } 6158 6159 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6160 if (error == 0) 6161 sti->sti_pushcnt--; 6162 return (error); 6163 6164 case I_LIST: { 6165 struct str_mlist *kmlistp, *umlistp; 6166 struct str_list kstrlist; 6167 ssize_t kstrlistsize; 6168 int i, nmods; 6169 6170 STRUCT_DECL(str_list, ustrlist); 6171 STRUCT_INIT(ustrlist, mode); 6172 6173 if (arg == NULL) { 6174 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6175 if (error == 0) 6176 (*rvalp)++; /* Add one for sockmod */ 6177 return (error); 6178 } 6179 6180 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 6181 STRUCT_SIZE(ustrlist), mode & FKIOCTL); 6182 if (error != 0) 6183 return (error); 6184 6185 nmods = STRUCT_FGET(ustrlist, sl_nmods); 6186 if (nmods <= 0) 6187 return (EINVAL); 6188 /* 6189 * Ceiling nmods at nstrpush to prevent someone from 6190 * maliciously 
consuming lots of kernel memory. 6191 */ 6192 nmods = MIN(nmods, nstrpush); 6193 6194 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 6195 kstrlist.sl_nmods = nmods; 6196 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 6197 6198 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 6199 cr, rvalp); 6200 if (error != 0) 6201 goto done; 6202 6203 /* 6204 * Considering the module list as a 0-based array of sl_nmods 6205 * modules, sockmod should conceptually exist at slot 6206 * sti_pushcnt. Insert sockmod at this location by sliding all 6207 * of the module names after so_pushcnt over by one. We know 6208 * that there will be room to do this since we allocated 6209 * sl_modlist with an additional slot. 6210 */ 6211 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--) 6212 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1]; 6213 6214 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name); 6215 kstrlist.sl_nmods++; 6216 6217 /* 6218 * Copy all of the entries out to ustrlist. 
6219 */ 6220 kmlistp = kstrlist.sl_modlist; 6221 umlistp = STRUCT_FGETP(ustrlist, sl_modlist); 6222 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) { 6223 error = so_copyout(kmlistp++, umlistp++, 6224 sizeof (struct str_mlist), mode & FKIOCTL); 6225 if (error != 0) 6226 goto done; 6227 } 6228 6229 error = so_copyout(&i, (void *)arg, sizeof (int32_t), 6230 mode & FKIOCTL); 6231 if (error == 0) 6232 *rvalp = 0; 6233 done: 6234 kmem_free(kstrlist.sl_modlist, kstrlistsize); 6235 return (error); 6236 } 6237 case I_LOOK: 6238 if (sti->sti_pushcnt == 0) { 6239 return (so_copyout(sockmod_name, (void *)arg, 6240 sizeof (sockmod_name), mode & FKIOCTL)); 6241 } 6242 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6243 6244 case I_FIND: 6245 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6246 if (error && error != EINVAL) 6247 return (error); 6248 6249 /* if not found and string was sockmod return 1 */ 6250 if (*rvalp == 0 || error == EINVAL) { 6251 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6252 (void *)arg, mname, sizeof (mname), NULL); 6253 if (error == ENAMETOOLONG) 6254 error = EINVAL; 6255 6256 if (error == 0 && strcmp(mname, sockmod_name) == 0) 6257 *rvalp = 1; 6258 } 6259 return (error); 6260 6261 default: 6262 panic("socktpi_plumbioctl: unknown ioctl %d", cmd); 6263 break; 6264 } 6265 6266 return (0); 6267 } 6268 6269 /* 6270 * Wrapper around the streams poll routine that implements socket poll 6271 * semantics. 6272 * The sockfs never calls pollwakeup itself - the stream head take care 6273 * of all pollwakeups. Since sockfs never holds so_lock when calling the 6274 * stream head there can never be a deadlock due to holding so_lock across 6275 * pollwakeup and acquiring so_lock in this routine. 6276 * 6277 * However, since the performance of VOP_POLL is critical we avoid 6278 * acquiring so_lock here. 
This is based on two assumptions: 6279 * - The poll implementation holds locks to serialize the VOP_POLL call 6280 * and a pollwakeup for the same pollhead. This ensures that should 6281 * e.g. so_state change during a socktpi_poll call the pollwakeup 6282 * (which strsock_* and strrput conspire to issue) is issued after 6283 * the state change. Thus the pollwakeup will block until VOP_POLL has 6284 * returned and then wake up poll and have it call VOP_POLL again. 6285 * - The reading of so_state without holding so_lock does not result in 6286 * stale data that is older than the latest state change that has dropped 6287 * so_lock. This is ensured by the mutex_exit issuing the appropriate 6288 * memory barrier to force the data into the coherency domain. 6289 */ 6290 static int 6291 sotpi_poll( 6292 struct sonode *so, 6293 short events, 6294 int anyyet, 6295 short *reventsp, 6296 struct pollhead **phpp) 6297 { 6298 short origevents = events; 6299 struct vnode *vp = SOTOV(so); 6300 int error; 6301 int so_state = so->so_state; /* snapshot */ 6302 sotpi_info_t *sti = SOTOTPI(so); 6303 6304 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n", 6305 (void *)vp, pr_state(so_state, so->so_mode), so->so_error)); 6306 6307 ASSERT(vp->v_type == VSOCK); 6308 ASSERT(vp->v_stream != NULL); 6309 6310 if (so->so_version == SOV_STREAM) { 6311 /* The imaginary "sockmod" has been popped - act as a stream */ 6312 return (strpoll(vp->v_stream, events, anyyet, 6313 reventsp, phpp)); 6314 } 6315 6316 if (!(so_state & SS_ISCONNECTED) && 6317 (so->so_mode & SM_CONNREQUIRED)) { 6318 /* Not connected yet - turn off write side events */ 6319 events &= ~(POLLOUT|POLLWRBAND); 6320 } 6321 /* 6322 * Check for errors without calling strpoll if the caller wants them. 6323 * In sockets the errors are represented as input/output events 6324 * and there is no need to ask the stream head for this information. 
6325 */ 6326 if (so->so_error != 0 && 6327 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) { 6328 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; 6329 return (0); 6330 } 6331 /* 6332 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages. 6333 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA 6334 * will not trigger a POLLIN event with POLLRDDATA set. 6335 * The handling of urgent data (causing POLLRDBAND) is done by 6336 * inspecting SS_OOBPEND below. 6337 */ 6338 events |= POLLRDDATA; 6339 6340 /* 6341 * After shutdown(output) a stream head write error is set. 6342 * However, we should not return output events. 6343 */ 6344 events |= POLLNOERR; 6345 error = strpoll(vp->v_stream, events, anyyet, 6346 reventsp, phpp); 6347 if (error) 6348 return (error); 6349 6350 ASSERT(!(*reventsp & POLLERR)); 6351 6352 /* 6353 * Notes on T_CONN_IND handling for sockets. 6354 * 6355 * If strpoll() returned without events, SR_POLLIN is guaranteed 6356 * to be set, ensuring any subsequent strrput() runs pollwakeup(). 6357 * 6358 * Since the so_lock is not held, soqueueconnind() may have run 6359 * and a T_CONN_IND may be waiting. We now check for any queued 6360 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events 6361 * to ensure poll returns. 6362 * 6363 * However: 6364 * If the T_CONN_IND hasn't arrived by the time strpoll() returns, 6365 * when strrput() does run for an arriving M_PROTO with T_CONN_IND 6366 * the following actions will occur; taken together they ensure the 6367 * syscall will return. 6368 * 6369 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if 6370 * the accept() was run on a non-blocking socket sowaitconnind() 6371 * may have already returned EWOULDBLOCK, so not be waiting to 6372 * process the message. Additionally socktpi_poll() has probably 6373 * proceeded past the sti_conn_ind_head check below. 6374 * 2. 
strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake 6375 * this thread, however that could occur before poll_common() 6376 * has entered cv_wait. 6377 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock. 6378 * 6379 * Before proceeding to cv_wait() in poll_common() for an event, 6380 * poll_common() atomically checks for T_POLLWAKE under the pc_lock, 6381 * and if set, re-calls strpoll() to ensure the late arriving 6382 * T_CONN_IND is recognized, and pollsys() returns. 6383 */ 6384 6385 if (sti->sti_conn_ind_head != NULL) 6386 *reventsp |= (POLLIN|POLLRDNORM) & events; 6387 6388 if (so->so_state & SS_CANTRCVMORE) { 6389 *reventsp |= POLLRDHUP & events; 6390 6391 if (so->so_state & SS_CANTSENDMORE) 6392 *reventsp |= POLLHUP; 6393 } 6394 6395 if (so->so_state & SS_OOBPEND) 6396 *reventsp |= POLLRDBAND & events; 6397 6398 if (sti->sti_nl7c_rcv_mp != NULL) { 6399 *reventsp |= (POLLIN|POLLRDNORM) & events; 6400 } 6401 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 6402 ((POLLIN|POLLRDNORM) & *reventsp)) { 6403 sti->sti_nl7c_flags |= NL7C_POLLIN; 6404 } 6405 6406 return (0); 6407 } 6408 6409 /*ARGSUSED*/ 6410 static int 6411 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6412 { 6413 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6414 int error = 0; 6415 6416 error = sonode_constructor(buf, cdrarg, kmflags); 6417 if (error != 0) 6418 return (error); 6419 6420 error = i_sotpi_info_constructor(&st->st_info); 6421 if (error != 0) 6422 sonode_destructor(buf, cdrarg); 6423 6424 st->st_sonode.so_priv = &st->st_info; 6425 6426 return (error); 6427 } 6428 6429 /*ARGSUSED1*/ 6430 static void 6431 socktpi_destructor(void *buf, void *cdrarg) 6432 { 6433 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6434 6435 ASSERT(st->st_sonode.so_priv == &st->st_info); 6436 st->st_sonode.so_priv = NULL; 6437 6438 i_sotpi_info_destructor(&st->st_info); 6439 sonode_destructor(buf, cdrarg); 6440 } 6441 6442 static int 6443 socktpi_unix_constructor(void *buf, void *cdrarg, int 
kmflags) 6444 { 6445 int retval; 6446 6447 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6448 struct sonode *so = (struct sonode *)buf; 6449 sotpi_info_t *sti = SOTOTPI(so); 6450 6451 mutex_enter(&socklist.sl_lock); 6452 6453 sti->sti_next_so = socklist.sl_list; 6454 sti->sti_prev_so = NULL; 6455 if (sti->sti_next_so != NULL) 6456 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6457 socklist.sl_list = so; 6458 6459 mutex_exit(&socklist.sl_lock); 6460 6461 } 6462 return (retval); 6463 } 6464 6465 static void 6466 socktpi_unix_destructor(void *buf, void *cdrarg) 6467 { 6468 struct sonode *so = (struct sonode *)buf; 6469 sotpi_info_t *sti = SOTOTPI(so); 6470 6471 mutex_enter(&socklist.sl_lock); 6472 6473 if (sti->sti_next_so != NULL) 6474 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6475 if (sti->sti_prev_so != NULL) 6476 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6477 else 6478 socklist.sl_list = sti->sti_next_so; 6479 6480 mutex_exit(&socklist.sl_lock); 6481 6482 socktpi_destructor(buf, cdrarg); 6483 } 6484 6485 int 6486 socktpi_init(void) 6487 { 6488 /* 6489 * Create sonode caches. We create a special one for AF_UNIX so 6490 * that we can track them for netstat(1m). 6491 */ 6492 socktpi_cache = kmem_cache_create("socktpi_cache", 6493 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6494 socktpi_destructor, NULL, NULL, NULL, 0); 6495 6496 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6497 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6498 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6499 6500 return (0); 6501 } 6502 6503 /* 6504 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6505 * 6506 * Caller must still update state and mode using sotpi_update_state(). 
6507 */ 6508 int 6509 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 6510 boolean_t *direct, queue_t **qp, struct cred *cr) 6511 { 6512 sotpi_info_t *sti; 6513 struct sockparams *origsp = so->so_sockparams; 6514 sock_lower_handle_t handle = so->so_proto_handle; 6515 struct stdata *stp; 6516 struct vnode *vp; 6517 queue_t *q; 6518 int error = 0; 6519 6520 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6521 SS_FALLBACK_PENDING); 6522 ASSERT(SOCK_IS_NONSTR(so)); 6523 6524 *qp = NULL; 6525 *direct = B_FALSE; 6526 so->so_sockparams = newsp; 6527 /* 6528 * Allocate and initalize fields required by TPI. 6529 */ 6530 (void) sotpi_info_create(so, KM_SLEEP); 6531 sotpi_info_init(so); 6532 6533 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) { 6534 sotpi_info_fini(so); 6535 sotpi_info_destroy(so); 6536 return (error); 6537 } 6538 ASSERT(handle == so->so_proto_handle); 6539 sti = SOTOTPI(so); 6540 if (sti->sti_direct != 0) 6541 *direct = B_TRUE; 6542 6543 /* 6544 * Keep the original sp around so we can properly dispose of the 6545 * sonode when the socket is being closed. 6546 */ 6547 sti->sti_orig_sp = origsp; 6548 6549 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 6550 so_alloc_addr(so, so->so_max_addr_len); 6551 6552 /* 6553 * If the application has done a SIOCSPGRP, make sure the 6554 * STREAM head is aware. This needs to take place before 6555 * the protocol start sending up messages. Otherwise we 6556 * might miss to generate SIGPOLL. 6557 * 6558 * It is possible that the application will receive duplicate 6559 * signals if some were already generated for either data or 6560 * connection indications. 6561 */ 6562 if (so->so_pgrp != 0) { 6563 if (so_set_events(so, so->so_vnode, cr) != 0) 6564 so->so_pgrp = 0; 6565 } 6566 6567 /* 6568 * Determine which queue to use. 
6569 */ 6570 vp = SOTOV(so); 6571 stp = vp->v_stream; 6572 ASSERT(stp != NULL); 6573 q = stp->sd_wrq->q_next; 6574 6575 /* 6576 * Skip any modules that may have been auto pushed when the device 6577 * was opened 6578 */ 6579 while (q->q_next != NULL) 6580 q = q->q_next; 6581 *qp = _RD(q); 6582 6583 /* This is now a STREAMS sockets */ 6584 so->so_not_str = B_FALSE; 6585 6586 return (error); 6587 } 6588 6589 /* 6590 * Revert a TPI sonode. It is only allowed to revert the sonode during 6591 * the fallback process. 6592 */ 6593 void 6594 sotpi_revert_sonode(struct sonode *so, struct cred *cr) 6595 { 6596 vnode_t *vp = SOTOV(so); 6597 6598 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6599 SS_FALLBACK_PENDING); 6600 ASSERT(!SOCK_IS_NONSTR(so)); 6601 ASSERT(vp->v_stream != NULL); 6602 6603 strclean(vp); 6604 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr); 6605 6606 /* 6607 * Restore the original sockparams. The caller is responsible for 6608 * dropping the ref to the new sp. 
6609 */ 6610 so->so_sockparams = SOTOTPI(so)->sti_orig_sp; 6611 6612 sotpi_info_fini(so); 6613 sotpi_info_destroy(so); 6614 6615 /* This is no longer a STREAMS sockets */ 6616 so->so_not_str = B_TRUE; 6617 } 6618 6619 void 6620 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 6621 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 6622 socklen_t faddrlen, short opts) 6623 { 6624 sotpi_info_t *sti = SOTOTPI(so); 6625 6626 so_proc_tcapability_ack(so, tcap); 6627 6628 so->so_options |= opts; 6629 6630 /* 6631 * Determine whether the foreign and local address are valid 6632 */ 6633 if (laddrlen != 0) { 6634 ASSERT(laddrlen <= sti->sti_laddr_maxlen); 6635 sti->sti_laddr_len = laddrlen; 6636 bcopy(laddr, sti->sti_laddr_sa, laddrlen); 6637 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 6638 } 6639 6640 if (faddrlen != 0) { 6641 ASSERT(faddrlen <= sti->sti_faddr_maxlen); 6642 sti->sti_faddr_len = faddrlen; 6643 bcopy(faddr, sti->sti_faddr_sa, faddrlen); 6644 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 6645 } 6646 6647 } 6648 6649 /* 6650 * Allocate enough space to cache the local and foreign addresses. 6651 */ 6652 void 6653 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6654 { 6655 sotpi_info_t *sti = SOTOTPI(so); 6656 6657 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6658 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6659 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6660 P2ROUNDUP(maxlen, KMEM_ALIGN); 6661 so->so_max_addr_len = sti->sti_laddr_maxlen; 6662 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6663 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6664 + sti->sti_laddr_maxlen); 6665 6666 if (so->so_family == AF_UNIX) { 6667 /* 6668 * Initialize AF_UNIX related fields. 
6669 */ 6670 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6671 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6672 } 6673 } 6674 6675 6676 sotpi_info_t * 6677 sotpi_sototpi(struct sonode *so) 6678 { 6679 sotpi_info_t *sti; 6680 6681 ASSERT(so != NULL); 6682 6683 sti = (sotpi_info_t *)so->so_priv; 6684 6685 ASSERT(sti != NULL); 6686 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6687 6688 return (sti); 6689 } 6690 6691 static int 6692 i_sotpi_info_constructor(sotpi_info_t *sti) 6693 { 6694 sti->sti_magic = SOTPI_INFO_MAGIC; 6695 sti->sti_ack_mp = NULL; 6696 sti->sti_discon_ind_mp = NULL; 6697 sti->sti_ux_bound_vp = NULL; 6698 sti->sti_unbind_mp = NULL; 6699 6700 sti->sti_conn_ind_head = NULL; 6701 sti->sti_conn_ind_tail = NULL; 6702 6703 sti->sti_laddr_sa = NULL; 6704 sti->sti_faddr_sa = NULL; 6705 6706 sti->sti_nl7c_flags = 0; 6707 sti->sti_nl7c_uri = NULL; 6708 sti->sti_nl7c_rcv_mp = NULL; 6709 6710 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6711 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6712 6713 return (0); 6714 } 6715 6716 static void 6717 i_sotpi_info_destructor(sotpi_info_t *sti) 6718 { 6719 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6720 ASSERT(sti->sti_ack_mp == NULL); 6721 ASSERT(sti->sti_discon_ind_mp == NULL); 6722 ASSERT(sti->sti_ux_bound_vp == NULL); 6723 ASSERT(sti->sti_unbind_mp == NULL); 6724 6725 ASSERT(sti->sti_conn_ind_head == NULL); 6726 ASSERT(sti->sti_conn_ind_tail == NULL); 6727 6728 ASSERT(sti->sti_laddr_sa == NULL); 6729 ASSERT(sti->sti_faddr_sa == NULL); 6730 6731 ASSERT(sti->sti_nl7c_flags == 0); 6732 ASSERT(sti->sti_nl7c_uri == NULL); 6733 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6734 6735 mutex_destroy(&sti->sti_plumb_lock); 6736 cv_destroy(&sti->sti_ack_cv); 6737 } 6738 6739 /* 6740 * Creates and attaches TPI information to the given sonode 6741 */ 6742 static boolean_t 6743 sotpi_info_create(struct sonode *so, int kmflags) 6744 { 6745 sotpi_info_t *sti; 6746 6747 ASSERT(so->so_priv == NULL); 
6748 6749 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6750 return (B_FALSE); 6751 6752 if (i_sotpi_info_constructor(sti) != 0) { 6753 kmem_free(sti, sizeof (*sti)); 6754 return (B_FALSE); 6755 } 6756 6757 so->so_priv = (void *)sti; 6758 return (B_TRUE); 6759 } 6760 6761 /* 6762 * Initializes the TPI information. 6763 */ 6764 static void 6765 sotpi_info_init(struct sonode *so) 6766 { 6767 struct vnode *vp = SOTOV(so); 6768 sotpi_info_t *sti = SOTOTPI(so); 6769 time_t now; 6770 6771 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6772 vp->v_rdev = sti->sti_dev; 6773 6774 sti->sti_orig_sp = NULL; 6775 6776 sti->sti_pushcnt = 0; 6777 6778 now = gethrestime_sec(); 6779 sti->sti_atime = now; 6780 sti->sti_mtime = now; 6781 sti->sti_ctime = now; 6782 6783 sti->sti_eaddr_mp = NULL; 6784 sti->sti_delayed_error = 0; 6785 6786 sti->sti_provinfo = NULL; 6787 6788 sti->sti_oobcnt = 0; 6789 sti->sti_oobsigcnt = 0; 6790 6791 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6792 6793 sti->sti_laddr_sa = 0; 6794 sti->sti_faddr_sa = 0; 6795 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6796 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6797 6798 sti->sti_laddr_valid = 0; 6799 sti->sti_faddr_valid = 0; 6800 sti->sti_faddr_noxlate = 0; 6801 6802 sti->sti_direct = 0; 6803 6804 ASSERT(sti->sti_ack_mp == NULL); 6805 ASSERT(sti->sti_ux_bound_vp == NULL); 6806 ASSERT(sti->sti_unbind_mp == NULL); 6807 6808 ASSERT(sti->sti_conn_ind_head == NULL); 6809 ASSERT(sti->sti_conn_ind_tail == NULL); 6810 } 6811 6812 /* 6813 * Given a sonode, grab the TPI info and free any data. 
6814 */ 6815 static void 6816 sotpi_info_fini(struct sonode *so) 6817 { 6818 sotpi_info_t *sti = SOTOTPI(so); 6819 mblk_t *mp; 6820 6821 ASSERT(sti->sti_discon_ind_mp == NULL); 6822 6823 if ((mp = sti->sti_conn_ind_head) != NULL) { 6824 mblk_t *mp1; 6825 6826 while (mp) { 6827 mp1 = mp->b_next; 6828 mp->b_next = NULL; 6829 freemsg(mp); 6830 mp = mp1; 6831 } 6832 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6833 } 6834 6835 /* 6836 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6837 * indirect them. It also uses so_count as a validity test. 6838 */ 6839 mutex_enter(&so->so_lock); 6840 6841 if (sti->sti_laddr_sa) { 6842 ASSERT((caddr_t)sti->sti_faddr_sa == 6843 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6844 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6845 sti->sti_laddr_valid = 0; 6846 sti->sti_faddr_valid = 0; 6847 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6848 sti->sti_laddr_sa = NULL; 6849 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6850 sti->sti_faddr_sa = NULL; 6851 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6852 } 6853 6854 mutex_exit(&so->so_lock); 6855 6856 if ((mp = sti->sti_eaddr_mp) != NULL) { 6857 freemsg(mp); 6858 sti->sti_eaddr_mp = NULL; 6859 sti->sti_delayed_error = 0; 6860 } 6861 6862 if ((mp = sti->sti_ack_mp) != NULL) { 6863 freemsg(mp); 6864 sti->sti_ack_mp = NULL; 6865 } 6866 6867 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 6868 sti->sti_nl7c_rcv_mp = NULL; 6869 freemsg(mp); 6870 } 6871 sti->sti_nl7c_rcv_rval = 0; 6872 if (sti->sti_nl7c_uri != NULL) { 6873 nl7c_urifree(so); 6874 /* urifree() cleared nl7c_uri */ 6875 } 6876 if (sti->sti_nl7c_flags) { 6877 sti->sti_nl7c_flags = 0; 6878 } 6879 6880 ASSERT(sti->sti_ux_bound_vp == NULL); 6881 if ((mp = sti->sti_unbind_mp) != NULL) { 6882 freemsg(mp); 6883 sti->sti_unbind_mp = NULL; 6884 } 6885 } 6886 6887 /* 6888 * Destroys the TPI information attached to a sonode. 
6889 */ 6890 static void 6891 sotpi_info_destroy(struct sonode *so) 6892 { 6893 sotpi_info_t *sti = SOTOTPI(so); 6894 6895 i_sotpi_info_destructor(sti); 6896 kmem_free(sti, sizeof (*sti)); 6897 6898 so->so_priv = NULL; 6899 } 6900 6901 /* 6902 * Create the global sotpi socket module entry. It will never be freed. 6903 */ 6904 smod_info_t * 6905 sotpi_smod_create(void) 6906 { 6907 smod_info_t *smodp; 6908 6909 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 6910 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 6911 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 6912 /* 6913 * Initialize the smod_refcnt to 1 so it will never be freed. 6914 */ 6915 smodp->smod_refcnt = 1; 6916 smodp->smod_uc_version = SOCK_UC_VERSION; 6917 smodp->smod_dc_version = SOCK_DC_VERSION; 6918 smodp->smod_sock_create_func = &sotpi_create; 6919 smodp->smod_sock_destroy_func = &sotpi_destroy; 6920 return (smodp); 6921 } 6922