/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/kmem_impl.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/user.h>
#include <sys/termios.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/suntpi.h>
#include <sys/ddi.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/pathname.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sodirect.h>
#include <netinet/in.h>
#include <sys/un.h>
#include <sys/strsun.h>

#include <sys/tiuser.h>
#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */

#include <c2/audit.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/udp_impl.h>

#include <sys/zone.h>

#include <fs/sockfs/nl7c.h>
#include <fs/sockfs/nl7curi.h>

#include <inet/kssl/ksslapi.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/socktpi.h>
#include <fs/sockfs/socktpi_impl.h>

/*
 * Possible failures when memory can't be allocated. The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *		(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *		(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected-to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket, send them as a single message for AF_INET{,6}.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */

#ifdef SOCK_TEST
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 */
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;

static void sotpi_destroy(struct sonode *);
static struct sonode *sotpi_create(struct sockparams *, int, int, int, int,
    int, int *, cred_t *cr);

static boolean_t sotpi_info_create(struct sonode *, int);
static void sotpi_info_init(struct sonode *);
static void sotpi_info_fini(struct sonode *);
static void sotpi_info_destroy(struct sonode *);

/*
 * Do direct function call to the transport layer below; this would
 * also allow the transport to utilize read-side synchronous stream
 * interface if necessary. This is a /etc/system tunable that must
 * not be modified on a running system. By default this is enabled
 * for performance reasons and may be disabled for debugging purposes.
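 *
 * For example, adding a line such as
 *	set sockfs:socktpi_direct = 0
 * to /etc/system (and rebooting) would typically disable it.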
 */
boolean_t socktpi_direct = B_TRUE;

static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;

extern void sigintr(k_sigset_t *, int);
extern void sigunintr(k_sigset_t *);

/* Sockets acting as an in-kernel SSL proxy */
extern mblk_t	*strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);
extern mblk_t	*strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);

static int sotpi_unbind(struct sonode *, int);

extern int	sodput(sodirect_t *, mblk_t *);
extern void	sodwakeup(sodirect_t *);

/* TPI sockfs sonode operations */
int		sotpi_init(struct sonode *, struct sonode *, struct cred *,
		    int);
static int	sotpi_accept(struct sonode *, int, struct cred *,
		    struct sonode **);
static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
		    int, struct cred *);
static int	sotpi_listen(struct sonode *, int, struct cred *);
static int	sotpi_connect(struct sonode *, const struct sockaddr *,
		    socklen_t, int, int, struct cred *);
extern int	sotpi_recvmsg(struct sonode *, struct nmsghdr *,
		    struct uio *, struct cred *);
static int	sotpi_sendmsg(struct sonode *, struct nmsghdr *,
		    struct uio *, struct cred *);
static int	sotpi_sendmblk(struct sonode *, struct nmsghdr *, int,
		    struct cred *, mblk_t **);
static int	sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
		    struct uio *, void *, t_uscalar_t, int);
static int	sodgram_direct(struct sonode *, struct sockaddr *,
		    socklen_t, struct uio *, int);
extern int	sotpi_getpeername(struct sonode *, struct sockaddr *,
		    socklen_t *, boolean_t, struct cred *);
static int	sotpi_getsockname(struct sonode *, struct sockaddr *,
		    socklen_t *, struct cred *);
static int	sotpi_shutdown(struct sonode *, int, struct cred *);
extern int	sotpi_getsockopt(struct sonode *, int, int, void *,
		    socklen_t *, int, struct cred *);
extern int	sotpi_setsockopt(struct sonode *, int, int, const void *,
		    socklen_t, struct cred *);
static int	sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
		    int32_t *);
static int	socktpi_plumbioctl(struct vnode *, int, intptr_t, int,
		    struct cred *, int32_t *);
static int	sotpi_poll(struct sonode *, short, int, short *,
		    struct pollhead **);
static int	sotpi_close(struct sonode *, int, struct cred *);

static int	i_sotpi_info_constructor(sotpi_info_t *);
static void	i_sotpi_info_destructor(sotpi_info_t *);

sonodeops_t sotpi_sonodeops = {
	sotpi_init,		/* sop_init	*/
	sotpi_accept,		/* sop_accept	*/
	sotpi_bind,		/* sop_bind	*/
	sotpi_listen,		/* sop_listen	*/
	sotpi_connect,		/* sop_connect	*/
	sotpi_recvmsg,		/* sop_recvmsg	*/
	sotpi_sendmsg,		/* sop_sendmsg	*/
	sotpi_sendmblk,		/* sop_sendmblk	*/
	sotpi_getpeername,	/* sop_getpeername */
	sotpi_getsockname,	/* sop_getsockname */
	sotpi_shutdown,		/* sop_shutdown	*/
	sotpi_getsockopt,	/* sop_getsockopt */
	sotpi_setsockopt,	/* sop_setsockopt */
	sotpi_ioctl,		/* sop_ioctl	*/
	sotpi_poll,		/* sop_poll	*/
	sotpi_close,		/* sop_close	*/
};

/*
 * Return a TPI socket vnode.
 *
 * Note that sockets assume that the driver will clone (either itself
 * or by using the clone driver) i.e. a socket() call will always
 * result in a new vnode being created.
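 * (For instance, /dev/tcp and /dev/udp are self-cloning STREAMS
 * drivers, so every open of them yields a distinct minor device.)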
 */

/*
 * Common create code for socket and accept. If tso is set, the values
 * from that node are used instead of issuing a T_INFO_REQ.
 */

/* ARGSUSED */
static struct sonode *
sotpi_create(struct sockparams *sp, int family, int type, int protocol,
    int version, int sflags, int *errorp, cred_t *cr)
{
	struct sonode	*so;
	kmem_cache_t	*cp;
	int		sfamily = family;

	ASSERT(sp->sp_sdev_info.sd_vnode != NULL);

	if (family == AF_NCA) {
		/*
		 * The request is for an NCA socket so for NL7C use the
		 * INET domain instead and mark NL7C_AF_NCA below.
		 */
		family = AF_INET;
		/*
		 * NL7C is not supported in the non-global zone,
		 * we enforce this restriction here.
		 */
		if (getzoneid() != GLOBAL_ZONEID) {
			*errorp = ENOTSUP;
			return (NULL);
		}
	}

	/*
	 * To be compatible with the old TPI socket implementation, ignore
	 * the sleep flag (sflags) passed in.
	 */
	cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache;
	so = kmem_cache_alloc(cp, KM_SLEEP);
	if (so == NULL) {
		*errorp = ENOMEM;
		return (NULL);
	}

	sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops);
	sotpi_info_init(so);

	if (sfamily == AF_NCA) {
		SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA;
	}

	if (version == SOV_DEFAULT)
		version = so_default_version;

	so->so_version = (short)version;
	*errorp = 0;

	return (so);
}

static void
sotpi_destroy(struct sonode *so)
{
	kmem_cache_t *cp;
	struct sockparams *origsp;

	/*
	 * If there is a new dealloc function (ie. smod_destroy_func),
	 * then it should check the correctness of the ops.
	 */

	ASSERT(so->so_ops == &sotpi_sonodeops);

	origsp = SOTOTPI(so)->sti_orig_sp;

	sotpi_info_fini(so);

	if (so->so_state & SS_FALLBACK_COMP) {
		/*
		 * A fallback happened, which means that a sotpi_info_t struct
		 * was allocated (as opposed to being allocated from the TPI
		 * sonode cache). Therefore we explicitly free the struct
		 * here.
		 */
		sotpi_info_destroy(so);
		ASSERT(origsp != NULL);

		origsp->sp_smod_info->smod_sock_destroy_func(so);
		SOCKPARAMS_DEC_REF(origsp);
	} else {
		sonode_fini(so);
		cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache :
		    socktpi_cache;
		kmem_cache_free(cp, so);
	}
}

/* ARGSUSED1 */
int
sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags)
{
	major_t maj;
	dev_t newdev;
	struct vnode *vp;
	int error = 0;
	struct stdata *stp;

	sotpi_info_t *sti = SOTOTPI(so);

	dprint(1, ("sotpi_init()\n"));

	/*
	 * Overwrite the sleep flag passed in; that is OK since the
	 * TPI socket does not honor the sleep flag.
	 */
	flags |= FREAD|FWRITE;

	/*
	 * Record in so_flag that it is a clone.
	 */
	if (getmajor(sti->sti_dev) == clone_major)
		so->so_flag |= SOCLONE;

	if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) &&
	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
	    (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP ||
	    so->so_protocol == IPPROTO_IP)) {
		/* Tell tcp or udp that it's talking to sockets */
		flags |= SO_SOCKSTR;

		/*
		 * Here we indicate to socktpi_open() our attempt to
		 * make direct calls between sockfs and transport.
		 * The final decision is left to socktpi_open().
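		 * (If the module below the stream head turns out not to
		 * qualify, sti_direct is cleared again further below.)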
		 */
		sti->sti_direct = 1;

		ASSERT(so->so_type != SOCK_DGRAM || tso == NULL);
		if (so->so_type == SOCK_STREAM && tso != NULL) {
			if (SOTOTPI(tso)->sti_direct) {
				/*
				 * Inherit sti_direct from listener and pass
				 * SO_ACCEPTOR open flag to tcp, indicating
				 * that this is an accept fast-path instance.
				 */
				flags |= SO_ACCEPTOR;
			} else {
				/*
				 * sti_direct is not set on listener, meaning
				 * that the listener has been converted from
				 * a socket to a stream. Ensure that the
				 * acceptor inherits these settings.
				 */
				sti->sti_direct = 0;
				flags &= ~SO_SOCKSTR;
			}
		}
	}

	/*
	 * Tell local transport that it is talking to sockets.
	 */
	if (so->so_family == AF_UNIX) {
		flags |= SO_SOCKSTR;
	}

	vp = SOTOV(so);
	newdev = vp->v_rdev;
	maj = getmajor(newdev);
	ASSERT(STREAMSTAB(maj));

	error = stropen(vp, &newdev, flags, cr);

	stp = vp->v_stream;
	if (error == 0) {
		if (so->so_flag & SOCLONE)
			ASSERT(newdev != vp->v_rdev);
		mutex_enter(&so->so_lock);
		sti->sti_dev = newdev;
		vp->v_rdev = newdev;
		mutex_exit(&so->so_lock);

		if (stp->sd_flag & STRISTTY) {
			/*
			 * This is a post-SVR4 tty driver - a socket can not
			 * be a controlling terminal. Fail the open.
			 */
			(void) sotpi_close(so, flags, cr);
			return (ENOTTY);	/* XXX */
		}

		ASSERT(stp->sd_wrq != NULL);
		sti->sti_provinfo = tpi_findprov(stp->sd_wrq);

		/*
		 * If the caller is interested in doing direct function call
		 * interface to/from the transport module, probe the module
		 * directly beneath the streamhead to see if it qualifies.
		 *
		 * We turn off the direct interface when qualifications fail.
		 * In the acceptor case, we simply turn off the sti_direct
		 * flag on the socket. We do the fallback after the accept
		 * has completed, before the new socket is returned to the
		 * application.
		 */
		if (sti->sti_direct) {
			queue_t *tq = stp->sd_wrq->q_next;

			/*
			 * sti_direct is currently supported and tested
			 * only for tcp/udp; this is the main reason to
			 * have the following assertions.
			 */
			ASSERT(so->so_family == AF_INET ||
			    so->so_family == AF_INET6);
			ASSERT(so->so_protocol == IPPROTO_UDP ||
			    so->so_protocol == IPPROTO_TCP ||
			    so->so_protocol == IPPROTO_IP);
			ASSERT(so->so_type == SOCK_DGRAM ||
			    so->so_type == SOCK_STREAM);

			/*
			 * Abort the direct call interface if the module
			 * directly underneath the stream head is not defined
			 * with the _D_DIRECT flag. This could happen in the
			 * tcp or udp case, when some other module is
			 * autopushed above it, or for some reason the
			 * expected module isn't purely D_MP (which is the
			 * main requirement).
			 *
			 * Else, SS_DIRECT is valid. If the read-side Q has
			 * _QSODIRECT set and uioasync is enabled, then set
			 * SS_SODIRECT to enable sodirect.
			 */
			if (!socktpi_direct || !(tq->q_flag & _QDIRECT) ||
			    !(_OTHERQ(tq)->q_flag & _QDIRECT)) {
				int rval;

				/* Continue on without direct calls */
				sti->sti_direct = 0;

				/*
				 * Cannot issue the ioctl on a fallback socket
				 * since there is no conn associated with the
				 * queue. The fallback downcall will notify
				 * the proto of the change.
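				 * (_SIOCSOCKFALLBACK tells the transport
				 * to revert to plain TPI/STREAMS behaviour
				 * for this endpoint.)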
				 */
				if (!(flags & SO_ACCEPTOR) &&
				    !(flags & SO_FALLBACK)) {
					if ((error = strioctl(vp,
					    _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
					    cr, &rval)) != 0) {
						(void) sotpi_close(so, flags,
						    cr);
						return (error);
					}
				}
			} else if ((_OTHERQ(tq)->q_flag & _QSODIRECT) &&
			    uioasync.enabled) {
				/* Enable sodirect */
				so->so_state |= SS_SODIRECT;
			}
		}

		if (flags & SO_FALLBACK) {
			/*
			 * The stream created does not have a conn.
			 * Do stream setup after a conn has been assigned.
			 */
			return (error);
		}
		if (error = so_strinit(so, tso)) {
			(void) sotpi_close(so, flags, cr);
			return (error);
		}

		/* Wildcard */
		if (so->so_protocol != so->so_sockparams->sp_protocol) {
			int protocol = so->so_protocol;
			/*
			 * Issue SO_PROTOTYPE setsockopt.
			 */
			error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
			    &protocol, (t_uscalar_t)sizeof (protocol), cr);
			if (error != 0) {
				(void) sotpi_close(so, flags, cr);
				/*
				 * Setsockopt often fails with ENOPROTOOPT but
				 * socket() should fail with
				 * EPROTONOSUPPORT/EPROTOTYPE.
				 */
				return (EPROTONOSUPPORT);
			}
		}

	} else {
		/*
		 * While the same socket can not be reopened (unlike specfs)
		 * the stream head sets STREOPENFAIL when the autopush fails.
		 */
		if ((stp != NULL) &&
		    (stp->sd_flag & STREOPENFAIL)) {
			/*
			 * Open failed part way through.
			 */
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~STREOPENFAIL;
			mutex_exit(&stp->sd_lock);
			(void) sotpi_close(so, flags, cr);
			return (error);
			/*NOTREACHED*/
		}
		ASSERT(stp == NULL);
	}
	TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN,
	    "sockfs open:maj %d vp %p so %p error %d",
	    maj, vp, so, error);
	return (error);
}

/*
 * Bind the socket to an unspecified address in sockfs only.
 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
 * required in all cases.
 */
static void
so_automatic_bind(struct sonode *so)
{
	sotpi_info_t *sti = SOTOTPI(so);
	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(!(so->so_state & SS_ISBOUND));
	ASSERT(sti->sti_unbind_mp);

	ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
	bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
	sti->sti_laddr_sa->sa_family = so->so_family;
	so->so_state |= SS_ISBOUND;
}


/*
 * Bind the socket.
 *
 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2
 * are passed in we allow rebinding. Note that for backwards compatibility
 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind.
 * Thus the rebinding code is currently not executed.
 *
 * The constraints for rebinding are:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 * This rebinding code was added based on some language in the XNET book
 * about not returning EINVAL if the protocol allows rebinding. However,
 * this language is not present in the POSIX socket draft. Thus maybe the
 * rebinding logic should be deleted from the source.
 *
 * A null "name" can be used to unbind the socket if:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 */
/* ARGSUSED */
static int
sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int backlog, int flags, struct cred *cr)
{
	struct T_bind_req	bind_req;
	struct T_bind_ack	*bind_ack;
	int			error = 0;
	mblk_t			*mp;
	void			*addr;
	t_uscalar_t		addrlen;
	int			unbind_on_err = 1;
	boolean_t		clear_acceptconn_on_err = B_FALSE;
	boolean_t		restore_backlog_on_err = B_FALSE;
	int			save_so_backlog;
	t_scalar_t		PRIM_type = O_T_BIND_REQ;
	boolean_t		tcp_udp_xport;
	void			*nl7c = NULL;
	sotpi_info_t		*sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
	    (void *)so, (void *)name, namelen, backlog, flags,
	    pr_state(so->so_state, so->so_mode)));

	tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;

	if (!(flags & _SOBIND_LOCK_HELD)) {
		mutex_enter(&so->so_lock);
		so_lock_single(so);	/* Set SOLOCKED */
	} else {
		ASSERT(MUTEX_HELD(&so->so_lock));
		ASSERT(so->so_flag & SOLOCKED);
	}

	/*
	 * Make sure that there is a preallocated unbind_req message
	 * before binding. This message is allocated when the socket is
	 * created, but it might have been consumed.
	 */
	if (sti->sti_unbind_mp == NULL) {
		dprintso(so, 1, ("sobind: allocating unbind_req\n"));
		/* NOTE: holding so_lock while sleeping */
		sti->sti_unbind_mp =
		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
	}

	if (flags & _SOBIND_REBIND) {
		/*
		 * Called from solisten after doing an sotpi_unbind() or
		 * potentially without the unbind (latter for AF_INET{,6}).
		 */
		ASSERT(name == NULL && namelen == 0);

		if (so->so_family == AF_UNIX) {
			ASSERT(sti->sti_ux_bound_vp);
			addr = &sti->sti_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
			dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, "
			    "addr 0x%p, vp %p\n",
			    addrlen,
			    (void *)((struct so_ux_addr *)addr)->soua_vp,
			    (void *)sti->sti_ux_bound_vp));
		} else {
			addr = sti->sti_laddr_sa;
			addrlen = (t_uscalar_t)sti->sti_laddr_len;
		}
	} else if (flags & _SOBIND_UNSPEC) {
		ASSERT(name == NULL && namelen == 0);

		/*
		 * The caller checked SS_ISBOUND but not necessarily
		 * under so_lock
		 */
		if (so->so_state & SS_ISBOUND) {
			/* No error */
			goto done;
		}

		/* Set an initial local address */
		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * Use an address with same size as struct sockaddr
			 * just like BSD.
			 */
			sti->sti_laddr_len =
			    (socklen_t)sizeof (struct sockaddr);
			ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
			bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
			sti->sti_laddr_sa->sa_family = so->so_family;

			/*
			 * Pass down an address with the implicit bind
			 * magic number and the rest all zeros.
			 * The transport will return a unique address.
			 */
			sti->sti_ux_laddr.soua_vp = NULL;
			sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
			addr = &sti->sti_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
			break;

		case AF_INET:
		case AF_INET6:
			/*
			 * An unspecified bind in TPI has a NULL address.
			 * Set the address in sockfs to have the sa_family.
			 */
			sti->sti_laddr_len = (so->so_family == AF_INET) ?
			    (socklen_t)sizeof (sin_t) :
			    (socklen_t)sizeof (sin6_t);
			ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
			bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
			sti->sti_laddr_sa->sa_family = so->so_family;
			addr = NULL;
			addrlen = 0;
			break;

		default:
			/*
			 * An unspecified bind in TPI has a NULL address.
			 * Set the address in sockfs to be zero length.
			 *
			 * Can not assume there is a sa_family for all
			 * protocol families. For example, AF_X25 does not
			 * have a family field.
			 */
			bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
			sti->sti_laddr_len = 0;	/* XXX correct? */
			addr = NULL;
			addrlen = 0;
			break;
		}

	} else {
		if (so->so_state & SS_ISBOUND) {
			/*
			 * If it is ok to rebind the socket, first unbind
			 * with the transport. A rebind to the NULL address
			 * is interpreted as an unbind.
			 * Note that a bind to NULL in BSD does unbind the
			 * socket but it fails with EINVAL.
			 * Note that regular sockets set SOV_SOCKBSD i.e.
			 * _SOBIND_SOCKBSD gets set here hence no type of
			 * socket does currently allow rebinding.
			 *
			 * If the name is NULL just do an unbind.
			 */
			if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) &&
			    name != NULL) {
				error = EINVAL;
				unbind_on_err = 0;
				eprintsoline(so, error);
				goto done;
			}
			if ((so->so_mode & SM_CONNREQUIRED) &&
			    (so->so_state & SS_CANTREBIND)) {
				error = EINVAL;
				unbind_on_err = 0;
				eprintsoline(so, error);
				goto done;
			}
			error = sotpi_unbind(so, 0);
			if (error) {
				eprintsoline(so, error);
				goto done;
			}
			ASSERT(!(so->so_state & SS_ISBOUND));
			if (name == NULL) {
				so->so_state &=
				    ~(SS_ISCONNECTED|SS_ISCONNECTING);
				goto done;
			}
		}

		/* X/Open requires this check */
		if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
			if (xnet_check_print) {
				printf("sockfs: X/Open bind state check "
				    "caused EINVAL\n");
			}
			error = EINVAL;
			goto done;
		}

		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * All AF_UNIX addresses are nul terminated
			 * when copied in (copyin_name), so the minimum
			 * length is 3 bytes.
			 */
			if (name == NULL ||
			    (ssize_t)namelen <= sizeof (short) + 1) {
				error = EISDIR;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Verify so_family matches the bound family.
			 * BSD does not check this for AF_UNIX resulting
			 * in funny mknods.
			 */
			if (name->sa_family != so->so_family) {
				error = EAFNOSUPPORT;
				goto done;
			}
			break;
		case AF_INET:
			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((size_t)namelen != sizeof (sin_t)) {
				error = name->sa_family != so->so_family ?
				    EAFNOSUPPORT : EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((flags & _SOBIND_XPG4_2) &&
			    (name->sa_family != so->so_family)) {
				/*
				 * This check has to be made for X/Open
				 * sockets; however, application failures have
				 * been observed when it is applied to
				 * all sockets.
				 */
				error = EAFNOSUPPORT;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Force a zero sa_family to match so_family.
			 *
			 * Some programs like inetd(1M) don't set the
			 * family field. Other programs leave
			 * sin_family set to garbage - SunOS 4.X does
			 * not check the family field on a bind.
			 * We use the family field that
			 * was passed in to the socket() call.
			 */
			name->sa_family = so->so_family;
			break;

		case AF_INET6: {
#ifdef DEBUG
			sin6_t *sin6 = (sin6_t *)name;
#endif /* DEBUG */

			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((size_t)namelen != sizeof (sin6_t)) {
				error = name->sa_family != so->so_family ?
				    EAFNOSUPPORT : EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if (name->sa_family != so->so_family) {
				/*
				 * With IPv6 we require the family to match
				 * unlike in IPv4.
				 */
				error = EAFNOSUPPORT;
				eprintsoline(so, error);
				goto done;
			}
#ifdef DEBUG
			/*
			 * Verify that apps don't forget to clear
			 * sin6_scope_id etc
			 */
			if (sin6->sin6_scope_id != 0 &&
			    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
				zcmn_err(getzoneid(), CE_WARN,
				    "bind with uninitialized sin6_scope_id "
				    "(%d) on socket. Pid = %d\n",
				    (int)sin6->sin6_scope_id,
				    (int)curproc->p_pid);
			}
			if (sin6->__sin6_src_id != 0) {
				zcmn_err(getzoneid(), CE_WARN,
				    "bind with uninitialized __sin6_src_id "
				    "(%d) on socket. Pid = %d\n",
				    (int)sin6->__sin6_src_id,
				    (int)curproc->p_pid);
			}
#endif /* DEBUG */
			break;
		}
		default:
			/*
			 * Don't do any length or sa_family check to allow
			 * non-sockaddr style addresses.
			 */
			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			break;
		}

		if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) {
			error = ENAMETOOLONG;
			eprintsoline(so, error);
			goto done;
		}
		/*
		 * Save local address.
		 */
		sti->sti_laddr_len = (socklen_t)namelen;
		ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
		bcopy(name, sti->sti_laddr_sa, namelen);

		addr = sti->sti_laddr_sa;
		addrlen = (t_uscalar_t)sti->sti_laddr_len;
		switch (so->so_family) {
		case AF_INET6:
		case AF_INET:
			break;
		case AF_UNIX: {
			struct sockaddr_un *soun =
			    (struct sockaddr_un *)sti->sti_laddr_sa;
			struct vnode *vp, *rvp;
			struct vattr vattr;

			ASSERT(sti->sti_ux_bound_vp == NULL);
			/*
			 * Create vnode for the specified path name.
			 * Keep vnode held with a reference in sti_ux_bound_vp.
			 * Use the vnode pointer as the address used in the
			 * bind with the transport.
			 *
			 * Use the same mode as in BSD. In particular this does
			 * not observe the umask.
			 */
			/* MAXPATHLEN + soun_family + nul termination */
			if (sti->sti_laddr_len >
			    (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
				error = ENAMETOOLONG;
				eprintsoline(so, error);
				goto done;
			}
			vattr.va_type = VSOCK;
			vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask;
			vattr.va_mask = AT_TYPE|AT_MODE;
			/* NOTE: holding so_lock */
			error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr,
			    EXCL, 0, &vp, CRMKNOD, 0, 0);
			if (error) {
				if (error == EEXIST)
					error = EADDRINUSE;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Establish pointer from the underlying filesystem
			 * vnode to the socket node.
			 * sti_ux_bound_vp and v_stream->sd_vnode form the
			 * cross-linkage between the underlying filesystem
			 * node and the socket node.
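			 * (A later connect() to this path can thus map the
			 * filesystem vnode back to the bound socket's
			 * stream.)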
1007 */ 1008 1009 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 1010 VN_HOLD(rvp); 1011 VN_RELE(vp); 1012 vp = rvp; 1013 } 1014 1015 ASSERT(SOTOV(so)->v_stream); 1016 mutex_enter(&vp->v_lock); 1017 vp->v_stream = SOTOV(so)->v_stream; 1018 sti->sti_ux_bound_vp = vp; 1019 mutex_exit(&vp->v_lock); 1020 1021 /* 1022 * Use the vnode pointer value as a unique address 1023 * (together with the magic number to avoid conflicts 1024 * with implicit binds) in the transport provider. 1025 */ 1026 sti->sti_ux_laddr.soua_vp = 1027 (void *)sti->sti_ux_bound_vp; 1028 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1029 addr = &sti->sti_ux_laddr; 1030 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1031 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1032 addrlen, 1033 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1034 break; 1035 } 1036 } /* end switch (so->so_family) */ 1037 } 1038 1039 /* 1040 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1041 * the transport can start passing up T_CONN_IND messages 1042 * as soon as it receives the bind req and strsock_proto() 1043 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1044 */ 1045 if (flags & _SOBIND_LISTEN) { 1046 if ((so->so_state & SS_ACCEPTCONN) == 0) 1047 clear_acceptconn_on_err = B_TRUE; 1048 save_so_backlog = so->so_backlog; 1049 restore_backlog_on_err = B_TRUE; 1050 so->so_state |= SS_ACCEPTCONN; 1051 so->so_backlog = backlog; 1052 } 1053 1054 /* 1055 * If NL7C addr(s) have been configured check for addr/port match, 1056 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 1057 * 1058 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 1059 * family sockets only. If match mark as such. 1060 */ 1061 if (nl7c_enabled && ((addr != NULL && 1062 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1063 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 1064 sti->sti_nl7c_flags == NL7C_AF_NCA)) { 1065 /* 1066 * NL7C is not supported in non-global zones, 1067 * we enforce this restriction here. 1068 */ 1069 if (so->so_zoneid == GLOBAL_ZONEID) { 1070 /* An NL7C socket, mark it */ 1071 sti->sti_nl7c_flags |= NL7C_ENABLED; 1072 if (nl7c == NULL) { 1073 /* 1074 * Was an AF_NCA bind() so add it to the 1075 * addr list for reporting purposes. 1076 */ 1077 nl7c = nl7c_add_addr(addr, addrlen); 1078 } 1079 } else 1080 nl7c = NULL; 1081 } 1082 1083 /* 1084 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1085 * for other transports we will send in a O_T_BIND_REQ. 1086 */ 1087 if (tcp_udp_xport && 1088 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1089 PRIM_type = T_BIND_REQ; 1090 1091 bind_req.PRIM_type = PRIM_type; 1092 bind_req.ADDR_length = addrlen; 1093 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1094 bind_req.CONIND_number = backlog; 1095 /* NOTE: holding so_lock while sleeping */ 1096 mp = soallocproto2(&bind_req, sizeof (bind_req), 1097 addr, addrlen, 0, _ALLOC_SLEEP); 1098 sti->sti_laddr_valid = 0; 1099 1100 /* Done using sti_laddr_sa - can drop the lock */ 1101 mutex_exit(&so->so_lock); 1102 1103 /* 1104 * Intercept the bind_req message here to check if this <address/port> 1105 * was configured as an SSL proxy server, or if another endpoint was 1106 * already configured to act as a proxy for us. 1107 * 1108 * Note, only if NL7C not enabled for this socket. 
1109 */ 1110 if (nl7c == NULL && 1111 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1112 so->so_type == SOCK_STREAM) { 1113 1114 if (sti->sti_kssl_ent != NULL) { 1115 kssl_release_ent(sti->sti_kssl_ent, so, 1116 sti->sti_kssl_type); 1117 sti->sti_kssl_ent = NULL; 1118 } 1119 1120 sti->sti_kssl_type = kssl_check_proxy(mp, so, 1121 &sti->sti_kssl_ent); 1122 switch (sti->sti_kssl_type) { 1123 case KSSL_NO_PROXY: 1124 break; 1125 1126 case KSSL_HAS_PROXY: 1127 mutex_enter(&so->so_lock); 1128 goto skip_transport; 1129 1130 case KSSL_IS_PROXY: 1131 break; 1132 } 1133 } 1134 1135 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1136 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1137 if (error) { 1138 eprintsoline(so, error); 1139 mutex_enter(&so->so_lock); 1140 goto done; 1141 } 1142 1143 mutex_enter(&so->so_lock); 1144 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1145 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1146 if (error) { 1147 eprintsoline(so, error); 1148 goto done; 1149 } 1150 skip_transport: 1151 ASSERT(mp); 1152 /* 1153 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1154 * strsock_proto while the lock was dropped above, the bind 1155 * is allowed to complete. 1156 */ 1157 1158 /* Mark as bound. This will be undone if we detect errors below. */ 1159 if (flags & _SOBIND_NOXLATE) { 1160 ASSERT(so->so_family == AF_UNIX); 1161 sti->sti_faddr_noxlate = 1; 1162 } 1163 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1164 so->so_state |= SS_ISBOUND; 1165 ASSERT(sti->sti_unbind_mp); 1166 1167 /* note that we've already set SS_ACCEPTCONN above */ 1168 1169 /* 1170 * Recompute addrlen - an unspecied bind sent down an 1171 * address of length zero but we expect the appropriate length 1172 * in return. 1173 */ 1174 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 1175 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1176 1177 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1178 /* 1179 * The alignment restriction is really too strict but 1180 * we want enough alignment to inspect the fields of 1181 * a sockaddr_in. 1182 */ 1183 addr = sogetoff(mp, bind_ack->ADDR_offset, 1184 bind_ack->ADDR_length, 1185 __TPI_ALIGN_SIZE); 1186 if (addr == NULL) { 1187 freemsg(mp); 1188 error = EPROTO; 1189 eprintsoline(so, error); 1190 goto done; 1191 } 1192 if (!(flags & _SOBIND_UNSPEC)) { 1193 /* 1194 * Verify that the transport didn't return something we 1195 * did not want e.g. an address other than what we asked for. 1196 * 1197 * NOTE: These checks would go away if/when we switch to 1198 * using the new TPI (in which the transport would fail 1199 * the request instead of assigning a different address). 1200 * 1201 * NOTE2: For protocols that we don't know (i.e. any 1202 * other than AF_INET6, AF_INET and AF_UNIX), we 1203 * cannot know if the transport should be expected to 1204 * return the same address as that requested. 1205 * 1206 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1207 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 1208 * 1209 * For example, in the case of netatalk it may be 1210 * inappropriate for the transport to return the 1211 * requested address (as it may have allocated a local 1212 * port number in behaviour similar to that of an 1213 * AF_INET bind request with a port number of zero). 
1214 * 1215 * Given the definition of O_T_BIND_REQ, where the 1216 * transport may bind to an address other than the 1217 * requested address, it's not possible to determine 1218 * whether a returned address that differs from the 1219 * requested address is a reason to fail (because the 1220 * requested address was not available) or succeed 1221 * (because the transport allocated an appropriate 1222 * address and/or port). 1223 * 1224 * sockfs currently requires that the transport return 1225 * the requested address in the T_BIND_ACK, unless 1226 * there is code here to allow for any discrepancy. 1227 * Such code exists for AF_INET and AF_INET6. 1228 * 1229 * Netatalk chooses to return the requested address 1230 * rather than the (correct) allocated address. This 1231 * means that netatalk violates the TPI specification 1232 * (and would not function correctly if used from a 1233 * TLI application), but it does mean that it works 1234 * with sockfs. 1235 * 1236 * As noted above, using the newer XTI bind primitive 1237 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1238 * allow sockfs to be more sure about whether or not 1239 * the bind request had succeeded (as transports are 1240 * not permitted to bind to a different address than 1241 * that requested - they must return failure). 1242 * Unfortunately, support for T_BIND_REQ may not be 1243 * present in all transport implementations (netatalk, 1244 * for example, doesn't have it), making the 1245 * transition difficult. 1246 */ 1247 if (bind_ack->ADDR_length != addrlen) { 1248 /* Assumes that the requested address was in use */ 1249 freemsg(mp); 1250 error = EADDRINUSE; 1251 eprintsoline(so, error); 1252 goto done; 1253 } 1254 1255 switch (so->so_family) { 1256 case AF_INET6: 1257 case AF_INET: { 1258 sin_t *rname, *aname; 1259 1260 rname = (sin_t *)addr; 1261 aname = (sin_t *)sti->sti_laddr_sa; 1262 1263 /* 1264 * Take advantage of the alignment 1265 * of sin_port and sin6_port which fall 1266 * in the same place in their data structures. 1267 * Just use sin_port for either address family. 1268 * 1269 * This may become a problem if (heaven forbid) 1270 * there's a separate ipv6port_reserved... :-P 1271 * 1272 * Binding to port 0 has the semantics of letting 1273 * the transport bind to any port. 1274 * 1275 * If the transport is TCP or UDP since we had sent 1276 * a T_BIND_REQ we would not get a port other than 1277 * what we asked for. 1278 */ 1279 if (tcp_udp_xport) { 1280 /* 1281 * Pick up the new port number if we bound to 1282 * port 0. 1283 */ 1284 if (aname->sin_port == 0) 1285 aname->sin_port = rname->sin_port; 1286 sti->sti_laddr_valid = 1; 1287 break; 1288 } 1289 if (aname->sin_port != 0 && 1290 aname->sin_port != rname->sin_port) { 1291 freemsg(mp); 1292 error = EADDRINUSE; 1293 eprintsoline(so, error); 1294 goto done; 1295 } 1296 /* 1297 * Pick up the new port number if we bound to port 0. 1298 */ 1299 aname->sin_port = rname->sin_port; 1300 1301 /* 1302 * Unfortunately, addresses aren't _quite_ the same. 
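			 * (For AF_INET a simple s_addr comparison suffices;
			 * for AF_INET6 the full 128-bit address is compared
			 * with IN6_ARE_ADDR_EQUAL() below.)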
1303 */ 1304 if (so->so_family == AF_INET) { 1305 if (aname->sin_addr.s_addr != 1306 rname->sin_addr.s_addr) { 1307 freemsg(mp); 1308 error = EADDRNOTAVAIL; 1309 eprintsoline(so, error); 1310 goto done; 1311 } 1312 } else { 1313 sin6_t *rname6 = (sin6_t *)rname; 1314 sin6_t *aname6 = (sin6_t *)aname; 1315 1316 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1317 &rname6->sin6_addr)) { 1318 freemsg(mp); 1319 error = EADDRNOTAVAIL; 1320 eprintsoline(so, error); 1321 goto done; 1322 } 1323 } 1324 break; 1325 } 1326 case AF_UNIX: 1327 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1328 freemsg(mp); 1329 error = EADDRINUSE; 1330 eprintsoline(so, error); 1331 eprintso(so, 1332 ("addrlen %d, addr 0x%x, vp %p\n", 1333 addrlen, *((int *)addr), 1334 (void *)sti->sti_ux_bound_vp)); 1335 goto done; 1336 } 1337 sti->sti_laddr_valid = 1; 1338 break; 1339 default: 1340 /* 1341 * NOTE: This assumes that addresses can be 1342 * byte-compared for equivalence. 1343 */ 1344 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1345 freemsg(mp); 1346 error = EADDRINUSE; 1347 eprintsoline(so, error); 1348 goto done; 1349 } 1350 /* 1351 * Don't mark sti_laddr_valid, as we cannot be 1352 * sure that the returned address is the real 1353 * bound address when talking to an unknown 1354 * transport. 1355 */ 1356 break; 1357 } 1358 } else { 1359 /* 1360 * Save for returned address for getsockname. 1361 * Needed for unspecific bind unless transport supports 1362 * the TI_GETMYNAME ioctl. 1363 * Do this for AF_INET{,6} even though they do, as 1364 * caching info here is much better performance than 1365 * a TPI/STREAMS trip to the transport for getsockname. 1366 * Any which can't for some reason _must_ _not_ set 1367 * sti_laddr_valid here for the caching version of 1368 * getsockname to not break; 1369 */ 1370 switch (so->so_family) { 1371 case AF_UNIX: 1372 /* 1373 * Record the address bound with the transport 1374 * for use by socketpair. 1375 */ 1376 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1377 sti->sti_laddr_valid = 1; 1378 break; 1379 case AF_INET: 1380 case AF_INET6: 1381 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1382 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1383 sti->sti_laddr_valid = 1; 1384 break; 1385 default: 1386 /* 1387 * Don't mark sti_laddr_valid, as we cannot be 1388 * sure that the returned address is the real 1389 * bound address when talking to an unknown 1390 * transport. 
1391 */ 1392 break; 1393 } 1394 } 1395 1396 if (nl7c != NULL) { 1397 /* Register listen()er sonode pointer with NL7C */ 1398 nl7c_listener_addr(nl7c, so); 1399 } 1400 1401 freemsg(mp); 1402 1403 done: 1404 if (error) { 1405 /* reset state & backlog to values held on entry */ 1406 if (clear_acceptconn_on_err == B_TRUE) 1407 so->so_state &= ~SS_ACCEPTCONN; 1408 if (restore_backlog_on_err == B_TRUE) 1409 so->so_backlog = save_so_backlog; 1410 1411 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1412 int err; 1413 1414 err = sotpi_unbind(so, 0); 1415 /* LINTED - statement has no consequent: if */ 1416 if (err) { 1417 eprintsoline(so, error); 1418 } else { 1419 ASSERT(!(so->so_state & SS_ISBOUND)); 1420 } 1421 } 1422 } 1423 if (!(flags & _SOBIND_LOCK_HELD)) { 1424 so_unlock_single(so, SOLOCKED); 1425 mutex_exit(&so->so_lock); 1426 } else { 1427 ASSERT(MUTEX_HELD(&so->so_lock)); 1428 ASSERT(so->so_flag & SOLOCKED); 1429 } 1430 return (error); 1431 } 1432 1433 /* bind the socket */ 1434 static int 1435 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1436 int flags, struct cred *cr) 1437 { 1438 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1439 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1440 1441 flags &= ~_SOBIND_SOCKETPAIR; 1442 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1443 } 1444 1445 /* 1446 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1447 * address, or when listen needs to unbind and bind. 1448 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1449 * so that a sobind can pick them up. 1450 */ 1451 static int 1452 sotpi_unbind(struct sonode *so, int flags) 1453 { 1454 struct T_unbind_req unbind_req; 1455 int error = 0; 1456 mblk_t *mp; 1457 sotpi_info_t *sti = SOTOTPI(so); 1458 1459 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1460 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1461 1462 ASSERT(MUTEX_HELD(&so->so_lock)); 1463 ASSERT(so->so_flag & SOLOCKED); 1464 1465 if (!(so->so_state & SS_ISBOUND)) { 1466 error = EINVAL; 1467 eprintsoline(so, error); 1468 goto done; 1469 } 1470 1471 mutex_exit(&so->so_lock); 1472 1473 /* 1474 * Flush the read and write side (except stream head read queue) 1475 * and send down T_UNBIND_REQ. 1476 */ 1477 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1478 1479 unbind_req.PRIM_type = T_UNBIND_REQ; 1480 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1481 0, _ALLOC_SLEEP); 1482 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1483 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1484 mutex_enter(&so->so_lock); 1485 if (error) { 1486 eprintsoline(so, error); 1487 goto done; 1488 } 1489 1490 error = sowaitokack(so, T_UNBIND_REQ); 1491 if (error) { 1492 eprintsoline(so, error); 1493 goto done; 1494 } 1495 1496 /* 1497 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1498 * strsock_proto while the lock was dropped above, the unbind 1499 * is allowed to complete. 1500 */ 1501 if (!(flags & _SOUNBIND_REBIND)) { 1502 /* 1503 * Clear out bound address. 
1504 */ 1505 vnode_t *vp; 1506 1507 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1508 1509 /* Undo any SSL proxy setup */ 1510 if ((so->so_family == AF_INET || 1511 so->so_family == AF_INET6) && 1512 (so->so_type == SOCK_STREAM) && 1513 (sti->sti_kssl_ent != NULL)) { 1514 kssl_release_ent(sti->sti_kssl_ent, so, 1515 sti->sti_kssl_type); 1516 sti->sti_kssl_ent = NULL; 1517 sti->sti_kssl_type = KSSL_NO_PROXY; 1518 } 1519 sti->sti_ux_bound_vp = NULL; 1520 vn_rele_stream(vp); 1521 } 1522 /* Clear out address */ 1523 sti->sti_laddr_len = 0; 1524 } 1525 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1526 sti->sti_laddr_valid = 0; 1527 1528 done: 1529 1530 /* If the caller held the lock don't release it here */ 1531 ASSERT(MUTEX_HELD(&so->so_lock)); 1532 ASSERT(so->so_flag & SOLOCKED); 1533 1534 return (error); 1535 } 1536 1537 /* 1538 * listen on the socket. 1539 * For TPI conforming transports this has to first unbind with the transport 1540 * and then bind again using the new backlog. 1541 */ 1542 /* ARGSUSED */ 1543 int 1544 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1545 { 1546 int error = 0; 1547 sotpi_info_t *sti = SOTOTPI(so); 1548 1549 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1550 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1551 1552 if (sti->sti_serv_type == T_CLTS) 1553 return (EOPNOTSUPP); 1554 1555 /* 1556 * If the socket is ready to accept connections already, then 1557 * return without doing anything. This avoids a problem where 1558 * a second listen() call fails if a connection is pending and 1559 * leaves the socket unbound. Only when we are not unbinding 1560 * with the transport can we safely increase the backlog. 1561 */ 1562 if (so->so_state & SS_ACCEPTCONN && 1563 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1564 /*CONSTCOND*/ 1565 !solisten_tpi_tcp)) 1566 return (0); 1567 1568 if (so->so_state & SS_ISCONNECTED) 1569 return (EINVAL); 1570 1571 mutex_enter(&so->so_lock); 1572 so_lock_single(so); /* Set SOLOCKED */ 1573 1574 /* 1575 * If the listen doesn't change the backlog we do nothing. 1576 * This avoids an EPROTO error from the transport. 1577 */ 1578 if ((so->so_state & SS_ACCEPTCONN) && 1579 so->so_backlog == backlog) 1580 goto done; 1581 1582 if (!(so->so_state & SS_ISBOUND)) { 1583 /* 1584 * Must have been explicitly bound in the UNIX domain. 1585 */ 1586 if (so->so_family == AF_UNIX) { 1587 error = EINVAL; 1588 goto done; 1589 } 1590 error = sotpi_bindlisten(so, NULL, 0, backlog, 1591 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1592 } else if (backlog > 0) { 1593 /* 1594 * AF_INET{,6} hack to avoid losing the port. 1595 * Assumes that all AF_INET{,6} transports can handle a 1596 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1597 * has already bound thus it is possible to avoid the unbind. 1598 */ 1599 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1600 /*CONSTCOND*/ 1601 !solisten_tpi_tcp)) { 1602 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1603 if (error) 1604 goto done; 1605 } 1606 error = sotpi_bindlisten(so, NULL, 0, backlog, 1607 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1608 } else { 1609 so->so_state |= SS_ACCEPTCONN; 1610 so->so_backlog = backlog; 1611 } 1612 if (error) 1613 goto done; 1614 ASSERT(so->so_state & SS_ACCEPTCONN); 1615 done: 1616 so_unlock_single(so, SOLOCKED); 1617 mutex_exit(&so->so_lock); 1618 return (error); 1619 } 1620 1621 /* 1622 * Disconnect either a specified seqno or all (-1). 1623 * The former is used on listening sockets only. 
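 * (A listener typically passes the SEQ_number taken from the T_CONN_IND
 * it wants to refuse.)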
1624 * 1625 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1626 * the current use of sodisconnect(seqno == -1) is only for shutdown 1627 * so there is no point (and potentially incorrect) to unbind. 1628 */ 1629 static int 1630 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1631 { 1632 struct T_discon_req discon_req; 1633 int error = 0; 1634 mblk_t *mp; 1635 1636 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1637 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1638 1639 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1640 mutex_enter(&so->so_lock); 1641 so_lock_single(so); /* Set SOLOCKED */ 1642 } else { 1643 ASSERT(MUTEX_HELD(&so->so_lock)); 1644 ASSERT(so->so_flag & SOLOCKED); 1645 } 1646 1647 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1648 error = EINVAL; 1649 eprintsoline(so, error); 1650 goto done; 1651 } 1652 1653 mutex_exit(&so->so_lock); 1654 /* 1655 * Flush the write side (unless this is a listener) 1656 * and then send down a T_DISCON_REQ. 1657 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1658 * and other messages.) 1659 */ 1660 if (!(so->so_state & SS_ACCEPTCONN)) 1661 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1662 1663 discon_req.PRIM_type = T_DISCON_REQ; 1664 discon_req.SEQ_number = seqno; 1665 mp = soallocproto1(&discon_req, sizeof (discon_req), 1666 0, _ALLOC_SLEEP); 1667 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1668 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1669 mutex_enter(&so->so_lock); 1670 if (error) { 1671 eprintsoline(so, error); 1672 goto done; 1673 } 1674 1675 error = sowaitokack(so, T_DISCON_REQ); 1676 if (error) { 1677 eprintsoline(so, error); 1678 goto done; 1679 } 1680 /* 1681 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1682 * strsock_proto while the lock was dropped above, the disconnect 1683 * is allowed to complete. However, it is not possible to 1684 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1685 */ 1686 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1687 SOTOTPI(so)->sti_laddr_valid = 0; 1688 SOTOTPI(so)->sti_faddr_valid = 0; 1689 done: 1690 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1691 so_unlock_single(so, SOLOCKED); 1692 mutex_exit(&so->so_lock); 1693 } else { 1694 /* If the caller held the lock don't release it here */ 1695 ASSERT(MUTEX_HELD(&so->so_lock)); 1696 ASSERT(so->so_flag & SOLOCKED); 1697 } 1698 return (error); 1699 } 1700 1701 /* ARGSUSED */ 1702 int 1703 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1704 struct sonode **nsop) 1705 { 1706 struct T_conn_ind *conn_ind; 1707 struct T_conn_res *conn_res; 1708 int error = 0; 1709 mblk_t *mp, *ctxmp, *ack_mp; 1710 struct sonode *nso; 1711 vnode_t *nvp; 1712 void *src; 1713 t_uscalar_t srclen; 1714 void *opt; 1715 t_uscalar_t optlen; 1716 t_scalar_t PRIM_type; 1717 t_scalar_t SEQ_number; 1718 size_t sinlen; 1719 sotpi_info_t *sti = SOTOTPI(so); 1720 sotpi_info_t *nsti; 1721 1722 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1723 (void *)so, fflag, (void *)nsop, 1724 pr_state(so->so_state, so->so_mode))); 1725 1726 /* 1727 * Defer single-threading the accepting socket until 1728 * the T_CONN_IND has been received and parsed and the 1729 * new sonode has been opened. 
1730 */ 1731 1732 /* Check that we are not already connected */ 1733 if ((so->so_state & SS_ACCEPTCONN) == 0) 1734 goto conn_bad; 1735 again: 1736 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1737 goto e_bad; 1738 1739 ASSERT(mp != NULL); 1740 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1741 ctxmp = mp->b_cont; 1742 1743 /* 1744 * Save SEQ_number for error paths. 1745 */ 1746 SEQ_number = conn_ind->SEQ_number; 1747 1748 srclen = conn_ind->SRC_length; 1749 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1750 if (src == NULL) { 1751 error = EPROTO; 1752 freemsg(mp); 1753 eprintsoline(so, error); 1754 goto disconnect_unlocked; 1755 } 1756 optlen = conn_ind->OPT_length; 1757 switch (so->so_family) { 1758 case AF_INET: 1759 case AF_INET6: 1760 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1761 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1762 &opt, conn_ind->OPT_length); 1763 } else { 1764 /* 1765 * The transport (in this case TCP) hasn't sent up 1766 * a pointer to an instance for the accept fast-path. 1767 * Disable fast-path completely because the call to 1768 * sotpi_create() below would otherwise create an 1769 * incomplete TCP instance, which would lead to 1770 * problems when sockfs sends a normal T_CONN_RES 1771 * message down the new stream. 1772 */ 1773 if (sti->sti_direct) { 1774 int rval; 1775 /* 1776 * For consistency we inform tcp to disable 1777 * direct interface on the listener, though 1778 * we can certainly live without doing this 1779 * because no data will ever travel upstream 1780 * on the listening socket. 1781 */ 1782 sti->sti_direct = 0; 1783 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1784 0, 0, K_TO_K, CRED(), &rval); 1785 } 1786 opt = NULL; 1787 optlen = 0; 1788 } 1789 break; 1790 case AF_UNIX: 1791 default: 1792 if (optlen != 0) { 1793 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1794 __TPI_ALIGN_SIZE); 1795 if (opt == NULL) { 1796 error = EPROTO; 1797 freemsg(mp); 1798 eprintsoline(so, error); 1799 goto disconnect_unlocked; 1800 } 1801 } 1802 if (so->so_family == AF_UNIX) { 1803 if (!sti->sti_faddr_noxlate) { 1804 src = NULL; 1805 srclen = 0; 1806 } 1807 /* Extract src address from options */ 1808 if (optlen != 0) 1809 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1810 } 1811 break; 1812 } 1813 1814 /* 1815 * Create the new socket. 1816 */ 1817 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1818 if (nso == NULL) { 1819 ASSERT(error != 0); 1820 /* 1821 * Accept can not fail with ENOBUFS. sotpi_create 1822 * sleeps waiting for memory until a signal is caught 1823 * so return EINTR. 1824 */ 1825 freemsg(mp); 1826 if (error == ENOBUFS) 1827 error = EINTR; 1828 goto e_disc_unl; 1829 } 1830 nvp = SOTOV(nso); 1831 nsti = SOTOTPI(nso); 1832 1833 /* 1834 * If the transport sent up an SSL connection context, then attach 1835 * it the new socket, and set the (sd_wputdatafunc)() and 1836 * (sd_rputdatafunc)() stream head hooks to intercept and process 1837 * SSL records. 1838 */ 1839 if (ctxmp != NULL) { 1840 /* 1841 * This kssl_ctx_t is already held for us by the transport. 1842 * So, we don't need to do a kssl_hold_ctx() here. 
1843 */ 1844 nsti->sti_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1845 freemsg(ctxmp); 1846 mp->b_cont = NULL; 1847 strsetrwputdatahooks(nvp, strsock_kssl_input, 1848 strsock_kssl_output); 1849 1850 /* Disable sodirect if any */ 1851 if (nso->so_direct != NULL) { 1852 mutex_enter(nso->so_direct->sod_lockp); 1853 SOD_DISABLE(nso->so_direct); 1854 mutex_exit(nso->so_direct->sod_lockp); 1855 } 1856 } 1857 #ifdef DEBUG 1858 /* 1859 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1860 * it's inherited early to allow debugging of the accept code itself. 1861 */ 1862 nso->so_options |= so->so_options & SO_DEBUG; 1863 #endif /* DEBUG */ 1864 1865 /* 1866 * Save the SRC address from the T_CONN_IND 1867 * for getpeername to work on AF_UNIX and on transports that do not 1868 * support TI_GETPEERNAME. 1869 * 1870 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1871 * copyin_name(). 1872 */ 1873 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1874 error = EINVAL; 1875 freemsg(mp); 1876 eprintsoline(so, error); 1877 goto disconnect_vp_unlocked; 1878 } 1879 nsti->sti_faddr_len = (socklen_t)srclen; 1880 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1881 bcopy(src, nsti->sti_faddr_sa, srclen); 1882 nsti->sti_faddr_valid = 1; 1883 1884 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1885 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1886 cred_t *cr; 1887 1888 if ((cr = DB_CRED(mp)) != NULL) { 1889 crhold(cr); 1890 nso->so_peercred = cr; 1891 nso->so_cpid = DB_CPID(mp); 1892 } 1893 freemsg(mp); 1894 1895 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1896 sizeof (intptr_t), 0, _ALLOC_INTR); 1897 if (mp == NULL) { 1898 /* 1899 * Accept can not fail with ENOBUFS. 1900 * A signal was caught so return EINTR. 1901 */ 1902 error = EINTR; 1903 eprintsoline(so, error); 1904 goto disconnect_vp_unlocked; 1905 } 1906 conn_res = (struct T_conn_res *)mp->b_rptr; 1907 } else { 1908 nso->so_peercred = DB_CRED(mp); 1909 nso->so_cpid = DB_CPID(mp); 1910 DB_CRED(mp) = NULL; 1911 1912 mp->b_rptr = DB_BASE(mp); 1913 conn_res = (struct T_conn_res *)mp->b_rptr; 1914 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1915 } 1916 1917 /* 1918 * New socket must be bound at least in sockfs and, except for AF_INET, 1919 * (or AF_INET6) it also has to be bound in the transport provider. 1920 * We set the local address in the sonode from the T_OK_ACK of the 1921 * T_CONN_RES. For this reason the address we bind to here isn't 1922 * important. 1923 */ 1924 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1925 /*CONSTCOND*/ 1926 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1927 /* 1928 * Optimization for AF_INET{,6} transports 1929 * that can handle a T_CONN_RES without being bound. 1930 */ 1931 mutex_enter(&nso->so_lock); 1932 so_automatic_bind(nso); 1933 mutex_exit(&nso->so_lock); 1934 } else { 1935 /* Perform NULL bind with the transport provider. */ 1936 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1937 cr)) != 0) { 1938 ASSERT(error != ENOBUFS); 1939 freemsg(mp); 1940 eprintsoline(nso, error); 1941 goto disconnect_vp_unlocked; 1942 } 1943 } 1944 1945 /* 1946 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1947 * so that any data arriving on the new socket will cause the 1948 * appropriate signals to be delivered for the new socket. 1949 * 1950 * No other thread (except strsock_proto and strsock_misc) 1951 * can access the new socket thus we relax the locking. 
1952 */ 1953 nso->so_pgrp = so->so_pgrp; 1954 nso->so_state |= so->so_state & SS_ASYNC; 1955 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1956 1957 if (nso->so_pgrp != 0) { 1958 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1959 eprintsoline(nso, error); 1960 error = 0; 1961 nso->so_pgrp = 0; 1962 } 1963 } 1964 1965 /* 1966 * Make note of the socket level options. TCP and IP level options 1967 * are already inherited. We could do all this after accept is 1968 * successful but doing it here simplifies code and no harm done 1969 * for error case. 1970 */ 1971 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1972 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1973 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1974 nso->so_sndbuf = so->so_sndbuf; 1975 nso->so_rcvbuf = so->so_rcvbuf; 1976 if (nso->so_options & SO_LINGER) 1977 nso->so_linger = so->so_linger; 1978 1979 /* 1980 * Note that the following sti_direct code path should be 1981 * removed once we are confident that the direct sockets 1982 * do not result in any degradation. 1983 */ 1984 if (sti->sti_direct) { 1985 1986 ASSERT(opt != NULL); 1987 1988 conn_res->OPT_length = optlen; 1989 conn_res->OPT_offset = MBLKL(mp); 1990 bcopy(&opt, mp->b_wptr, optlen); 1991 mp->b_wptr += optlen; 1992 conn_res->PRIM_type = T_CONN_RES; 1993 conn_res->ACCEPTOR_id = 0; 1994 PRIM_type = T_CONN_RES; 1995 1996 /* Send down the T_CONN_RES on acceptor STREAM */ 1997 error = kstrputmsg(SOTOV(nso), mp, NULL, 1998 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1999 if (error) { 2000 mutex_enter(&so->so_lock); 2001 so_lock_single(so); 2002 eprintsoline(so, error); 2003 goto disconnect_vp; 2004 } 2005 mutex_enter(&nso->so_lock); 2006 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 2007 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2008 if (error) { 2009 mutex_exit(&nso->so_lock); 2010 mutex_enter(&so->so_lock); 2011 so_lock_single(so); 2012 eprintsoline(so, error); 2013 goto disconnect_vp; 2014 } 2015 if (nso->so_family == AF_INET) { 2016 sin_t *sin; 2017 2018 sin = (sin_t *)(ack_mp->b_rptr + 2019 sizeof (struct T_ok_ack)); 2020 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 2021 nsti->sti_laddr_len = sizeof (sin_t); 2022 } else { 2023 sin6_t *sin6; 2024 2025 sin6 = (sin6_t *)(ack_mp->b_rptr + 2026 sizeof (struct T_ok_ack)); 2027 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 2028 nsti->sti_laddr_len = sizeof (sin6_t); 2029 } 2030 freemsg(ack_mp); 2031 2032 nso->so_state |= SS_ISCONNECTED; 2033 nso->so_proto_handle = (sock_lower_handle_t)opt; 2034 nsti->sti_laddr_valid = 1; 2035 2036 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 2037 /* 2038 * A NL7C marked listen()er so the new socket 2039 * inherits the listen()er's NL7C state, except 2040 * for NL7C_POLLIN. 2041 * 2042 * Only call NL7C to process the new socket if 2043 * the listen socket allows blocking i/o. 2044 */ 2045 nsti->sti_nl7c_flags = 2046 sti->sti_nl7c_flags & (~NL7C_POLLIN); 2047 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 2048 /* 2049 * Nonblocking accept() just make it 2050 * persist to defer processing to the 2051 * read-side syscall (e.g. read). 2052 */ 2053 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 2054 } else if (nl7c_process(nso, B_FALSE)) { 2055 /* 2056 * NL7C has completed processing on the 2057 * socket, close the socket and back to 2058 * the top to await the next T_CONN_IND. 
2059 */ 2060 mutex_exit(&nso->so_lock); 2061 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 2062 CRED(), NULL); 2063 VN_RELE(nvp); 2064 goto again; 2065 } 2066 /* Pass the new socket out */ 2067 } 2068 2069 mutex_exit(&nso->so_lock); 2070 2071 /* 2072 * It's possible, through the use of autopush for example, 2073 * that the acceptor stream may not support sti_direct 2074 * semantics. If the new socket does not support sti_direct 2075 * we issue a _SIOCSOCKFALLBACK to inform the transport 2076 * as we would in the I_PUSH case. 2077 */ 2078 if (nsti->sti_direct == 0) { 2079 int rval; 2080 2081 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2082 0, 0, K_TO_K, CRED(), &rval)) != 0) { 2083 mutex_enter(&so->so_lock); 2084 so_lock_single(so); 2085 eprintsoline(so, error); 2086 goto disconnect_vp; 2087 } 2088 } 2089 2090 /* 2091 * Pass out new socket. 2092 */ 2093 if (nsop != NULL) 2094 *nsop = nso; 2095 2096 return (0); 2097 } 2098 2099 /* 2100 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2101 * which don't support the FireEngine accept fast-path. It is also 2102 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2103 * again. Neither sockfs nor TCP attempt to find out if some other 2104 * random module has been inserted in between (in which case we 2105 * should follow TLI accept behaviour). We blindly assume the worst 2106 * case and revert back to old behaviour i.e. TCP will not send us 2107 * any option (eager) and the accept should happen on the listener 2108 * queue. Any queued T_conn_ind have already got their options removed 2109 * by so_sock2_stream() when "sockmod" was I_POP'd. 2110 */ 2111 /* 2112 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2113 */ 2114 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2115 #ifdef _ILP32 2116 queue_t *q; 2117 2118 /* 2119 * Find read queue in driver 2120 * Can safely do this since we "own" nso/nvp. 2121 */ 2122 q = strvp2wq(nvp)->q_next; 2123 while (SAMESTR(q)) 2124 q = q->q_next; 2125 q = RD(q); 2126 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2127 #else 2128 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2129 #endif /* _ILP32 */ 2130 conn_res->PRIM_type = O_T_CONN_RES; 2131 PRIM_type = O_T_CONN_RES; 2132 } else { 2133 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2134 conn_res->PRIM_type = T_CONN_RES; 2135 PRIM_type = T_CONN_RES; 2136 } 2137 conn_res->SEQ_number = SEQ_number; 2138 conn_res->OPT_length = 0; 2139 conn_res->OPT_offset = 0; 2140 2141 mutex_enter(&so->so_lock); 2142 so_lock_single(so); /* Set SOLOCKED */ 2143 mutex_exit(&so->so_lock); 2144 2145 error = kstrputmsg(SOTOV(so), mp, NULL, 2146 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2147 mutex_enter(&so->so_lock); 2148 if (error) { 2149 eprintsoline(so, error); 2150 goto disconnect_vp; 2151 } 2152 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2153 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2154 if (error) { 2155 eprintsoline(so, error); 2156 goto disconnect_vp; 2157 } 2158 /* 2159 * If there is a sin/sin6 appended onto the T_OK_ACK use 2160 * that to set the local address. If this is not present 2161 * then we zero out the address and don't set the 2162 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2163 * the pathname from the listening socket. 2164 */ 2165 sinlen = (nso->so_family == AF_INET) ? 
sizeof (sin_t) : sizeof (sin6_t); 2166 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 2167 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2168 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2169 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2170 nsti->sti_laddr_len = sinlen; 2171 nsti->sti_laddr_valid = 1; 2172 } else if (nso->so_family == AF_UNIX) { 2173 ASSERT(so->so_family == AF_UNIX); 2174 nsti->sti_laddr_len = sti->sti_laddr_len; 2175 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2176 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2177 nsti->sti_laddr_len); 2178 nsti->sti_laddr_valid = 1; 2179 } else { 2180 nsti->sti_laddr_len = sti->sti_laddr_len; 2181 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2182 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2183 nsti->sti_laddr_sa->sa_family = nso->so_family; 2184 } 2185 freemsg(ack_mp); 2186 2187 so_unlock_single(so, SOLOCKED); 2188 mutex_exit(&so->so_lock); 2189 2190 nso->so_state |= SS_ISCONNECTED; 2191 2192 /* 2193 * Pass out new socket. 2194 */ 2195 if (nsop != NULL) 2196 *nsop = nso; 2197 2198 return (0); 2199 2200 2201 eproto_disc_unl: 2202 error = EPROTO; 2203 e_disc_unl: 2204 eprintsoline(so, error); 2205 goto disconnect_unlocked; 2206 2207 pr_disc_vp_unl: 2208 eprintsoline(so, error); 2209 disconnect_vp_unlocked: 2210 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 2211 VN_RELE(nvp); 2212 disconnect_unlocked: 2213 (void) sodisconnect(so, SEQ_number, 0); 2214 return (error); 2215 2216 pr_disc_vp: 2217 eprintsoline(so, error); 2218 disconnect_vp: 2219 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2220 so_unlock_single(so, SOLOCKED); 2221 mutex_exit(&so->so_lock); 2222 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 2223 VN_RELE(nvp); 2224 return (error); 2225 2226 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2227 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2228 ? EOPNOTSUPP : EINVAL; 2229 e_bad: 2230 eprintsoline(so, error); 2231 return (error); 2232 } 2233 2234 /* 2235 * Connect a socket. 2236 * 2237 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2238 * unconnect (by specifying a null address). 2239 */ 2240 int 2241 sotpi_connect(struct sonode *so, 2242 const struct sockaddr *name, 2243 socklen_t namelen, 2244 int fflag, 2245 int flags, 2246 struct cred *cr) 2247 { 2248 struct T_conn_req conn_req; 2249 int error = 0; 2250 mblk_t *mp; 2251 void *src; 2252 socklen_t srclen; 2253 void *addr; 2254 socklen_t addrlen; 2255 boolean_t need_unlock; 2256 sotpi_info_t *sti = SOTOTPI(so); 2257 2258 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2259 (void *)so, (void *)name, namelen, fflag, flags, 2260 pr_state(so->so_state, so->so_mode))); 2261 2262 /* 2263 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2264 * avoid sleeping for memory with SOLOCKED held. 2265 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2266 * + sizeof (struct T_opthdr). 2267 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2268 * exceed sti_faddr_maxlen). 2269 */ 2270 mp = soallocproto(sizeof (struct T_conn_req) + 2271 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR); 2272 if (mp == NULL) { 2273 /* 2274 * Connect can not fail with ENOBUFS. A signal was 2275 * caught so return EINTR.
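 * (_ALLOC_INTR means soallocproto() sleeps for memory but gives up as
 * soon as a signal is pending, so a NULL return here can only mean an
 * interrupted sleep, never a hard allocation failure.)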
2276 */ 2277 error = EINTR; 2278 eprintsoline(so, error); 2279 return (error); 2280 } 2281 2282 mutex_enter(&so->so_lock); 2283 /* 2284 * Make sure there is a preallocated T_unbind_req message 2285 * before any binding. This message is allocated when the 2286 * socket is created. Since another thread can consume 2287 * so_unbind_mp by the time we return from so_lock_single(), 2288 * we should check the availability of so_unbind_mp after 2289 * we return from so_lock_single(). 2290 */ 2291 2292 so_lock_single(so); /* Set SOLOCKED */ 2293 need_unlock = B_TRUE; 2294 2295 if (sti->sti_unbind_mp == NULL) { 2296 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2297 /* NOTE: holding so_lock while sleeping */ 2298 sti->sti_unbind_mp = 2299 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR); 2300 if (sti->sti_unbind_mp == NULL) { 2301 error = EINTR; 2302 goto done; 2303 } 2304 } 2305 2306 /* 2307 * Can't have done a listen before connecting. 2308 */ 2309 if (so->so_state & SS_ACCEPTCONN) { 2310 error = EOPNOTSUPP; 2311 goto done; 2312 } 2313 2314 /* 2315 * Must be bound with the transport 2316 */ 2317 if (!(so->so_state & SS_ISBOUND)) { 2318 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2319 /*CONSTCOND*/ 2320 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2321 /* 2322 * Optimization for AF_INET{,6} transports 2323 * that can handle a T_CONN_REQ without being bound. 2324 */ 2325 so_automatic_bind(so); 2326 } else { 2327 error = sotpi_bind(so, NULL, 0, 2328 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2329 if (error) 2330 goto done; 2331 } 2332 ASSERT(so->so_state & SS_ISBOUND); 2333 flags |= _SOCONNECT_DID_BIND; 2334 } 2335 2336 /* 2337 * Handle a connect to a name parameter of type AF_UNSPEC like a 2338 * connect to a null address. This is the portable method to 2339 * unconnect a socket. 2340 */ 2341 if ((namelen >= sizeof (sa_family_t)) && 2342 (name->sa_family == AF_UNSPEC)) { 2343 name = NULL; 2344 namelen = 0; 2345 } 2346 2347 /* 2348 * Check that we are not already connected. 2349 * A connection-oriented socket cannot be reconnected. 2350 * A connected connection-less socket can be 2351 * - connected to a different address by a subsequent connect 2352 * - "unconnected" by a connect to the NULL address 2353 */ 2354 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2355 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2356 if (so->so_mode & SM_CONNREQUIRED) { 2357 /* Connection-oriented socket */ 2358 error = so->so_state & SS_ISCONNECTED ? 2359 EISCONN : EALREADY; 2360 goto done; 2361 } 2362 /* Connection-less socket */ 2363 if (name == NULL) { 2364 /* 2365 * Remove the connected state and clear SO_DGRAM_ERRIND 2366 * since it was set when the socket was connected. 2367 * If this is UDP also send down a T_DISCON_REQ. 2368 */ 2369 int val; 2370 2371 if ((so->so_family == AF_INET || 2372 so->so_family == AF_INET6) && 2373 (so->so_type == SOCK_DGRAM || 2374 so->so_type == SOCK_RAW) && 2375 /*CONSTCOND*/ 2376 !soconnect_tpi_udp) { 2377 /* XXX What about implicitly unbinding here? 
*/ 2378 error = sodisconnect(so, -1, 2379 _SODISCONNECT_LOCK_HELD); 2380 } else { 2381 so->so_state &= 2382 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2383 sti->sti_faddr_valid = 0; 2384 sti->sti_faddr_len = 0; 2385 } 2386 2387 /* Remove SOLOCKED since setsockopt will grab it */ 2388 so_unlock_single(so, SOLOCKED); 2389 mutex_exit(&so->so_lock); 2390 2391 val = 0; 2392 (void) sotpi_setsockopt(so, SOL_SOCKET, 2393 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2394 cr); 2395 2396 mutex_enter(&so->so_lock); 2397 so_lock_single(so); /* Set SOLOCKED */ 2398 goto done; 2399 } 2400 } 2401 ASSERT(so->so_state & SS_ISBOUND); 2402 2403 if (name == NULL || namelen == 0) { 2404 error = EINVAL; 2405 goto done; 2406 } 2407 /* 2408 * Mark the socket if sti_faddr_sa represents the transport level 2409 * address. 2410 */ 2411 if (flags & _SOCONNECT_NOXLATE) { 2412 struct sockaddr_ux *soaddr_ux; 2413 2414 ASSERT(so->so_family == AF_UNIX); 2415 if (namelen != sizeof (struct sockaddr_ux)) { 2416 error = EINVAL; 2417 goto done; 2418 } 2419 soaddr_ux = (struct sockaddr_ux *)name; 2420 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2421 namelen = sizeof (soaddr_ux->sou_addr); 2422 sti->sti_faddr_noxlate = 1; 2423 } 2424 2425 /* 2426 * Length and family checks. 2427 */ 2428 error = so_addr_verify(so, name, namelen); 2429 if (error) 2430 goto bad; 2431 2432 /* 2433 * Save foreign address. Needed for AF_UNIX as well as 2434 * transport providers that do not support TI_GETPEERNAME. 2435 * Also used for cached foreign address for TCP and UDP. 2436 */ 2437 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2438 error = EINVAL; 2439 goto done; 2440 } 2441 sti->sti_faddr_len = (socklen_t)namelen; 2442 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2443 bcopy(name, sti->sti_faddr_sa, namelen); 2444 sti->sti_faddr_valid = 1; 2445 2446 if (so->so_family == AF_UNIX) { 2447 if (sti->sti_faddr_noxlate) { 2448 /* 2449 * Already have a transport internal address. Do not 2450 * pass any (transport internal) source address. 2451 */ 2452 addr = sti->sti_faddr_sa; 2453 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2454 src = NULL; 2455 srclen = 0; 2456 } else { 2457 /* 2458 * Pass the sockaddr_un source address as an option 2459 * and translate the remote address. 2460 * Holding so_lock thus sti_laddr_sa can not change. 2461 */ 2462 src = sti->sti_laddr_sa; 2463 srclen = (t_uscalar_t)sti->sti_laddr_len; 2464 dprintso(so, 1, 2465 ("sotpi_connect UNIX: srclen %d, src %p\n", 2466 srclen, src)); 2467 error = so_ux_addr_xlate(so, 2468 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2469 (flags & _SOCONNECT_XPG4_2), 2470 &addr, &addrlen); 2471 if (error) 2472 goto bad; 2473 } 2474 } else { 2475 addr = sti->sti_faddr_sa; 2476 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2477 src = NULL; 2478 srclen = 0; 2479 } 2480 /* 2481 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2482 * option which asks the transport provider to send T_UDERR_IND 2483 * messages. These T_UDERR_IND messages are used to return connected 2484 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2485 * 2486 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2487 * we send down a T_CONN_REQ. This is needed to let the 2488 * transport assign a local address that is consistent with 2489 * the remote address. Applications depend on a getsockname() 2490 * after a connect() to retrieve the "source" IP address for 2491 * the connected socket. Invalidate the cached local address 2492 * to force getsockname() to enquire of the transport. 
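 *
 * For illustration, the user level sequence this supports is:
 *
 *	(void) connect(fd, (struct sockaddr *)&dst, sizeof (dst));
 *	(void) getsockname(fd, (struct sockaddr *)&src, &srclen);
 *
 * where src is then expected to hold the source IP address the
 * transport picked for reaching dst.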
2493 */ 2494 if (!(so->so_mode & SM_CONNREQUIRED)) { 2495 /* 2496 * Datagram socket. 2497 */ 2498 int32_t val; 2499 2500 so_unlock_single(so, SOLOCKED); 2501 mutex_exit(&so->so_lock); 2502 2503 val = 1; 2504 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2505 &val, (t_uscalar_t)sizeof (val), cr); 2506 2507 mutex_enter(&so->so_lock); 2508 so_lock_single(so); /* Set SOLOCKED */ 2509 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2510 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2511 soconnect_tpi_udp) { 2512 soisconnected(so); 2513 goto done; 2514 } 2515 /* 2516 * Send down T_CONN_REQ etc. 2517 * Clear fflag to avoid returning EWOULDBLOCK. 2518 */ 2519 fflag = 0; 2520 ASSERT(so->so_family != AF_UNIX); 2521 sti->sti_laddr_valid = 0; 2522 } else if (sti->sti_laddr_len != 0) { 2523 /* 2524 * If the local address or port was "any" then it may be 2525 * changed by the transport as a result of the 2526 * connect. Invalidate the cached version if we have one. 2527 */ 2528 switch (so->so_family) { 2529 case AF_INET: 2530 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2531 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2532 INADDR_ANY || 2533 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2534 sti->sti_laddr_valid = 0; 2535 break; 2536 2537 case AF_INET6: 2538 ASSERT(sti->sti_laddr_len == 2539 (socklen_t)sizeof (sin6_t)); 2540 if (IN6_IS_ADDR_UNSPECIFIED( 2541 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2542 IN6_IS_ADDR_V4MAPPED_ANY( 2543 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2544 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2545 sti->sti_laddr_valid = 0; 2546 break; 2547 2548 default: 2549 break; 2550 } 2551 } 2552 2553 /* 2554 * Check for failure of an earlier call 2555 */ 2556 if (so->so_error != 0) 2557 goto so_bad; 2558 2559 /* 2560 * Send down T_CONN_REQ. Message was allocated above. 2561 */ 2562 conn_req.PRIM_type = T_CONN_REQ; 2563 conn_req.DEST_length = addrlen; 2564 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2565 if (srclen == 0) { 2566 conn_req.OPT_length = 0; 2567 conn_req.OPT_offset = 0; 2568 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2569 soappendmsg(mp, addr, addrlen); 2570 } else { 2571 /* 2572 * There is a AF_UNIX sockaddr_un to include as a source 2573 * address option. 2574 */ 2575 struct T_opthdr toh; 2576 2577 toh.level = SOL_SOCKET; 2578 toh.name = SO_SRCADDR; 2579 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2580 toh.status = 0; 2581 conn_req.OPT_length = 2582 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2583 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2584 _TPI_ALIGN_TOPT(addrlen)); 2585 2586 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2587 soappendmsg(mp, addr, addrlen); 2588 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2589 soappendmsg(mp, &toh, sizeof (toh)); 2590 soappendmsg(mp, src, srclen); 2591 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2592 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2593 } 2594 /* 2595 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2596 * in order to have the right state when the T_CONN_CON shows up. 
2597 */ 2598 soisconnecting(so); 2599 mutex_exit(&so->so_lock); 2600 2601 if (audit_active) 2602 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2603 2604 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2605 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2606 mp = NULL; 2607 mutex_enter(&so->so_lock); 2608 if (error != 0) 2609 goto bad; 2610 2611 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2612 goto bad; 2613 2614 /* Allow other threads to access the socket */ 2615 so_unlock_single(so, SOLOCKED); 2616 need_unlock = B_FALSE; 2617 2618 /* 2619 * Wait until we get a T_CONN_CON or an error 2620 */ 2621 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2622 so_lock_single(so); /* Set SOLOCKED */ 2623 need_unlock = B_TRUE; 2624 } 2625 2626 done: 2627 freemsg(mp); 2628 switch (error) { 2629 case EINPROGRESS: 2630 case EALREADY: 2631 case EISCONN: 2632 case EINTR: 2633 /* Non-fatal errors */ 2634 sti->sti_laddr_valid = 0; 2635 /* FALLTHRU */ 2636 case 0: 2637 break; 2638 default: 2639 ASSERT(need_unlock); 2640 /* 2641 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2642 * and invalidate local-address cache 2643 */ 2644 so->so_state &= ~SS_ISCONNECTING; 2645 sti->sti_laddr_valid = 0; 2646 /* A discon_ind might have already unbound us */ 2647 if ((flags & _SOCONNECT_DID_BIND) && 2648 (so->so_state & SS_ISBOUND)) { 2649 int err; 2650 2651 err = sotpi_unbind(so, 0); 2652 /* LINTED - statement has no conseq */ 2653 if (err) { 2654 eprintsoline(so, err); 2655 } 2656 } 2657 break; 2658 } 2659 if (need_unlock) 2660 so_unlock_single(so, SOLOCKED); 2661 mutex_exit(&so->so_lock); 2662 return (error); 2663 2664 so_bad: error = sogeterr(so, B_TRUE); 2665 bad: eprintsoline(so, error); 2666 goto done; 2667 } 2668 2669 /* ARGSUSED */ 2670 int 2671 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2672 { 2673 struct T_ordrel_req ordrel_req; 2674 mblk_t *mp; 2675 uint_t old_state, state_change; 2676 int error = 0; 2677 sotpi_info_t *sti = SOTOTPI(so); 2678 2679 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2680 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2681 2682 mutex_enter(&so->so_lock); 2683 so_lock_single(so); /* Set SOLOCKED */ 2684 2685 /* 2686 * SunOS 4.X has no check for datagram sockets. 2687 * 5.X checks that it is connected (ENOTCONN) 2688 * X/Open requires that we check the connected state. 2689 */ 2690 if (!(so->so_state & SS_ISCONNECTED)) { 2691 if (!xnet_skip_checks) { 2692 error = ENOTCONN; 2693 if (xnet_check_print) { 2694 printf("sockfs: X/Open shutdown check " 2695 "caused ENOTCONN\n"); 2696 } 2697 } 2698 goto done; 2699 } 2700 /* 2701 * Record the current state and then perform any state changes. 2702 * Then use the difference between the old and new states to 2703 * determine which messages need to be sent. 2704 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2705 * duplicate calls to shutdown(). 2706 */ 2707 old_state = so->so_state; 2708 2709 switch (how) { 2710 case 0: 2711 socantrcvmore(so); 2712 break; 2713 case 1: 2714 socantsendmore(so); 2715 break; 2716 case 2: 2717 socantsendmore(so); 2718 socantrcvmore(so); 2719 break; 2720 default: 2721 error = EINVAL; 2722 goto done; 2723 } 2724 2725 /* 2726 * Assumes that the SS_CANT* flags are never cleared in the above code. 
2727 */ 2728 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2729 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2730 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2731 2732 switch (state_change) { 2733 case 0: 2734 dprintso(so, 1, 2735 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2736 so->so_state)); 2737 goto done; 2738 2739 case SS_CANTRCVMORE: 2740 mutex_exit(&so->so_lock); 2741 strseteof(SOTOV(so), 1); 2742 /* 2743 * strseteof takes care of read side wakeups, 2744 * pollwakeups, and signals. 2745 */ 2746 /* 2747 * Get the read lock before flushing data to avoid problems 2748 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2749 */ 2750 mutex_enter(&so->so_lock); 2751 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2752 mutex_exit(&so->so_lock); 2753 2754 /* Flush read side queue */ 2755 strflushrq(SOTOV(so), FLUSHALL); 2756 2757 mutex_enter(&so->so_lock); 2758 so_unlock_read(so); /* Clear SOREADLOCKED */ 2759 break; 2760 2761 case SS_CANTSENDMORE: 2762 mutex_exit(&so->so_lock); 2763 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2764 mutex_enter(&so->so_lock); 2765 break; 2766 2767 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2768 mutex_exit(&so->so_lock); 2769 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2770 strseteof(SOTOV(so), 1); 2771 /* 2772 * strseteof takes care of read side wakeups, 2773 * pollwakeups, and signals. 2774 */ 2775 /* 2776 * Get the read lock before flushing data to avoid problems 2777 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2778 */ 2779 mutex_enter(&so->so_lock); 2780 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2781 mutex_exit(&so->so_lock); 2782 2783 /* Flush read side queue */ 2784 strflushrq(SOTOV(so), FLUSHALL); 2785 2786 mutex_enter(&so->so_lock); 2787 so_unlock_read(so); /* Clear SOREADLOCKED */ 2788 break; 2789 } 2790 2791 ASSERT(MUTEX_HELD(&so->so_lock)); 2792 2793 /* 2794 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2795 * was set due to this call and the new state has both of them set: 2796 * Send the AF_UNIX close indication 2797 * For T_COTS send a discon_ind 2798 * 2799 * If cantsend was set due to this call: 2800 * For T_COTSORD send an ordrel_ind 2801 * 2802 * Note that for T_CLTS there is no message sent here. 2803 */ 2804 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2805 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2806 /* 2807 * For SunOS 4.X compatibility we tell the other end 2808 * that we are unable to receive at this point. 2809 */ 2810 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2811 so_unix_close(so); 2812 2813 if (sti->sti_serv_type == T_COTS) 2814 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2815 } 2816 if ((state_change & SS_CANTSENDMORE) && 2817 (sti->sti_serv_type == T_COTS_ORD)) { 2818 /* Send an orderly release */ 2819 ordrel_req.PRIM_type = T_ORDREL_REQ; 2820 2821 mutex_exit(&so->so_lock); 2822 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2823 0, _ALLOC_SLEEP); 2824 /* 2825 * Send down the T_ORDREL_REQ even if there is flow control. 2826 * This prevents shutdown from blocking. 2827 * Note that there is no T_OK_ACK for ordrel_req. 
2828 */ 2829 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2830 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2831 mutex_enter(&so->so_lock); 2832 if (error) { 2833 eprintsoline(so, error); 2834 goto done; 2835 } 2836 } 2837 2838 done: 2839 so_unlock_single(so, SOLOCKED); 2840 mutex_exit(&so->so_lock); 2841 return (error); 2842 } 2843 2844 /* 2845 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2846 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2847 * that we have closed. 2848 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2849 * T_UNITDATA_REQ containing the same option. 2850 * 2851 * For SOCK_DGRAM half-connections (somebody connected to this end 2852 * but this end is not connect) we don't know where to send any 2853 * SO_UNIX_CLOSE. 2854 * 2855 * We have to ignore stream head errors just in case there has been 2856 * a shutdown(output). 2857 * Ignore any flow control to try to get the message more quickly to the peer. 2858 * While locally ignoring flow control solves the problem when there 2859 * is only the loopback transport on the stream it would not provide 2860 * the correct AF_UNIX socket semantics when one or more modules have 2861 * been pushed. 2862 */ 2863 void 2864 so_unix_close(struct sonode *so) 2865 { 2866 int error; 2867 struct T_opthdr toh; 2868 mblk_t *mp; 2869 sotpi_info_t *sti = SOTOTPI(so); 2870 2871 ASSERT(MUTEX_HELD(&so->so_lock)); 2872 2873 ASSERT(so->so_family == AF_UNIX); 2874 2875 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2876 (SS_ISCONNECTED|SS_ISBOUND)) 2877 return; 2878 2879 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2880 (void *)so, pr_state(so->so_state, so->so_mode))); 2881 2882 toh.level = SOL_SOCKET; 2883 toh.name = SO_UNIX_CLOSE; 2884 2885 /* zero length + header */ 2886 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2887 toh.status = 0; 2888 2889 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2890 struct T_optdata_req tdr; 2891 2892 tdr.PRIM_type = T_OPTDATA_REQ; 2893 tdr.DATA_flag = 0; 2894 2895 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2896 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2897 2898 /* NOTE: holding so_lock while sleeping */ 2899 mp = soallocproto2(&tdr, sizeof (tdr), 2900 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2901 } else { 2902 struct T_unitdata_req tudr; 2903 void *addr; 2904 socklen_t addrlen; 2905 void *src; 2906 socklen_t srclen; 2907 struct T_opthdr toh2; 2908 t_scalar_t size; 2909 2910 /* Connecteded DGRAM socket */ 2911 2912 /* 2913 * For AF_UNIX the destination address is translated to 2914 * an internal name and the source address is passed as 2915 * an option. 2916 */ 2917 /* 2918 * Length and family checks. 2919 */ 2920 error = so_addr_verify(so, sti->sti_faddr_sa, 2921 (t_uscalar_t)sti->sti_faddr_len); 2922 if (error) { 2923 eprintsoline(so, error); 2924 return; 2925 } 2926 if (sti->sti_faddr_noxlate) { 2927 /* 2928 * Already have a transport internal address. Do not 2929 * pass any (transport internal) source address. 2930 */ 2931 addr = sti->sti_faddr_sa; 2932 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2933 src = NULL; 2934 srclen = 0; 2935 } else { 2936 /* 2937 * Pass the sockaddr_un source address as an option 2938 * and translate the remote address. 2939 * Holding so_lock thus sti_laddr_sa can not change. 
2940 */ 2941 src = sti->sti_laddr_sa; 2942 srclen = (socklen_t)sti->sti_laddr_len; 2943 dprintso(so, 1, 2944 ("so_ux_close: srclen %d, src %p\n", 2945 srclen, src)); 2946 error = so_ux_addr_xlate(so, 2947 sti->sti_faddr_sa, 2948 (socklen_t)sti->sti_faddr_len, 0, 2949 &addr, &addrlen); 2950 if (error) { 2951 eprintsoline(so, error); 2952 return; 2953 } 2954 } 2955 tudr.PRIM_type = T_UNITDATA_REQ; 2956 tudr.DEST_length = addrlen; 2957 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2958 if (srclen == 0) { 2959 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2960 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2961 _TPI_ALIGN_TOPT(addrlen)); 2962 2963 size = tudr.OPT_offset + tudr.OPT_length; 2964 /* NOTE: holding so_lock while sleeping */ 2965 mp = soallocproto2(&tudr, sizeof (tudr), 2966 addr, addrlen, size, _ALLOC_SLEEP); 2967 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2968 soappendmsg(mp, &toh, sizeof (toh)); 2969 } else { 2970 /* 2971 * There is a AF_UNIX sockaddr_un to include as a 2972 * source address option. 2973 */ 2974 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2975 _TPI_ALIGN_TOPT(srclen)); 2976 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2977 _TPI_ALIGN_TOPT(addrlen)); 2978 2979 toh2.level = SOL_SOCKET; 2980 toh2.name = SO_SRCADDR; 2981 toh2.len = (t_uscalar_t)(srclen + 2982 sizeof (struct T_opthdr)); 2983 toh2.status = 0; 2984 2985 size = tudr.OPT_offset + tudr.OPT_length; 2986 2987 /* NOTE: holding so_lock while sleeping */ 2988 mp = soallocproto2(&tudr, sizeof (tudr), 2989 addr, addrlen, size, _ALLOC_SLEEP); 2990 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2991 soappendmsg(mp, &toh, sizeof (toh)); 2992 soappendmsg(mp, &toh2, sizeof (toh2)); 2993 soappendmsg(mp, src, srclen); 2994 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2995 } 2996 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2997 } 2998 mutex_exit(&so->so_lock); 2999 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 3000 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 3001 mutex_enter(&so->so_lock); 3002 } 3003 3004 /* 3005 * Called by sotpi_recvmsg when reading a non-zero amount of data. 3006 * In addition, the caller typically verifies that there is some 3007 * potential state to clear by checking 3008 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 3009 * before calling this routine. 3010 * Note that such a check can be made without holding so_lock since 3011 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 3012 * decrements sti_oobsigcnt. 3013 * 3014 * When data is read *after* the point that all pending 3015 * oob data has been consumed the oob indication is cleared. 3016 * 3017 * This logic keeps select/poll returning POLLRDBAND and 3018 * SIOCATMARK returning true until we have read past 3019 * the mark. 3020 */ 3021 static void 3022 sorecv_update_oobstate(struct sonode *so) 3023 { 3024 sotpi_info_t *sti = SOTOTPI(so); 3025 3026 mutex_enter(&so->so_lock); 3027 ASSERT(so_verify_oobstate(so)); 3028 dprintso(so, 1, 3029 ("sorecv_update_oobstate: counts %d/%d state %s\n", 3030 sti->sti_oobsigcnt, 3031 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 3032 if (sti->sti_oobsigcnt == 0) { 3033 /* No more pending oob indications */ 3034 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 3035 freemsg(so->so_oobmsg); 3036 so->so_oobmsg = NULL; 3037 } 3038 ASSERT(so_verify_oobstate(so)); 3039 mutex_exit(&so->so_lock); 3040 } 3041 3042 /* 3043 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 
3044 */ 3045 static int 3046 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 3047 { 3048 sotpi_info_t *sti = SOTOTPI(so); 3049 int error = 0; 3050 mblk_t *tmp = NULL; 3051 mblk_t *pmp = NULL; 3052 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 3053 3054 ASSERT(nmp != NULL); 3055 3056 while (nmp != NULL && uiop->uio_resid > 0) { 3057 ssize_t n; 3058 3059 if (DB_TYPE(nmp) == M_DATA) { 3060 /* 3061 * We have some data, uiomove up to resid bytes. 3062 */ 3063 n = MIN(MBLKL(nmp), uiop->uio_resid); 3064 if (n > 0) 3065 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 3066 nmp->b_rptr += n; 3067 if (nmp->b_rptr == nmp->b_wptr) { 3068 pmp = nmp; 3069 nmp = nmp->b_cont; 3070 } 3071 if (error) 3072 break; 3073 } else { 3074 /* 3075 * We only handle data, save for caller to handle. 3076 */ 3077 if (pmp != NULL) { 3078 pmp->b_cont = nmp->b_cont; 3079 } 3080 nmp->b_cont = NULL; 3081 if (*rmp == NULL) { 3082 *rmp = nmp; 3083 } else { 3084 tmp->b_cont = nmp; 3085 } 3086 nmp = nmp->b_cont; 3087 tmp = nmp; 3088 } 3089 } 3090 if (pmp != NULL) { 3091 /* Free any mblk_t(s) which we have consumed */ 3092 pmp->b_cont = NULL; 3093 freemsg(sti->sti_nl7c_rcv_mp); 3094 } 3095 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3096 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3097 if (error == 0) { 3098 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3099 3100 error = p->r_v.r_v2; 3101 p->r_v.r_v2 = 0; 3102 } 3103 rp->r_vals = sti->sti_nl7c_rcv_rval; 3104 sti->sti_nl7c_rcv_rval = 0; 3105 } else { 3106 /* More mblk_t(s) to process so no rval to return */ 3107 rp->r_vals = 0; 3108 } 3109 return (error); 3110 } 3111 /* 3112 * Receive the next message on the queue. 3113 * If msg_controllen is non-zero when called the caller is interested in 3114 * any received control info (options). 3115 * If msg_namelen is non-zero when called the caller is interested in 3116 * any received source address. 3117 * The routine returns with msg_control and msg_name pointing to 3118 * kmem_alloc'ed memory which the caller has to free. 3119 */ 3120 /* ARGSUSED */ 3121 int 3122 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3123 struct cred *cr) 3124 { 3125 union T_primitives *tpr; 3126 mblk_t *mp; 3127 uchar_t pri; 3128 int pflag, opflag; 3129 void *control; 3130 t_uscalar_t controllen; 3131 t_uscalar_t namelen; 3132 int so_state = so->so_state; /* Snapshot */ 3133 ssize_t saved_resid; 3134 rval_t rval; 3135 int flags; 3136 clock_t timout; 3137 int error = 0; 3138 int reterr = 0; 3139 struct uio *suiop = NULL; 3140 sotpi_info_t *sti = SOTOTPI(so); 3141 3142 flags = msg->msg_flags; 3143 msg->msg_flags = 0; 3144 3145 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3146 (void *)so, (void *)msg, flags, 3147 pr_state(so->so_state, so->so_mode), so->so_error)); 3148 3149 if (so->so_version == SOV_STREAM) { 3150 so_update_attrs(so, SOACC); 3151 /* The imaginary "sockmod" has been popped - act as a stream */ 3152 return (strread(SOTOV(so), uiop, cr)); 3153 } 3154 3155 /* 3156 * If we are not connected because we have never been connected 3157 * we return ENOTCONN. If we have been connected (but are no longer 3158 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3159 * the EOF. 3160 * 3161 * An alternative would be to post an ENOTCONN error in stream head 3162 * (read+write) and clear it when we're connected. However, that error 3163 * would cause incorrect poll/select behavior! 
3164 */ 3165 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3166 (so->so_mode & SM_CONNREQUIRED)) { 3167 return (ENOTCONN); 3168 } 3169 3170 /* 3171 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3172 * after checking that the read queue is empty) and returns zero. 3173 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3174 * is zero. 3175 */ 3176 3177 if (flags & MSG_OOB) { 3178 /* Check that the transport supports OOB */ 3179 if (!(so->so_mode & SM_EXDATA)) 3180 return (EOPNOTSUPP); 3181 so_update_attrs(so, SOACC); 3182 return (sorecvoob(so, msg, uiop, flags, 3183 (so->so_options & SO_OOBINLINE))); 3184 } 3185 3186 so_update_attrs(so, SOACC); 3187 3188 /* 3189 * Set msg_controllen and msg_namelen to zero here to make it 3190 * simpler in the cases that no control or name is returned. 3191 */ 3192 controllen = msg->msg_controllen; 3193 namelen = msg->msg_namelen; 3194 msg->msg_controllen = 0; 3195 msg->msg_namelen = 0; 3196 3197 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3198 namelen, controllen)); 3199 3200 mutex_enter(&so->so_lock); 3201 /* 3202 * If an NL7C enabled socket and not waiting for write data. 3203 */ 3204 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3205 NL7C_ENABLED) { 3206 if (sti->sti_nl7c_uri) { 3207 /* Close uri processing for a previous request */ 3208 nl7c_close(so); 3209 } 3210 if ((so_state & SS_CANTRCVMORE) && 3211 sti->sti_nl7c_rcv_mp == NULL) { 3212 /* Nothing to process, EOF */ 3213 mutex_exit(&so->so_lock); 3214 return (0); 3215 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3216 /* Persistent NL7C socket, try to process request */ 3217 boolean_t ret; 3218 3219 ret = nl7c_process(so, 3220 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3221 rval.r_vals = sti->sti_nl7c_rcv_rval; 3222 error = rval.r_v.r_v2; 3223 if (error) { 3224 /* Error of some sort, return it */ 3225 mutex_exit(&so->so_lock); 3226 return (error); 3227 } 3228 if (sti->sti_nl7c_flags && 3229 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3230 /* 3231 * Still an NL7C socket and no data 3232 * to pass up to the caller. 3233 */ 3234 mutex_exit(&so->so_lock); 3235 if (ret) { 3236 /* EOF */ 3237 return (0); 3238 } else { 3239 /* Need more data */ 3240 return (EAGAIN); 3241 } 3242 } 3243 } else { 3244 /* 3245 * Not persistent so no further NL7C processing. 3246 */ 3247 sti->sti_nl7c_flags = 0; 3248 } 3249 } 3250 /* 3251 * Only one reader is allowed at any given time. This is needed 3252 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3253 * 3254 * This is slightly different that BSD behavior in that it fails with 3255 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3256 * is single-threaded using sblock(), which is dropped while waiting 3257 * for data to appear. The difference shows up e.g. if one 3258 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3259 * does use nonblocking io and different threads are reading each 3260 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3261 * in this case as long as the read queue doesn't get empty. 3262 * In this implementation the thread using nonblocking io can 3263 * get an EWOULDBLOCK error due to the blocking thread executing 3264 * e.g. in the uiomove in kstrgetmsg. 3265 * This difference is not believed to be significant. 3266 */ 3267 /* Set SOREADLOCKED */ 3268 error = so_lock_read_intr(so, 3269 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? 
FNONBLOCK : 0)); 3270 mutex_exit(&so->so_lock); 3271 if (error) 3272 return (error); 3273 3274 /* 3275 * Tell kstrgetmsg to not inspect the stream head errors until all 3276 * queued data has been consumed. 3277 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3278 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3279 * 3280 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3281 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3282 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3283 */ 3284 pflag = MSG_ANY | MSG_DELAYERROR; 3285 if (flags & MSG_PEEK) { 3286 pflag |= MSG_IPEEK; 3287 flags &= ~MSG_WAITALL; 3288 } 3289 if (so->so_mode & SM_ATOMIC) 3290 pflag |= MSG_DISCARDTAIL; 3291 3292 if (flags & MSG_DONTWAIT) 3293 timout = 0; 3294 else 3295 timout = -1; 3296 opflag = pflag; 3297 3298 suiop = sod_rcv_init(so, flags, &uiop); 3299 retry: 3300 saved_resid = uiop->uio_resid; 3301 pri = 0; 3302 mp = NULL; 3303 if (sti->sti_nl7c_rcv_mp != NULL) { 3304 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3305 error = nl7c_sorecv(so, &mp, uiop, &rval); 3306 } else { 3307 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3308 timout, &rval); 3309 } 3310 if (error != 0) { 3311 /* kstrgetmsg returns ETIME when timeout expires */ 3312 if (error == ETIME) 3313 error = EWOULDBLOCK; 3314 goto out; 3315 } 3316 /* 3317 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3318 * For non-datagrams MOREDATA is used to set MSG_EOR. 3319 */ 3320 ASSERT(!(rval.r_val1 & MORECTL)); 3321 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3322 msg->msg_flags |= MSG_TRUNC; 3323 3324 if (mp == NULL) { 3325 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3326 /* 3327 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3328 * The draft Posix socket spec states that the mark should 3329 * not be cleared when peeking. We follow the latter. 3330 */ 3331 if ((so->so_state & 3332 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3333 (uiop->uio_resid != saved_resid) && 3334 !(flags & MSG_PEEK)) { 3335 sorecv_update_oobstate(so); 3336 } 3337 3338 mutex_enter(&so->so_lock); 3339 /* Set MSG_EOR based on MOREDATA */ 3340 if (!(rval.r_val1 & MOREDATA)) { 3341 if (so->so_state & SS_SAVEDEOR) { 3342 msg->msg_flags |= MSG_EOR; 3343 so->so_state &= ~SS_SAVEDEOR; 3344 } 3345 } 3346 /* 3347 * If some data was received (i.e. not EOF) and the 3348 * read/recv* has not been satisfied wait for some more. 3349 */ 3350 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3351 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3352 mutex_exit(&so->so_lock); 3353 pflag = opflag | MSG_NOMARK; 3354 goto retry; 3355 } 3356 goto out_locked; 3357 } 3358 3359 /* strsock_proto has already verified length and alignment */ 3360 tpr = (union T_primitives *)mp->b_rptr; 3361 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3362 3363 switch (tpr->type) { 3364 case T_DATA_IND: { 3365 if ((so->so_state & 3366 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3367 (uiop->uio_resid != saved_resid) && 3368 !(flags & MSG_PEEK)) { 3369 sorecv_update_oobstate(so); 3370 } 3371 3372 /* 3373 * Set msg_flags to MSG_EOR based on 3374 * MORE_flag and MOREDATA. 
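 *
 * The combinations handled below are (sketch):
 *
 *	MORE_flag  MOREDATA	result
 *	    0          0	MSG_EOR reported now
 *	    0          1	SS_SAVEDEOR set; MSG_EOR reported once
 *				the rest of the record has been read
 *	    1          -	record continues, no MSG_EOR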
3375 */ 3376 mutex_enter(&so->so_lock); 3377 so->so_state &= ~SS_SAVEDEOR; 3378 if (!(tpr->data_ind.MORE_flag & 1)) { 3379 if (!(rval.r_val1 & MOREDATA)) 3380 msg->msg_flags |= MSG_EOR; 3381 else 3382 so->so_state |= SS_SAVEDEOR; 3383 } 3384 freemsg(mp); 3385 /* 3386 * If some data was received (i.e. not EOF) and the 3387 * read/recv* has not been satisfied wait for some more. 3388 */ 3389 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3390 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3391 mutex_exit(&so->so_lock); 3392 pflag = opflag | MSG_NOMARK; 3393 goto retry; 3394 } 3395 goto out_locked; 3396 } 3397 case T_UNITDATA_IND: { 3398 void *addr; 3399 t_uscalar_t addrlen; 3400 void *abuf; 3401 t_uscalar_t optlen; 3402 void *opt; 3403 3404 if ((so->so_state & 3405 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3406 (uiop->uio_resid != saved_resid) && 3407 !(flags & MSG_PEEK)) { 3408 sorecv_update_oobstate(so); 3409 } 3410 3411 if (namelen != 0) { 3412 /* Caller wants source address */ 3413 addrlen = tpr->unitdata_ind.SRC_length; 3414 addr = sogetoff(mp, 3415 tpr->unitdata_ind.SRC_offset, 3416 addrlen, 1); 3417 if (addr == NULL) { 3418 freemsg(mp); 3419 error = EPROTO; 3420 eprintsoline(so, error); 3421 goto out; 3422 } 3423 if (so->so_family == AF_UNIX) { 3424 /* 3425 * Can not use the transport level address. 3426 * If there is a SO_SRCADDR option carrying 3427 * the socket level address it will be 3428 * extracted below. 3429 */ 3430 addr = NULL; 3431 addrlen = 0; 3432 } 3433 } 3434 optlen = tpr->unitdata_ind.OPT_length; 3435 if (optlen != 0) { 3436 t_uscalar_t ncontrollen; 3437 3438 /* 3439 * Extract any source address option. 3440 * Determine how large cmsg buffer is needed. 3441 */ 3442 opt = sogetoff(mp, 3443 tpr->unitdata_ind.OPT_offset, 3444 optlen, __TPI_ALIGN_SIZE); 3445 3446 if (opt == NULL) { 3447 freemsg(mp); 3448 error = EPROTO; 3449 eprintsoline(so, error); 3450 goto out; 3451 } 3452 if (so->so_family == AF_UNIX) 3453 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3454 ncontrollen = so_cmsglen(mp, opt, optlen, 3455 !(flags & MSG_XPG4_2)); 3456 if (controllen != 0) 3457 controllen = ncontrollen; 3458 else if (ncontrollen != 0) 3459 msg->msg_flags |= MSG_CTRUNC; 3460 } else { 3461 controllen = 0; 3462 } 3463 3464 if (namelen != 0) { 3465 /* 3466 * Return address to caller. 3467 * Caller handles truncation if length 3468 * exceeds msg_namelen. 3469 * NOTE: AF_UNIX NUL termination is ensured by 3470 * the sender's copyin_name(). 3471 */ 3472 abuf = kmem_alloc(addrlen, KM_SLEEP); 3473 3474 bcopy(addr, abuf, addrlen); 3475 msg->msg_name = abuf; 3476 msg->msg_namelen = addrlen; 3477 } 3478 3479 if (controllen != 0) { 3480 /* 3481 * Return control msg to caller. 3482 * Caller handles truncation if length 3483 * exceeds msg_controllen. 
3484 */ 3485 control = kmem_zalloc(controllen, KM_SLEEP); 3486 3487 error = so_opt2cmsg(mp, opt, optlen, 3488 !(flags & MSG_XPG4_2), 3489 control, controllen); 3490 if (error) { 3491 freemsg(mp); 3492 if (msg->msg_namelen != 0) 3493 kmem_free(msg->msg_name, 3494 msg->msg_namelen); 3495 kmem_free(control, controllen); 3496 eprintsoline(so, error); 3497 goto out; 3498 } 3499 msg->msg_control = control; 3500 msg->msg_controllen = controllen; 3501 } 3502 3503 freemsg(mp); 3504 goto out; 3505 } 3506 case T_OPTDATA_IND: { 3507 struct T_optdata_req *tdr; 3508 void *opt; 3509 t_uscalar_t optlen; 3510 3511 if ((so->so_state & 3512 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3513 (uiop->uio_resid != saved_resid) && 3514 !(flags & MSG_PEEK)) { 3515 sorecv_update_oobstate(so); 3516 } 3517 3518 tdr = (struct T_optdata_req *)mp->b_rptr; 3519 optlen = tdr->OPT_length; 3520 if (optlen != 0) { 3521 t_uscalar_t ncontrollen; 3522 /* 3523 * Determine how large cmsg buffer is needed. 3524 */ 3525 opt = sogetoff(mp, 3526 tpr->optdata_ind.OPT_offset, 3527 optlen, __TPI_ALIGN_SIZE); 3528 3529 if (opt == NULL) { 3530 freemsg(mp); 3531 error = EPROTO; 3532 eprintsoline(so, error); 3533 goto out; 3534 } 3535 3536 ncontrollen = so_cmsglen(mp, opt, optlen, 3537 !(flags & MSG_XPG4_2)); 3538 if (controllen != 0) 3539 controllen = ncontrollen; 3540 else if (ncontrollen != 0) 3541 msg->msg_flags |= MSG_CTRUNC; 3542 } else { 3543 controllen = 0; 3544 } 3545 3546 if (controllen != 0) { 3547 /* 3548 * Return control msg to caller. 3549 * Caller handles truncation if length 3550 * exceeds msg_controllen. 3551 */ 3552 control = kmem_zalloc(controllen, KM_SLEEP); 3553 3554 error = so_opt2cmsg(mp, opt, optlen, 3555 !(flags & MSG_XPG4_2), 3556 control, controllen); 3557 if (error) { 3558 freemsg(mp); 3559 kmem_free(control, controllen); 3560 eprintsoline(so, error); 3561 goto out; 3562 } 3563 msg->msg_control = control; 3564 msg->msg_controllen = controllen; 3565 } 3566 3567 /* 3568 * Set msg_flags to MSG_EOR based on 3569 * DATA_flag and MOREDATA. 3570 */ 3571 mutex_enter(&so->so_lock); 3572 so->so_state &= ~SS_SAVEDEOR; 3573 if (!(tpr->data_ind.MORE_flag & 1)) { 3574 if (!(rval.r_val1 & MOREDATA)) 3575 msg->msg_flags |= MSG_EOR; 3576 else 3577 so->so_state |= SS_SAVEDEOR; 3578 } 3579 freemsg(mp); 3580 /* 3581 * If some data was received (i.e. not EOF) and the 3582 * read/recv* has not been satisfied wait for some more. 3583 * Not possible to wait if control info was received. 3584 */ 3585 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3586 controllen == 0 && 3587 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3588 mutex_exit(&so->so_lock); 3589 pflag = opflag | MSG_NOMARK; 3590 goto retry; 3591 } 3592 goto out_locked; 3593 } 3594 case T_EXDATA_IND: { 3595 dprintso(so, 1, 3596 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3597 "state %s\n", 3598 sti->sti_oobsigcnt, sti->sti_oobcnt, 3599 saved_resid - uiop->uio_resid, 3600 pr_state(so->so_state, so->so_mode))); 3601 /* 3602 * kstrgetmsg handles MSGMARK so there is nothing to 3603 * inspect in the T_EXDATA_IND. 3604 * strsock_proto makes the stream head queue the T_EXDATA_IND 3605 * as a separate message with no M_DATA component. Furthermore, 3606 * the stream head does not consolidate M_DATA messages onto 3607 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3608 * remains a message by itself. This is needed since MSGMARK 3609 * marks both the whole message as well as the last byte 3610 * of the message. 
3611 */ 3612 freemsg(mp); 3613 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3614 if (flags & MSG_PEEK) { 3615 /* 3616 * Even though we are peeking we consume the 3617 * T_EXDATA_IND thereby moving the mark information 3618 * to SS_RCVATMARK. Then the oob code below will 3619 * retry the peeking kstrgetmsg. 3620 * Note that the stream head read queue is 3621 * never flushed without holding SOREADLOCKED 3622 * thus the T_EXDATA_IND can not disappear 3623 * underneath us. 3624 */ 3625 dprintso(so, 1, 3626 ("sotpi_recvmsg: consume EXDATA_IND " 3627 "counts %d/%d state %s\n", 3628 sti->sti_oobsigcnt, 3629 sti->sti_oobcnt, 3630 pr_state(so->so_state, so->so_mode))); 3631 3632 pflag = MSG_ANY | MSG_DELAYERROR; 3633 if (so->so_mode & SM_ATOMIC) 3634 pflag |= MSG_DISCARDTAIL; 3635 3636 pri = 0; 3637 mp = NULL; 3638 3639 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3640 &pri, &pflag, (clock_t)-1, &rval); 3641 ASSERT(uiop->uio_resid == saved_resid); 3642 3643 if (error) { 3644 #ifdef SOCK_DEBUG 3645 if (error != EWOULDBLOCK && error != EINTR) { 3646 eprintsoline(so, error); 3647 } 3648 #endif /* SOCK_DEBUG */ 3649 goto out; 3650 } 3651 ASSERT(mp); 3652 tpr = (union T_primitives *)mp->b_rptr; 3653 ASSERT(tpr->type == T_EXDATA_IND); 3654 freemsg(mp); 3655 } /* end "if (flags & MSG_PEEK)" */ 3656 3657 /* 3658 * Decrement the number of queued and pending oob. 3659 * 3660 * SS_RCVATMARK is cleared when we read past a mark. 3661 * SS_HAVEOOBDATA is cleared when we've read past the 3662 * last mark. 3663 * SS_OOBPEND is cleared if we've read past the last 3664 * mark and no (new) SIGURG has been posted. 3665 */ 3666 mutex_enter(&so->so_lock); 3667 ASSERT(so_verify_oobstate(so)); 3668 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3669 ASSERT(sti->sti_oobsigcnt > 0); 3670 sti->sti_oobsigcnt--; 3671 ASSERT(sti->sti_oobcnt > 0); 3672 sti->sti_oobcnt--; 3673 /* 3674 * Since the T_EXDATA_IND has been removed from the stream 3675 * head, but we have not read data past the mark, 3676 * sockfs needs to track that the socket is still at the mark. 3677 * 3678 * Since no data was received call kstrgetmsg again to wait 3679 * for data. 3680 */ 3681 so->so_state |= SS_RCVATMARK; 3682 mutex_exit(&so->so_lock); 3683 dprintso(so, 1, 3684 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3685 sti->sti_oobsigcnt, sti->sti_oobcnt, 3686 pr_state(so->so_state, so->so_mode))); 3687 pflag = opflag; 3688 goto retry; 3689 } 3690 default: 3691 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3692 (void *)so, tpr->type, (void *)mp); 3693 ASSERT(0); 3694 freemsg(mp); 3695 error = EPROTO; 3696 eprintsoline(so, error); 3697 goto out; 3698 } 3699 /* NOTREACHED */ 3700 out: 3701 mutex_enter(&so->so_lock); 3702 out_locked: 3703 if (so->so_direct != NULL) { 3704 mutex_enter(so->so_direct->sod_lockp); 3705 reterr = sod_rcv_done(so, suiop, uiop); 3706 mutex_exit(so->so_direct->sod_lockp); 3707 } 3708 if (reterr != 0 && error == 0) 3709 error = reterr; 3710 so_unlock_read(so); /* Clear SOREADLOCKED */ 3711 mutex_exit(&so->so_lock); 3712 return (error); 3713 } 3714 3715 /* 3716 * Sending data with options on a datagram socket. 3717 * Assumes caller has verified that SS_ISBOUND etc. are set. 
3718 */ 3719 static int 3720 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3721 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3722 { 3723 struct T_unitdata_req tudr; 3724 mblk_t *mp; 3725 int error; 3726 void *addr; 3727 socklen_t addrlen; 3728 void *src; 3729 socklen_t srclen; 3730 ssize_t len; 3731 int size; 3732 struct T_opthdr toh; 3733 struct fdbuf *fdbuf; 3734 t_uscalar_t optlen; 3735 void *fds; 3736 int fdlen; 3737 sotpi_info_t *sti = SOTOTPI(so); 3738 3739 ASSERT(name && namelen); 3740 ASSERT(control && controllen); 3741 3742 len = uiop->uio_resid; 3743 if (len > (ssize_t)sti->sti_tidu_size) { 3744 return (EMSGSIZE); 3745 } 3746 3747 /* 3748 * For AF_UNIX the destination address is translated to an internal 3749 * name and the source address is passed as an option. 3750 * Also, file descriptors are passed as file pointers in an 3751 * option. 3752 */ 3753 3754 /* 3755 * Length and family checks. 3756 */ 3757 error = so_addr_verify(so, name, namelen); 3758 if (error) { 3759 eprintsoline(so, error); 3760 return (error); 3761 } 3762 if (so->so_family == AF_UNIX) { 3763 if (sti->sti_faddr_noxlate) { 3764 /* 3765 * Already have a transport internal address. Do not 3766 * pass any (transport internal) source address. 3767 */ 3768 addr = name; 3769 addrlen = namelen; 3770 src = NULL; 3771 srclen = 0; 3772 } else { 3773 /* 3774 * Pass the sockaddr_un source address as an option 3775 * and translate the remote address. 3776 * 3777 * Note that this code does not prevent sti_laddr_sa 3778 * from changing while it is being used. Thus 3779 * if an unbind+bind occurs concurrently with this 3780 * send the peer might see a partially new and a 3781 * partially old "from" address. 3782 */ 3783 src = sti->sti_laddr_sa; 3784 srclen = (t_uscalar_t)sti->sti_laddr_len; 3785 dprintso(so, 1, 3786 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3787 srclen, src)); 3788 error = so_ux_addr_xlate(so, name, namelen, 3789 (flags & MSG_XPG4_2), 3790 &addr, &addrlen); 3791 if (error) { 3792 eprintsoline(so, error); 3793 return (error); 3794 } 3795 } 3796 } else { 3797 addr = name; 3798 addrlen = namelen; 3799 src = NULL; 3800 srclen = 0; 3801 } 3802 optlen = so_optlen(control, controllen, 3803 !(flags & MSG_XPG4_2)); 3804 tudr.PRIM_type = T_UNITDATA_REQ; 3805 tudr.DEST_length = addrlen; 3806 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3807 if (srclen != 0) 3808 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3809 _TPI_ALIGN_TOPT(srclen)); 3810 else 3811 tudr.OPT_length = optlen; 3812 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3813 _TPI_ALIGN_TOPT(addrlen)); 3814 3815 size = tudr.OPT_offset + tudr.OPT_length; 3816 3817 /* 3818 * File descriptors only when SM_FDPASSING set. 3819 */ 3820 error = so_getfdopt(control, controllen, 3821 !(flags & MSG_XPG4_2), &fds, &fdlen); 3822 if (error) 3823 return (error); 3824 if (fdlen != -1) { 3825 if (!(so->so_mode & SM_FDPASSING)) 3826 return (EOPNOTSUPP); 3827 3828 error = fdbuf_create(fds, fdlen, &fdbuf); 3829 if (error) 3830 return (error); 3831 mp = fdbuf_allocmsg(size, fdbuf); 3832 } else { 3833 mp = soallocproto(size, _ALLOC_INTR); 3834 if (mp == NULL) { 3835 /* 3836 * Caught a signal waiting for memory. 3837 * Let send* return EINTR. 
3838 */ 3839 return (EINTR); 3840 } 3841 } 3842 soappendmsg(mp, &tudr, sizeof (tudr)); 3843 soappendmsg(mp, addr, addrlen); 3844 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3845 3846 if (fdlen != -1) { 3847 ASSERT(fdbuf != NULL); 3848 toh.level = SOL_SOCKET; 3849 toh.name = SO_FILEP; 3850 toh.len = fdbuf->fd_size + 3851 (t_uscalar_t)sizeof (struct T_opthdr); 3852 toh.status = 0; 3853 soappendmsg(mp, &toh, sizeof (toh)); 3854 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3855 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3856 } 3857 if (srclen != 0) { 3858 /* 3859 * There is a AF_UNIX sockaddr_un to include as a source 3860 * address option. 3861 */ 3862 toh.level = SOL_SOCKET; 3863 toh.name = SO_SRCADDR; 3864 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3865 toh.status = 0; 3866 soappendmsg(mp, &toh, sizeof (toh)); 3867 soappendmsg(mp, src, srclen); 3868 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3869 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3870 } 3871 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3872 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3873 /* At most 3 bytes left in the message */ 3874 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3875 ASSERT(MBLKL(mp) <= (ssize_t)size); 3876 3877 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3878 if (audit_active) 3879 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3880 3881 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3882 #ifdef SOCK_DEBUG 3883 if (error) { 3884 eprintsoline(so, error); 3885 } 3886 #endif /* SOCK_DEBUG */ 3887 return (error); 3888 } 3889 3890 /* 3891 * Sending data with options on a connected stream socket. 3892 * Assumes caller has verified that SS_ISCONNECTED is set. 3893 */ 3894 static int 3895 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3896 t_uscalar_t controllen, int flags) 3897 { 3898 struct T_optdata_req tdr; 3899 mblk_t *mp; 3900 int error; 3901 ssize_t iosize; 3902 int size; 3903 struct fdbuf *fdbuf; 3904 t_uscalar_t optlen; 3905 void *fds; 3906 int fdlen; 3907 struct T_opthdr toh; 3908 sotpi_info_t *sti = SOTOTPI(so); 3909 3910 dprintso(so, 1, 3911 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3912 3913 /* 3914 * Has to be bound and connected. However, since no locks are 3915 * held the state could have changed after sotpi_sendmsg checked it 3916 * thus it is not possible to ASSERT on the state. 3917 */ 3918 3919 /* Options on connection-oriented only when SM_OPTDATA set. */ 3920 if (!(so->so_mode & SM_OPTDATA)) 3921 return (EOPNOTSUPP); 3922 3923 do { 3924 /* 3925 * Set the MORE flag if uio_resid does not fit in this 3926 * message or if the caller passed in "more". 3927 * Error for transports with zero tidu_size. 3928 */ 3929 tdr.PRIM_type = T_OPTDATA_REQ; 3930 iosize = sti->sti_tidu_size; 3931 if (iosize <= 0) 3932 return (EMSGSIZE); 3933 if (uiop->uio_resid > iosize) { 3934 tdr.DATA_flag = 1; 3935 } else { 3936 if (more) 3937 tdr.DATA_flag = 1; 3938 else 3939 tdr.DATA_flag = 0; 3940 iosize = uiop->uio_resid; 3941 } 3942 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3943 tdr.DATA_flag, iosize)); 3944 3945 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3946 tdr.OPT_length = optlen; 3947 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3948 3949 size = (int)sizeof (tdr) + optlen; 3950 /* 3951 * File descriptors only when SM_FDPASSING set. 
3952 */ 3953 error = so_getfdopt(control, controllen, 3954 !(flags & MSG_XPG4_2), &fds, &fdlen); 3955 if (error) 3956 return (error); 3957 if (fdlen != -1) { 3958 if (!(so->so_mode & SM_FDPASSING)) 3959 return (EOPNOTSUPP); 3960 3961 error = fdbuf_create(fds, fdlen, &fdbuf); 3962 if (error) 3963 return (error); 3964 mp = fdbuf_allocmsg(size, fdbuf); 3965 } else { 3966 mp = soallocproto(size, _ALLOC_INTR); 3967 if (mp == NULL) { 3968 /* 3969 * Caught a signal waiting for memory. 3970 * Let send* return EINTR. 3971 */ 3972 return (EINTR); 3973 } 3974 } 3975 soappendmsg(mp, &tdr, sizeof (tdr)); 3976 3977 if (fdlen != -1) { 3978 ASSERT(fdbuf != NULL); 3979 toh.level = SOL_SOCKET; 3980 toh.name = SO_FILEP; 3981 toh.len = fdbuf->fd_size + 3982 (t_uscalar_t)sizeof (struct T_opthdr); 3983 toh.status = 0; 3984 soappendmsg(mp, &toh, sizeof (toh)); 3985 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3986 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3987 } 3988 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3989 /* At most 3 bytes left in the message */ 3990 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3991 ASSERT(MBLKL(mp) <= (ssize_t)size); 3992 3993 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3994 3995 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3996 0, MSG_BAND, 0); 3997 if (error) { 3998 eprintsoline(so, error); 3999 return (error); 4000 } 4001 control = NULL; 4002 if (uiop->uio_resid > 0) { 4003 /* 4004 * Recheck for fatal errors. Fail write even though 4005 * some data have been written. This is consistent 4006 * with strwrite semantics and BSD sockets semantics. 4007 */ 4008 if (so->so_state & SS_CANTSENDMORE) { 4009 eprintsoline(so, error); 4010 return (EPIPE); 4011 } 4012 if (so->so_error != 0) { 4013 mutex_enter(&so->so_lock); 4014 error = sogeterr(so, B_TRUE); 4015 mutex_exit(&so->so_lock); 4016 if (error != 0) { 4017 eprintsoline(so, error); 4018 return (error); 4019 } 4020 } 4021 } 4022 } while (uiop->uio_resid > 0); 4023 return (0); 4024 } 4025 4026 /* 4027 * Sending data on a datagram socket. 4028 * Assumes caller has verified that SS_ISBOUND etc. are set. 4029 * 4030 * For AF_UNIX the destination address is translated to an internal 4031 * name and the source address is passed as an option. 4032 */ 4033 int 4034 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 4035 struct uio *uiop, int flags) 4036 { 4037 struct T_unitdata_req tudr; 4038 mblk_t *mp; 4039 int error; 4040 void *addr; 4041 socklen_t addrlen; 4042 void *src; 4043 socklen_t srclen; 4044 ssize_t len; 4045 sotpi_info_t *sti = SOTOTPI(so); 4046 4047 ASSERT(name != NULL && namelen != 0); 4048 4049 len = uiop->uio_resid; 4050 if (len > sti->sti_tidu_size) { 4051 error = EMSGSIZE; 4052 goto done; 4053 } 4054 4055 /* Length and family checks */ 4056 error = so_addr_verify(so, name, namelen); 4057 if (error != 0) 4058 goto done; 4059 4060 if (sti->sti_direct) 4061 return (sodgram_direct(so, name, namelen, uiop, flags)); 4062 4063 if (so->so_family == AF_UNIX) { 4064 if (sti->sti_faddr_noxlate) { 4065 /* 4066 * Already have a transport internal address. Do not 4067 * pass any (transport internal) source address. 4068 */ 4069 addr = name; 4070 addrlen = namelen; 4071 src = NULL; 4072 srclen = 0; 4073 } else { 4074 /* 4075 * Pass the sockaddr_un source address as an option 4076 * and translate the remote address. 4077 * 4078 * Note that this code does not prevent sti_laddr_sa 4079 * from changing while it is being used. 
Thus 4080 * if an unbind+bind occurs concurrently with this 4081 * send the peer might see a partially new and a 4082 * partially old "from" address. 4083 */ 4084 src = sti->sti_laddr_sa; 4085 srclen = (socklen_t)sti->sti_laddr_len; 4086 dprintso(so, 1, 4087 ("sosend_dgram UNIX: srclen %d, src %p\n", 4088 srclen, src)); 4089 error = so_ux_addr_xlate(so, name, namelen, 4090 (flags & MSG_XPG4_2), 4091 &addr, &addrlen); 4092 if (error) { 4093 eprintsoline(so, error); 4094 goto done; 4095 } 4096 } 4097 } else { 4098 addr = name; 4099 addrlen = namelen; 4100 src = NULL; 4101 srclen = 0; 4102 } 4103 tudr.PRIM_type = T_UNITDATA_REQ; 4104 tudr.DEST_length = addrlen; 4105 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4106 if (srclen == 0) { 4107 tudr.OPT_length = 0; 4108 tudr.OPT_offset = 0; 4109 4110 mp = soallocproto2(&tudr, sizeof (tudr), 4111 addr, addrlen, 0, _ALLOC_INTR); 4112 if (mp == NULL) { 4113 /* 4114 * Caught a signal waiting for memory. 4115 * Let send* return EINTR. 4116 */ 4117 error = EINTR; 4118 goto done; 4119 } 4120 } else { 4121 /* 4122 * There is a AF_UNIX sockaddr_un to include as a source 4123 * address option. 4124 */ 4125 struct T_opthdr toh; 4126 ssize_t size; 4127 4128 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4129 _TPI_ALIGN_TOPT(srclen)); 4130 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4131 _TPI_ALIGN_TOPT(addrlen)); 4132 4133 toh.level = SOL_SOCKET; 4134 toh.name = SO_SRCADDR; 4135 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4136 toh.status = 0; 4137 4138 size = tudr.OPT_offset + tudr.OPT_length; 4139 mp = soallocproto2(&tudr, sizeof (tudr), 4140 addr, addrlen, size, _ALLOC_INTR); 4141 if (mp == NULL) { 4142 /* 4143 * Caught a signal waiting for memory. 4144 * Let send* return EINTR. 4145 */ 4146 error = EINTR; 4147 goto done; 4148 } 4149 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4150 soappendmsg(mp, &toh, sizeof (toh)); 4151 soappendmsg(mp, src, srclen); 4152 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4153 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4154 } 4155 4156 if (audit_active) 4157 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4158 4159 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4160 done: 4161 #ifdef SOCK_DEBUG 4162 if (error) { 4163 eprintsoline(so, error); 4164 } 4165 #endif /* SOCK_DEBUG */ 4166 return (error); 4167 } 4168 4169 /* 4170 * Sending data on a connected stream socket. 4171 * Assumes caller has verified that SS_ISCONNECTED is set. 4172 */ 4173 int 4174 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4175 int sflag) 4176 { 4177 struct T_data_req tdr; 4178 mblk_t *mp; 4179 int error; 4180 ssize_t iosize; 4181 sotpi_info_t *sti = SOTOTPI(so); 4182 4183 dprintso(so, 1, 4184 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4185 (void *)so, uiop->uio_resid, prim, sflag)); 4186 4187 /* 4188 * Has to be bound and connected. However, since no locks are 4189 * held the state could have changed after sotpi_sendmsg checked it 4190 * thus it is not possible to ASSERT on the state. 4191 */ 4192 4193 do { 4194 /* 4195 * Set the MORE flag if uio_resid does not fit in this 4196 * message or if the caller passed in "more". 4197 * Error for transports with zero tidu_size. 
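 * (Worked example, assuming a tidu_size of 16K: a 40K write with
 *  "more" clear goes out as three messages of 16K, 16K and 8K,
 *  with MORE_flag set on the first two and clear on the last.)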
4198 */ 4199 tdr.PRIM_type = prim; 4200 iosize = sti->sti_tidu_size; 4201 if (iosize <= 0) 4202 return (EMSGSIZE); 4203 if (uiop->uio_resid > iosize) { 4204 tdr.MORE_flag = 1; 4205 } else { 4206 if (more) 4207 tdr.MORE_flag = 1; 4208 else 4209 tdr.MORE_flag = 0; 4210 iosize = uiop->uio_resid; 4211 } 4212 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4213 prim, tdr.MORE_flag, iosize)); 4214 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 4215 if (mp == NULL) { 4216 /* 4217 * Caught a signal waiting for memory. 4218 * Let send* return EINTR. 4219 */ 4220 return (EINTR); 4221 } 4222 4223 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4224 0, sflag | MSG_BAND, 0); 4225 if (error) { 4226 eprintsoline(so, error); 4227 return (error); 4228 } 4229 if (uiop->uio_resid > 0) { 4230 /* 4231 * Recheck for fatal errors. Fail write even though 4232 * some data have been written. This is consistent 4233 * with strwrite semantics and BSD sockets semantics. 4234 */ 4235 if (so->so_state & SS_CANTSENDMORE) { 4236 eprintsoline(so, error); 4237 return (EPIPE); 4238 } 4239 if (so->so_error != 0) { 4240 mutex_enter(&so->so_lock); 4241 error = sogeterr(so, B_TRUE); 4242 mutex_exit(&so->so_lock); 4243 if (error != 0) { 4244 eprintsoline(so, error); 4245 return (error); 4246 } 4247 } 4248 } 4249 } while (uiop->uio_resid > 0); 4250 return (0); 4251 } 4252 4253 /* 4254 * Check the state for errors and call the appropriate send function. 4255 * 4256 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4257 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4258 * after sending the message. 4259 */ 4260 static int 4261 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4262 struct cred *cr) 4263 { 4264 int so_state; 4265 int so_mode; 4266 int error; 4267 struct sockaddr *name; 4268 t_uscalar_t namelen; 4269 int dontroute; 4270 int flags; 4271 sotpi_info_t *sti = SOTOTPI(so); 4272 4273 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4274 (void *)so, (void *)msg, msg->msg_flags, 4275 pr_state(so->so_state, so->so_mode), so->so_error)); 4276 4277 if (so->so_version == SOV_STREAM) { 4278 /* The imaginary "sockmod" has been popped - act as a stream */ 4279 so_update_attrs(so, SOMOD); 4280 return (strwrite(SOTOV(so), uiop, cr)); 4281 } 4282 4283 mutex_enter(&so->so_lock); 4284 so_state = so->so_state; 4285 4286 if (so_state & SS_CANTSENDMORE) { 4287 mutex_exit(&so->so_lock); 4288 return (EPIPE); 4289 } 4290 4291 if (so->so_error != 0) { 4292 error = sogeterr(so, B_TRUE); 4293 if (error != 0) { 4294 mutex_exit(&so->so_lock); 4295 return (error); 4296 } 4297 } 4298 4299 name = (struct sockaddr *)msg->msg_name; 4300 namelen = msg->msg_namelen; 4301 4302 so_mode = so->so_mode; 4303 4304 if (name == NULL) { 4305 if (!(so_state & SS_ISCONNECTED)) { 4306 mutex_exit(&so->so_lock); 4307 if (so_mode & SM_CONNREQUIRED) 4308 return (ENOTCONN); 4309 else 4310 return (EDESTADDRREQ); 4311 } 4312 if (so_mode & SM_CONNREQUIRED) { 4313 name = NULL; 4314 namelen = 0; 4315 } else { 4316 /* 4317 * Note that this code does not prevent sti_faddr_sa 4318 * from changing while it is being used. Thus 4319 * if an "unconnect"+connect occurs concurrently with 4320 * this send the datagram might be delivered to a 4321 * garbaled address. 
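 * (That is, the address pointer is picked up below under so_lock,
 *  but the buffer it points at can be rewritten by a concurrent
 *  connect() once the lock is dropped, so the transport may be
 *  handed a mix of the old and the new peer address.)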
4322 */ 4323 ASSERT(sti->sti_faddr_sa); 4324 name = sti->sti_faddr_sa; 4325 namelen = (t_uscalar_t)sti->sti_faddr_len; 4326 } 4327 } else { 4328 if (!(so_state & SS_ISCONNECTED) && 4329 (so_mode & SM_CONNREQUIRED)) { 4330 /* Required but not connected */ 4331 mutex_exit(&so->so_lock); 4332 return (ENOTCONN); 4333 } 4334 /* 4335 * Ignore the address on connection-oriented sockets. 4336 * Just like BSD this code does not generate an error for 4337 * TCP (a CONNREQUIRED socket) when sending to an address 4338 * passed in with sendto/sendmsg. Instead the data is 4339 * delivered on the connection as if no address had been 4340 * supplied. 4341 */ 4342 if ((so_state & SS_ISCONNECTED) && 4343 !(so_mode & SM_CONNREQUIRED)) { 4344 mutex_exit(&so->so_lock); 4345 return (EISCONN); 4346 } 4347 if (!(so_state & SS_ISBOUND)) { 4348 so_lock_single(so); /* Set SOLOCKED */ 4349 error = sotpi_bind(so, NULL, 0, 4350 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4351 so_unlock_single(so, SOLOCKED); 4352 if (error) { 4353 mutex_exit(&so->so_lock); 4354 eprintsoline(so, error); 4355 return (error); 4356 } 4357 } 4358 /* 4359 * Handle delayed datagram errors. These are only queued 4360 * when the application sets SO_DGRAM_ERRIND. 4361 * Return the error if we are sending to the address 4362 * that was returned in the last T_UDERROR_IND. 4363 * If sending to some other address discard the delayed 4364 * error indication. 4365 */ 4366 if (sti->sti_delayed_error) { 4367 struct T_uderror_ind *tudi; 4368 void *addr; 4369 t_uscalar_t addrlen; 4370 boolean_t match = B_FALSE; 4371 4372 ASSERT(sti->sti_eaddr_mp); 4373 error = sti->sti_delayed_error; 4374 sti->sti_delayed_error = 0; 4375 tudi = 4376 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4377 addrlen = tudi->DEST_length; 4378 addr = sogetoff(sti->sti_eaddr_mp, 4379 tudi->DEST_offset, addrlen, 1); 4380 ASSERT(addr); /* Checked by strsock_proto */ 4381 switch (so->so_family) { 4382 case AF_INET: { 4383 /* Compare just IP address and port */ 4384 sin_t *sin1 = (sin_t *)name; 4385 sin_t *sin2 = (sin_t *)addr; 4386 4387 if (addrlen == sizeof (sin_t) && 4388 namelen == addrlen && 4389 sin1->sin_port == sin2->sin_port && 4390 sin1->sin_addr.s_addr == 4391 sin2->sin_addr.s_addr) 4392 match = B_TRUE; 4393 break; 4394 } 4395 case AF_INET6: { 4396 /* Compare just IP address and port. 
Not flow */ 4397 sin6_t *sin1 = (sin6_t *)name; 4398 sin6_t *sin2 = (sin6_t *)addr; 4399 4400 if (addrlen == sizeof (sin6_t) && 4401 namelen == addrlen && 4402 sin1->sin6_port == sin2->sin6_port && 4403 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4404 &sin2->sin6_addr)) 4405 match = B_TRUE; 4406 break; 4407 } 4408 case AF_UNIX: 4409 default: 4410 if (namelen == addrlen && 4411 bcmp(name, addr, namelen) == 0) 4412 match = B_TRUE; 4413 } 4414 if (match) { 4415 freemsg(sti->sti_eaddr_mp); 4416 sti->sti_eaddr_mp = NULL; 4417 mutex_exit(&so->so_lock); 4418 #ifdef DEBUG 4419 dprintso(so, 0, 4420 ("sockfs delayed error %d for %s\n", 4421 error, 4422 pr_addr(so->so_family, name, namelen))); 4423 #endif /* DEBUG */ 4424 return (error); 4425 } 4426 freemsg(sti->sti_eaddr_mp); 4427 sti->sti_eaddr_mp = NULL; 4428 } 4429 } 4430 mutex_exit(&so->so_lock); 4431 4432 flags = msg->msg_flags; 4433 dontroute = 0; 4434 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4435 uint32_t val; 4436 4437 val = 1; 4438 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4439 &val, (t_uscalar_t)sizeof (val), cr); 4440 if (error) 4441 return (error); 4442 dontroute = 1; 4443 } 4444 4445 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4446 error = EOPNOTSUPP; 4447 goto done; 4448 } 4449 if (msg->msg_controllen != 0) { 4450 if (!(so_mode & SM_CONNREQUIRED)) { 4451 so_update_attrs(so, SOMOD); 4452 error = sosend_dgramcmsg(so, name, namelen, uiop, 4453 msg->msg_control, msg->msg_controllen, flags); 4454 } else { 4455 if (flags & MSG_OOB) { 4456 /* Can't generate T_EXDATA_REQ with options */ 4457 error = EOPNOTSUPP; 4458 goto done; 4459 } 4460 so_update_attrs(so, SOMOD); 4461 error = sosend_svccmsg(so, uiop, 4462 !(flags & MSG_EOR), 4463 msg->msg_control, msg->msg_controllen, 4464 flags); 4465 } 4466 goto done; 4467 } 4468 4469 so_update_attrs(so, SOMOD); 4470 if (!(so_mode & SM_CONNREQUIRED)) { 4471 /* 4472 * If there is no SO_DONTROUTE to turn off return immediately 4473 * from send_dgram. This can allow tail-call optimizations. 4474 */ 4475 if (!dontroute) { 4476 return (sosend_dgram(so, name, namelen, uiop, flags)); 4477 } 4478 error = sosend_dgram(so, name, namelen, uiop, flags); 4479 } else { 4480 t_scalar_t prim; 4481 int sflag; 4482 4483 /* Ignore msg_name in the connected state */ 4484 if (flags & MSG_OOB) { 4485 prim = T_EXDATA_REQ; 4486 /* 4487 * Send down T_EXDATA_REQ even if there is flow 4488 * control for data. 4489 */ 4490 sflag = MSG_IGNFLOW; 4491 } else { 4492 if (so_mode & SM_BYTESTREAM) { 4493 /* Byte stream transport - use write */ 4494 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4495 4496 /* Send M_DATA messages */ 4497 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4498 (error = nl7c_data(so, uiop)) >= 0) { 4499 /* NL7C consumed the data */ 4500 return (error); 4501 } 4502 /* 4503 * If there is no SO_DONTROUTE to turn off, 4504 * sti_direct is on, and there is no flow 4505 * control, we can take the fast path. 4506 */ 4507 if (!dontroute && sti->sti_direct != 0 && 4508 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4509 return (sostream_direct(so, uiop, 4510 NULL, cr)); 4511 } 4512 error = strwrite(SOTOV(so), uiop, cr); 4513 goto done; 4514 } 4515 prim = T_DATA_REQ; 4516 sflag = 0; 4517 } 4518 /* 4519 * If there is no SO_DONTROUTE to turn off return immediately 4520 * from sosend_svc. This can allow tail-call optimizations. 
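 * (Path sketch: for a connected byte-stream transport such as TCP
 *  without MSG_OOB, the data has already gone out via
 *  sostream_direct() or strwrite() above; sosend_svc() is reached
 *  here only for non-byte-stream transports or for expedited
 *  (T_EXDATA_REQ) data.)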
4521 */ 4522 if (!dontroute) 4523 return (sosend_svc(so, uiop, prim, 4524 !(flags & MSG_EOR), sflag)); 4525 error = sosend_svc(so, uiop, prim, 4526 !(flags & MSG_EOR), sflag); 4527 } 4528 ASSERT(dontroute); 4529 done: 4530 if (dontroute) { 4531 uint32_t val; 4532 4533 val = 0; 4534 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4535 &val, (t_uscalar_t)sizeof (val), cr); 4536 } 4537 return (error); 4538 } 4539 4540 /* 4541 * kstrwritemp() has very similar semantics as that of strwrite(). 4542 * The main difference is it obtains mblks from the caller and also 4543 * does not do any copy as done in strwrite() from user buffers to 4544 * kernel buffers. 4545 * 4546 * Currently, this routine is used by sendfile to send data allocated 4547 * within the kernel without any copying. This interface does not use the 4548 * synchronous stream interface as synch. stream interface implies 4549 * copying. 4550 */ 4551 int 4552 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4553 { 4554 struct stdata *stp; 4555 struct queue *wqp; 4556 mblk_t *newmp; 4557 char waitflag; 4558 int tempmode; 4559 int error = 0; 4560 int done = 0; 4561 struct sonode *so; 4562 boolean_t direct; 4563 4564 ASSERT(vp->v_stream); 4565 stp = vp->v_stream; 4566 4567 so = VTOSO(vp); 4568 direct = _SOTOTPI(so)->sti_direct; 4569 4570 /* 4571 * This is the sockfs direct fast path. canputnext() need 4572 * not be accurate so we don't grab the sd_lock here. If 4573 * we get flow-controlled, we grab sd_lock just before the 4574 * do..while loop below to emulate what strwrite() does. 4575 */ 4576 wqp = stp->sd_wrq; 4577 if (canputnext(wqp) && direct && 4578 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4579 return (sostream_direct(so, NULL, mp, CRED())); 4580 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4581 /* Fast check of flags before acquiring the lock */ 4582 mutex_enter(&stp->sd_lock); 4583 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4584 mutex_exit(&stp->sd_lock); 4585 if (error != 0) { 4586 if (!(stp->sd_flag & STPLEX) && 4587 (stp->sd_wput_opt & SW_SIGPIPE)) { 4588 error = EPIPE; 4589 } 4590 return (error); 4591 } 4592 } 4593 4594 waitflag = WRITEWAIT; 4595 if (stp->sd_flag & OLDNDELAY) 4596 tempmode = fmode & ~FNDELAY; 4597 else 4598 tempmode = fmode; 4599 4600 mutex_enter(&stp->sd_lock); 4601 do { 4602 if (canputnext(wqp)) { 4603 mutex_exit(&stp->sd_lock); 4604 if (stp->sd_wputdatafunc != NULL) { 4605 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4606 NULL, NULL, NULL); 4607 if (newmp == NULL) { 4608 /* The caller will free mp */ 4609 return (ECOMM); 4610 } 4611 mp = newmp; 4612 } 4613 putnext(wqp, mp); 4614 return (0); 4615 } 4616 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4617 &done); 4618 } while (error == 0 && !done); 4619 4620 mutex_exit(&stp->sd_lock); 4621 /* 4622 * EAGAIN tells the application to try again. ENOMEM 4623 * is returned only if the memory allocation size 4624 * exceeds the physical limits of the system. ENOMEM 4625 * can't be true here. 
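 * (Usage note: the main caller is sotpi_sendmblk() below on behalf
 *  of sendfile; a zero return means this routine consumed the mblk,
 *  while EAGAIN asks a non-blocking caller to retry later.)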
4626 */ 4627 if (error == ENOMEM) 4628 error = EAGAIN; 4629 return (error); 4630 } 4631 4632 /* ARGSUSED */ 4633 static int 4634 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4635 struct cred *cr, mblk_t **mpp) 4636 { 4637 int error; 4638 4639 if (so->so_family != AF_INET && so->so_family != AF_INET6) 4640 return (EAFNOSUPPORT); 4641 4642 if (so->so_state & SS_CANTSENDMORE) 4643 return (EPIPE); 4644 4645 if (so->so_type != SOCK_STREAM) 4646 return (EOPNOTSUPP); 4647 4648 if ((so->so_state & SS_ISCONNECTED) == 0) 4649 return (ENOTCONN); 4650 4651 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4652 if (error == 0) 4653 *mpp = NULL; 4654 return (error); 4655 } 4656 4657 /* 4658 * Sending data on a datagram socket. 4659 * Assumes caller has verified that SS_ISBOUND etc. are set. 4660 */ 4661 /* ARGSUSED */ 4662 static int 4663 sodgram_direct(struct sonode *so, struct sockaddr *name, 4664 socklen_t namelen, struct uio *uiop, int flags) 4665 { 4666 struct T_unitdata_req tudr; 4667 mblk_t *mp = NULL; 4668 int error = 0; 4669 void *addr; 4670 socklen_t addrlen; 4671 ssize_t len; 4672 struct stdata *stp = SOTOV(so)->v_stream; 4673 int so_state; 4674 queue_t *udp_wq; 4675 boolean_t connected; 4676 mblk_t *mpdata = NULL; 4677 sotpi_info_t *sti = SOTOTPI(so); 4678 4679 ASSERT(name != NULL && namelen != 0); 4680 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4681 ASSERT(!(so->so_mode & SM_EXDATA)); 4682 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4683 ASSERT(SOTOV(so)->v_type == VSOCK); 4684 4685 /* Caller checked for proper length */ 4686 len = uiop->uio_resid; 4687 ASSERT(len <= sti->sti_tidu_size); 4688 4689 /* Length and family checks have been done by caller */ 4690 ASSERT(name->sa_family == so->so_family); 4691 ASSERT(so->so_family == AF_INET || 4692 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4693 ASSERT(so->so_family == AF_INET6 || 4694 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4695 4696 addr = name; 4697 addrlen = namelen; 4698 4699 if (stp->sd_sidp != NULL && 4700 (error = straccess(stp, JCWRITE)) != 0) 4701 goto done; 4702 4703 so_state = so->so_state; 4704 4705 connected = so_state & SS_ISCONNECTED; 4706 if (!connected) { 4707 tudr.PRIM_type = T_UNITDATA_REQ; 4708 tudr.DEST_length = addrlen; 4709 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4710 tudr.OPT_length = 0; 4711 tudr.OPT_offset = 0; 4712 4713 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4714 _ALLOC_INTR); 4715 if (mp == NULL) { 4716 /* 4717 * Caught a signal waiting for memory. 4718 * Let send* return EINTR. 4719 */ 4720 error = EINTR; 4721 goto done; 4722 } 4723 } 4724 4725 /* 4726 * For UDP we don't break up the copyin into smaller pieces 4727 * as in the TCP case. That means if ENOMEM is returned by 4728 * mcopyinuio() then the uio vector has not been modified at 4729 * all and we fallback to either strwrite() or kstrputmsg() 4730 * below. Note also that we never generate priority messages 4731 * from here. 
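 * (Fast-path sketch: when canput() succeeds the whole datagram is
 *  copied by a single mcopyinuio() call, linkb()ed behind the
 *  T_UNITDATA_REQ built above in the unconnected case, and handed
 *  directly to udp_wput(), bypassing the stream head.)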
4732 */ 4733 udp_wq = stp->sd_wrq->q_next; 4734 if (canput(udp_wq) && 4735 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4736 ASSERT(DB_TYPE(mpdata) == M_DATA); 4737 ASSERT(uiop->uio_resid == 0); 4738 if (!connected) 4739 linkb(mp, mpdata); 4740 else 4741 mp = mpdata; 4742 if (audit_active) 4743 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4744 4745 udp_wput(udp_wq, mp); 4746 return (0); 4747 } 4748 4749 ASSERT(mpdata == NULL); 4750 if (error != 0 && error != ENOMEM) { 4751 freemsg(mp); 4752 return (error); 4753 } 4754 4755 /* 4756 * For connected, let strwrite() handle the blocking case. 4757 * Otherwise we fall thru and use kstrputmsg(). 4758 */ 4759 if (connected) 4760 return (strwrite(SOTOV(so), uiop, CRED())); 4761 4762 if (audit_active) 4763 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4764 4765 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4766 done: 4767 #ifdef SOCK_DEBUG 4768 if (error != 0) { 4769 eprintsoline(so, error); 4770 } 4771 #endif /* SOCK_DEBUG */ 4772 return (error); 4773 } 4774 4775 int 4776 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4777 { 4778 struct stdata *stp = SOTOV(so)->v_stream; 4779 ssize_t iosize, rmax, maxblk; 4780 queue_t *tcp_wq = stp->sd_wrq->q_next; 4781 mblk_t *newmp; 4782 int error = 0, wflag = 0; 4783 4784 ASSERT(so->so_mode & SM_BYTESTREAM); 4785 ASSERT(SOTOV(so)->v_type == VSOCK); 4786 4787 if (stp->sd_sidp != NULL && 4788 (error = straccess(stp, JCWRITE)) != 0) 4789 return (error); 4790 4791 if (uiop == NULL) { 4792 /* 4793 * kstrwritemp() should have checked sd_flag and 4794 * flow-control before coming here. If we end up 4795 * here it means that we can simply pass down the 4796 * data to tcp. 4797 */ 4798 ASSERT(mp != NULL); 4799 if (stp->sd_wputdatafunc != NULL) { 4800 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4801 NULL, NULL, NULL); 4802 if (newmp == NULL) { 4803 /* The caller will free mp */ 4804 return (ECOMM); 4805 } 4806 mp = newmp; 4807 } 4808 tcp_wput(tcp_wq, mp); 4809 return (0); 4810 } 4811 4812 /* Fallback to strwrite() to do proper error handling */ 4813 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4814 return (strwrite(SOTOV(so), uiop, cr)); 4815 4816 rmax = stp->sd_qn_maxpsz; 4817 ASSERT(rmax >= 0 || rmax == INFPSZ); 4818 if (rmax == 0 || uiop->uio_resid <= 0) 4819 return (0); 4820 4821 if (rmax == INFPSZ) 4822 rmax = uiop->uio_resid; 4823 4824 maxblk = stp->sd_maxblk; 4825 4826 for (;;) { 4827 iosize = MIN(uiop->uio_resid, rmax); 4828 4829 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4830 if (mp == NULL) { 4831 /* 4832 * Fallback to strwrite() for ENOMEM; if this 4833 * is our first time in this routine and the uio 4834 * vector has not been modified, we will end up 4835 * calling strwrite() without any flag set. 4836 */ 4837 if (error == ENOMEM) 4838 goto slow_send; 4839 else 4840 return (error); 4841 } 4842 ASSERT(uiop->uio_resid >= 0); 4843 /* 4844 * If mp is non-NULL and ENOMEM is set, it means that 4845 * mcopyinuio() was able to break down some of the user 4846 * data into one or more mblks. Send the partial data 4847 * to tcp and let the rest be handled in strwrite(). 
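 * (For example, with sd_qn_maxpsz at, say, 64K, a 1M write normally
 *  takes sixteen passes through this loop; if mcopyinuio() runs out
 *  of memory part way through, whatever it did copy is still sent
 *  below and strwrite_common() with NOINTR set finishes the rest.)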
4848 */ 4849 ASSERT(error == 0 || error == ENOMEM); 4850 if (stp->sd_wputdatafunc != NULL) { 4851 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4852 NULL, NULL, NULL); 4853 if (newmp == NULL) { 4854 /* The caller will free mp */ 4855 return (ECOMM); 4856 } 4857 mp = newmp; 4858 } 4859 tcp_wput(tcp_wq, mp); 4860 4861 wflag |= NOINTR; 4862 4863 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4864 ASSERT(error == 0); 4865 break; 4866 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4867 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4868 slow_send: 4869 /* 4870 * We were able to send down partial data using 4871 * the direct call interface, but are now relying 4872 * on strwrite() to handle the non-fastpath cases. 4873 * If the socket is blocking we will sleep in 4874 * strwaitq() until write is permitted, otherwise, 4875 * we will need to return the amount of bytes 4876 * written so far back to the app. This is the 4877 * reason why we pass NOINTR flag to strwrite() 4878 * for non-blocking socket, because we don't want 4879 * to return EAGAIN when portion of the user data 4880 * has actually been sent down. 4881 */ 4882 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4883 } 4884 } 4885 return (0); 4886 } 4887 4888 /* 4889 * Update sti_faddr by asking the transport (unless AF_UNIX). 4890 */ 4891 /* ARGSUSED */ 4892 int 4893 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4894 boolean_t accept, struct cred *cr) 4895 { 4896 struct strbuf strbuf; 4897 int error = 0, res; 4898 void *addr; 4899 t_uscalar_t addrlen; 4900 k_sigset_t smask; 4901 sotpi_info_t *sti = SOTOTPI(so); 4902 4903 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4904 (void *)so, pr_state(so->so_state, so->so_mode))); 4905 4906 ASSERT(*namelen > 0); 4907 mutex_enter(&so->so_lock); 4908 so_lock_single(so); /* Set SOLOCKED */ 4909 4910 if (accept) { 4911 bcopy(sti->sti_faddr_sa, name, 4912 MIN(*namelen, sti->sti_faddr_len)); 4913 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4914 goto done; 4915 } 4916 4917 if (!(so->so_state & SS_ISCONNECTED)) { 4918 error = ENOTCONN; 4919 goto done; 4920 } 4921 /* Added this check for X/Open */ 4922 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4923 error = EINVAL; 4924 if (xnet_check_print) { 4925 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4926 } 4927 goto done; 4928 } 4929 4930 if (sti->sti_faddr_valid) { 4931 bcopy(sti->sti_faddr_sa, name, 4932 MIN(*namelen, sti->sti_faddr_len)); 4933 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4934 goto done; 4935 } 4936 4937 #ifdef DEBUG 4938 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4939 pr_addr(so->so_family, sti->sti_faddr_sa, 4940 (t_uscalar_t)sti->sti_faddr_len))); 4941 #endif /* DEBUG */ 4942 4943 if (so->so_family == AF_UNIX) { 4944 /* Transport has different name space - return local info */ 4945 if (sti->sti_faddr_noxlate) 4946 *namelen = 0; 4947 error = 0; 4948 goto done; 4949 } 4950 4951 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4952 4953 ASSERT(sti->sti_faddr_sa); 4954 /* Allocate local buffer to use with ioctl */ 4955 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4956 mutex_exit(&so->so_lock); 4957 addr = kmem_alloc(addrlen, KM_SLEEP); 4958 4959 /* 4960 * Issue TI_GETPEERNAME with signals masked. 4961 * Put the result in sti_faddr_sa so that getpeername works after 4962 * a shutdown(output). 4963 * If the ioctl fails (e.g. 
due to a ECONNRESET) the error is reposted 4964 * back to the socket. 4965 */ 4966 strbuf.buf = addr; 4967 strbuf.maxlen = addrlen; 4968 strbuf.len = 0; 4969 4970 sigintr(&smask, 0); 4971 res = 0; 4972 ASSERT(cr); 4973 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4974 0, K_TO_K, cr, &res); 4975 sigunintr(&smask); 4976 4977 mutex_enter(&so->so_lock); 4978 /* 4979 * If there is an error record the error in so_error put don't fail 4980 * the getpeername. Instead fallback on the recorded 4981 * sti->sti_faddr_sa. 4982 */ 4983 if (error) { 4984 /* 4985 * Various stream head errors can be returned to the ioctl. 4986 * However, it is impossible to determine which ones of 4987 * these are really socket level errors that were incorrectly 4988 * consumed by the ioctl. Thus this code silently ignores the 4989 * error - to code explicitly does not reinstate the error 4990 * using soseterror(). 4991 * Experiments have shows that at least this set of 4992 * errors are reported and should not be reinstated on the 4993 * socket: 4994 * EINVAL E.g. if an I_LINK was in effect when 4995 * getpeername was called. 4996 * EPIPE The ioctl error semantics prefer the write 4997 * side error over the read side error. 4998 * ENOTCONN The transport just got disconnected but 4999 * sockfs had not yet seen the T_DISCON_IND 5000 * when issuing the ioctl. 5001 */ 5002 error = 0; 5003 } else if (res == 0 && strbuf.len > 0 && 5004 (so->so_state & SS_ISCONNECTED)) { 5005 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 5006 sti->sti_faddr_len = (socklen_t)strbuf.len; 5007 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 5008 sti->sti_faddr_valid = 1; 5009 5010 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 5011 *namelen = sti->sti_faddr_len; 5012 } 5013 kmem_free(addr, addrlen); 5014 #ifdef DEBUG 5015 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 5016 pr_addr(so->so_family, sti->sti_faddr_sa, 5017 (t_uscalar_t)sti->sti_faddr_len))); 5018 #endif /* DEBUG */ 5019 done: 5020 so_unlock_single(so, SOLOCKED); 5021 mutex_exit(&so->so_lock); 5022 return (error); 5023 } 5024 5025 /* 5026 * Update sti_laddr by asking the transport (unless AF_UNIX). 5027 */ 5028 int 5029 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 5030 struct cred *cr) 5031 { 5032 struct strbuf strbuf; 5033 int error = 0, res; 5034 void *addr; 5035 t_uscalar_t addrlen; 5036 k_sigset_t smask; 5037 sotpi_info_t *sti = SOTOTPI(so); 5038 5039 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 5040 (void *)so, pr_state(so->so_state, so->so_mode))); 5041 5042 ASSERT(*namelen > 0); 5043 mutex_enter(&so->so_lock); 5044 so_lock_single(so); /* Set SOLOCKED */ 5045 5046 #ifdef DEBUG 5047 5048 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 5049 pr_addr(so->so_family, sti->sti_laddr_sa, 5050 (t_uscalar_t)sti->sti_laddr_len))); 5051 #endif /* DEBUG */ 5052 if (sti->sti_laddr_valid) { 5053 bcopy(sti->sti_laddr_sa, name, 5054 MIN(*namelen, sti->sti_laddr_len)); 5055 *namelen = sti->sti_laddr_len; 5056 goto done; 5057 } 5058 5059 if (so->so_family == AF_UNIX) { 5060 /* Transport has different name space - return local info */ 5061 error = 0; 5062 *namelen = 0; 5063 goto done; 5064 } 5065 if (!(so->so_state & SS_ISBOUND)) { 5066 /* If not bound, then nothing to return. 
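 * (Nothing is copied into the caller's buffer in that case and
 *  *namelen is returned unchanged.)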
*/ 5067 error = 0; 5068 goto done; 5069 } 5070 5071 /* Allocate local buffer to use with ioctl */ 5072 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 5073 mutex_exit(&so->so_lock); 5074 addr = kmem_alloc(addrlen, KM_SLEEP); 5075 5076 /* 5077 * Issue TI_GETMYNAME with signals masked. 5078 * Put the result in sti_laddr_sa so that getsockname works after 5079 * a shutdown(output). 5080 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5081 * back to the socket. 5082 */ 5083 strbuf.buf = addr; 5084 strbuf.maxlen = addrlen; 5085 strbuf.len = 0; 5086 5087 sigintr(&smask, 0); 5088 res = 0; 5089 ASSERT(cr); 5090 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5091 0, K_TO_K, cr, &res); 5092 sigunintr(&smask); 5093 5094 mutex_enter(&so->so_lock); 5095 /* 5096 * If there is an error record the error in so_error put don't fail 5097 * the getsockname. Instead fallback on the recorded 5098 * sti->sti_laddr_sa. 5099 */ 5100 if (error) { 5101 /* 5102 * Various stream head errors can be returned to the ioctl. 5103 * However, it is impossible to determine which ones of 5104 * these are really socket level errors that were incorrectly 5105 * consumed by the ioctl. Thus this code silently ignores the 5106 * error - to code explicitly does not reinstate the error 5107 * using soseterror(). 5108 * Experiments have shows that at least this set of 5109 * errors are reported and should not be reinstated on the 5110 * socket: 5111 * EINVAL E.g. if an I_LINK was in effect when 5112 * getsockname was called. 5113 * EPIPE The ioctl error semantics prefer the write 5114 * side error over the read side error. 5115 */ 5116 error = 0; 5117 } else if (res == 0 && strbuf.len > 0 && 5118 (so->so_state & SS_ISBOUND)) { 5119 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5120 sti->sti_laddr_len = (socklen_t)strbuf.len; 5121 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5122 sti->sti_laddr_valid = 1; 5123 5124 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5125 *namelen = sti->sti_laddr_len; 5126 } 5127 kmem_free(addr, addrlen); 5128 #ifdef DEBUG 5129 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5130 pr_addr(so->so_family, sti->sti_laddr_sa, 5131 (t_uscalar_t)sti->sti_laddr_len))); 5132 #endif /* DEBUG */ 5133 done: 5134 so_unlock_single(so, SOLOCKED); 5135 mutex_exit(&so->so_lock); 5136 return (error); 5137 } 5138 5139 /* 5140 * Get socket options. For SOL_SOCKET options some options are handled 5141 * by the sockfs while others use the value recorded in the sonode as a 5142 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5143 * 5144 * On the return most *optlenp bytes are copied to optval. 
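 * An illustrative example of the fallback behavior (the option is
 * picked for illustration only): getsockopt(SOL_SOCKET, SO_SNDBUF)
 * first derives a default from so_sndbuf or, if that is zero, from
 * the transport write queue's QHIWAT, then issues a
 * T_SVR4_OPTMGMT_REQ with T_CHECK; should the transport NAK that
 * request, the precomputed default is copied out and the call still
 * succeeds.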
5145 */ 5146 /* ARGSUSED */ 5147 int 5148 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5149 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5150 { 5151 struct T_optmgmt_req optmgmt_req; 5152 struct T_optmgmt_ack *optmgmt_ack; 5153 struct opthdr oh; 5154 struct opthdr *opt_res; 5155 mblk_t *mp = NULL; 5156 int error = 0; 5157 void *option = NULL; /* Set if fallback value */ 5158 t_uscalar_t maxlen = *optlenp; 5159 t_uscalar_t len; 5160 uint32_t value; 5161 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5162 struct timeval32 tmo_val32; 5163 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5164 5165 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5166 (void *)so, level, option_name, optval, (void *)optlenp, 5167 pr_state(so->so_state, so->so_mode))); 5168 5169 mutex_enter(&so->so_lock); 5170 so_lock_single(so); /* Set SOLOCKED */ 5171 5172 /* 5173 * Check for SOL_SOCKET options. 5174 * Certain SOL_SOCKET options are returned directly whereas 5175 * others only provide a default (fallback) value should 5176 * the T_SVR4_OPTMGMT_REQ fail. 5177 */ 5178 if (level == SOL_SOCKET) { 5179 /* Check parameters */ 5180 switch (option_name) { 5181 case SO_TYPE: 5182 case SO_ERROR: 5183 case SO_DEBUG: 5184 case SO_ACCEPTCONN: 5185 case SO_REUSEADDR: 5186 case SO_KEEPALIVE: 5187 case SO_DONTROUTE: 5188 case SO_BROADCAST: 5189 case SO_USELOOPBACK: 5190 case SO_OOBINLINE: 5191 case SO_SNDBUF: 5192 case SO_RCVBUF: 5193 #ifdef notyet 5194 case SO_SNDLOWAT: 5195 case SO_RCVLOWAT: 5196 #endif /* notyet */ 5197 case SO_DOMAIN: 5198 case SO_DGRAM_ERRIND: 5199 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5200 error = EINVAL; 5201 eprintsoline(so, error); 5202 goto done2; 5203 } 5204 break; 5205 case SO_RCVTIMEO: 5206 case SO_SNDTIMEO: 5207 if (get_udatamodel() == DATAMODEL_NATIVE) { 5208 if (maxlen < sizeof (struct timeval)) { 5209 error = EINVAL; 5210 eprintsoline(so, error); 5211 goto done2; 5212 } 5213 } else { 5214 if (maxlen < sizeof (struct timeval32)) { 5215 error = EINVAL; 5216 eprintsoline(so, error); 5217 goto done2; 5218 } 5219 5220 } 5221 break; 5222 case SO_LINGER: 5223 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5224 error = EINVAL; 5225 eprintsoline(so, error); 5226 goto done2; 5227 } 5228 break; 5229 case SO_SND_BUFINFO: 5230 if (maxlen < (t_uscalar_t) 5231 sizeof (struct so_snd_bufinfo)) { 5232 error = EINVAL; 5233 eprintsoline(so, error); 5234 goto done2; 5235 } 5236 break; 5237 } 5238 5239 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5240 5241 switch (option_name) { 5242 case SO_TYPE: 5243 value = so->so_type; 5244 option = &value; 5245 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5246 5247 case SO_ERROR: 5248 value = sogeterr(so, B_TRUE); 5249 option = &value; 5250 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5251 5252 case SO_ACCEPTCONN: 5253 if (so->so_state & SS_ACCEPTCONN) 5254 value = SO_ACCEPTCONN; 5255 else 5256 value = 0; 5257 #ifdef DEBUG 5258 if (value) { 5259 dprintso(so, 1, 5260 ("sotpi_getsockopt: 0x%x is set\n", 5261 option_name)); 5262 } else { 5263 dprintso(so, 1, 5264 ("sotpi_getsockopt: 0x%x not set\n", 5265 option_name)); 5266 } 5267 #endif /* DEBUG */ 5268 option = &value; 5269 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5270 5271 case SO_DEBUG: 5272 case SO_REUSEADDR: 5273 case SO_KEEPALIVE: 5274 case SO_DONTROUTE: 5275 case SO_BROADCAST: 5276 case SO_USELOOPBACK: 5277 case SO_OOBINLINE: 5278 case SO_DGRAM_ERRIND: 5279 value = (so->so_options & 
option_name); 5280 #ifdef DEBUG 5281 if (value) { 5282 dprintso(so, 1, 5283 ("sotpi_getsockopt: 0x%x is set\n", 5284 option_name)); 5285 } else { 5286 dprintso(so, 1, 5287 ("sotpi_getsockopt: 0x%x not set\n", 5288 option_name)); 5289 } 5290 #endif /* DEBUG */ 5291 option = &value; 5292 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5293 5294 /* 5295 * The following options are only returned by sockfs when the 5296 * T_SVR4_OPTMGMT_REQ fails. 5297 */ 5298 case SO_LINGER: 5299 option = &so->so_linger; 5300 len = (t_uscalar_t)sizeof (struct linger); 5301 break; 5302 case SO_SNDBUF: { 5303 ssize_t lvalue; 5304 5305 /* 5306 * If the option has not been set then get a default 5307 * value from the read queue. This value is 5308 * returned if the transport fails 5309 * the T_SVR4_OPTMGMT_REQ. 5310 */ 5311 lvalue = so->so_sndbuf; 5312 if (lvalue == 0) { 5313 mutex_exit(&so->so_lock); 5314 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5315 QHIWAT, 0, &lvalue); 5316 mutex_enter(&so->so_lock); 5317 dprintso(so, 1, 5318 ("got SO_SNDBUF %ld from q\n", lvalue)); 5319 } 5320 value = (int)lvalue; 5321 option = &value; 5322 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5323 break; 5324 } 5325 case SO_RCVBUF: { 5326 ssize_t lvalue; 5327 5328 /* 5329 * If the option has not been set then get a default 5330 * value from the read queue. This value is 5331 * returned if the transport fails 5332 * the T_SVR4_OPTMGMT_REQ. 5333 * 5334 * XXX If SO_RCVBUF has been set and this is an 5335 * XPG 4.2 application then do not ask the transport 5336 * since the transport might adjust the value and not 5337 * return exactly what was set by the application. 5338 * For non-XPG 4.2 application we return the value 5339 * that the transport is actually using. 5340 */ 5341 lvalue = so->so_rcvbuf; 5342 if (lvalue == 0) { 5343 mutex_exit(&so->so_lock); 5344 (void) strqget(RD(strvp2wq(SOTOV(so))), 5345 QHIWAT, 0, &lvalue); 5346 mutex_enter(&so->so_lock); 5347 dprintso(so, 1, 5348 ("got SO_RCVBUF %ld from q\n", lvalue)); 5349 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5350 value = (int)lvalue; 5351 option = &value; 5352 goto copyout; /* skip asking transport */ 5353 } 5354 value = (int)lvalue; 5355 option = &value; 5356 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5357 break; 5358 } 5359 case SO_DOMAIN: 5360 value = so->so_family; 5361 option = &value; 5362 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5363 5364 #ifdef notyet 5365 /* 5366 * We do not implement the semantics of these options 5367 * thus we shouldn't implement the options either. 
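 * (If these cases were ever enabled they would simply report the
 *  values cached in the sonode, subject to the usual transport
 *  fallback below.)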
5368 */ 5369 case SO_SNDLOWAT: 5370 value = so->so_sndlowat; 5371 option = &value; 5372 break; 5373 case SO_RCVLOWAT: 5374 value = so->so_rcvlowat; 5375 option = &value; 5376 break; 5377 #endif /* notyet */ 5378 case SO_SNDTIMEO: 5379 case SO_RCVTIMEO: { 5380 clock_t val; 5381 5382 if (option_name == SO_RCVTIMEO) 5383 val = drv_hztousec(so->so_rcvtimeo); 5384 else 5385 val = drv_hztousec(so->so_sndtimeo); 5386 tmo_val.tv_sec = val / (1000 * 1000); 5387 tmo_val.tv_usec = val % (1000 * 1000); 5388 if (get_udatamodel() == DATAMODEL_NATIVE) { 5389 option = &tmo_val; 5390 len = sizeof (struct timeval); 5391 } else { 5392 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5393 option = &tmo_val32; 5394 len = sizeof (struct timeval32); 5395 } 5396 break; 5397 } 5398 case SO_SND_BUFINFO: { 5399 snd_bufinfo.sbi_wroff = 5400 (so->so_proto_props).sopp_wroff; 5401 snd_bufinfo.sbi_maxblk = 5402 (so->so_proto_props).sopp_maxblk; 5403 snd_bufinfo.sbi_maxpsz = 5404 (so->so_proto_props).sopp_maxpsz; 5405 snd_bufinfo.sbi_tail = 5406 (so->so_proto_props).sopp_tail; 5407 option = &snd_bufinfo; 5408 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5409 break; 5410 } 5411 } 5412 } 5413 5414 mutex_exit(&so->so_lock); 5415 5416 /* Send request */ 5417 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5418 optmgmt_req.MGMT_flags = T_CHECK; 5419 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5420 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5421 5422 oh.level = level; 5423 oh.name = option_name; 5424 oh.len = maxlen; 5425 5426 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5427 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 5428 /* Let option management work in the presence of data flow control */ 5429 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5430 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5431 mp = NULL; 5432 mutex_enter(&so->so_lock); 5433 if (error) { 5434 eprintsoline(so, error); 5435 goto done2; 5436 } 5437 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5438 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5439 if (error) { 5440 if (option != NULL) { 5441 /* We have a fallback value */ 5442 error = 0; 5443 goto copyout; 5444 } 5445 eprintsoline(so, error); 5446 goto done2; 5447 } 5448 ASSERT(mp); 5449 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5450 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5451 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5452 if (opt_res == NULL) { 5453 if (option != NULL) { 5454 /* We have a fallback value */ 5455 error = 0; 5456 goto copyout; 5457 } 5458 error = EPROTO; 5459 eprintsoline(so, error); 5460 goto done; 5461 } 5462 option = &opt_res[1]; 5463 5464 /* check to ensure that the option is within bounds */ 5465 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5466 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5467 if (option != NULL) { 5468 /* We have a fallback value */ 5469 error = 0; 5470 goto copyout; 5471 } 5472 error = EPROTO; 5473 eprintsoline(so, error); 5474 goto done; 5475 } 5476 5477 len = opt_res->len; 5478 5479 copyout: { 5480 t_uscalar_t size = MIN(len, maxlen); 5481 bcopy(option, optval, size); 5482 bcopy(&size, optlenp, sizeof (size)); 5483 } 5484 done: 5485 freemsg(mp); 5486 done2: 5487 so_unlock_single(so, SOLOCKED); 5488 mutex_exit(&so->so_lock); 5489 5490 return (error); 5491 } 5492 5493 /* 5494 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5495 * SOL_SOCKET options are also recorded in the sonode. 
A setsockopt for 5496 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5497 * setsockopt has to work even if the transport does not support the option. 5498 */ 5499 /* ARGSUSED */ 5500 int 5501 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5502 const void *optval, t_uscalar_t optlen, struct cred *cr) 5503 { 5504 struct T_optmgmt_req optmgmt_req; 5505 struct opthdr oh; 5506 mblk_t *mp; 5507 int error = 0; 5508 boolean_t handled = B_FALSE; 5509 5510 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5511 (void *)so, level, option_name, optval, optlen, 5512 pr_state(so->so_state, so->so_mode))); 5513 5514 /* X/Open requires this check */ 5515 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5516 if (xnet_check_print) 5517 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5518 return (EINVAL); 5519 } 5520 5521 mutex_enter(&so->so_lock); 5522 so_lock_single(so); /* Set SOLOCKED */ 5523 mutex_exit(&so->so_lock); 5524 5525 /* 5526 * For SOCKET or TCP level options, try to set it here itself 5527 * provided socket has not been popped and we know the tcp 5528 * structure (stored in so_priv). 5529 */ 5530 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5531 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5532 (so->so_version == SOV_SOCKSTREAM) && 5533 (so->so_proto_handle != NULL)) { 5534 tcp_t *tcp = (tcp_t *)so->so_proto_handle; 5535 boolean_t onoff; 5536 5537 #define intvalue (*(int32_t *)optval) 5538 5539 switch (level) { 5540 case SOL_SOCKET: 5541 switch (option_name) { /* Check length param */ 5542 case SO_DEBUG: 5543 case SO_REUSEADDR: 5544 case SO_DONTROUTE: 5545 case SO_BROADCAST: 5546 case SO_USELOOPBACK: 5547 case SO_OOBINLINE: 5548 case SO_DGRAM_ERRIND: 5549 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5550 error = EINVAL; 5551 eprintsoline(so, error); 5552 mutex_enter(&so->so_lock); 5553 goto done2; 5554 } 5555 ASSERT(optval); 5556 onoff = intvalue != 0; 5557 handled = B_TRUE; 5558 break; 5559 case SO_SNDTIMEO: 5560 case SO_RCVTIMEO: 5561 if (get_udatamodel() == DATAMODEL_NATIVE) { 5562 if (optlen != 5563 sizeof (struct timeval)) { 5564 error = EINVAL; 5565 eprintsoline(so, error); 5566 mutex_enter(&so->so_lock); 5567 goto done2; 5568 } 5569 } else { 5570 if (optlen != 5571 sizeof (struct timeval32)) { 5572 error = EINVAL; 5573 eprintsoline(so, error); 5574 mutex_enter(&so->so_lock); 5575 goto done2; 5576 } 5577 } 5578 ASSERT(optval); 5579 handled = B_TRUE; 5580 break; 5581 case SO_LINGER: 5582 if (optlen != 5583 (t_uscalar_t)sizeof (struct linger)) { 5584 error = EINVAL; 5585 eprintsoline(so, error); 5586 mutex_enter(&so->so_lock); 5587 goto done2; 5588 } 5589 ASSERT(optval); 5590 handled = B_TRUE; 5591 break; 5592 } 5593 5594 switch (option_name) { /* Do actions */ 5595 case SO_LINGER: { 5596 struct linger *lgr = (struct linger *)optval; 5597 5598 if (lgr->l_onoff) { 5599 tcp->tcp_linger = 1; 5600 tcp->tcp_lingertime = lgr->l_linger; 5601 so->so_linger.l_onoff = SO_LINGER; 5602 so->so_options |= SO_LINGER; 5603 } else { 5604 tcp->tcp_linger = 0; 5605 tcp->tcp_lingertime = 0; 5606 so->so_linger.l_onoff = 0; 5607 so->so_options &= ~SO_LINGER; 5608 } 5609 so->so_linger.l_linger = lgr->l_linger; 5610 handled = B_TRUE; 5611 break; 5612 } 5613 case SO_SNDTIMEO: 5614 case SO_RCVTIMEO: { 5615 struct timeval tl; 5616 clock_t val; 5617 5618 if (get_udatamodel() == DATAMODEL_NATIVE) 5619 bcopy(&tl, (struct timeval *)optval, 5620 sizeof (struct timeval)); 5621 else 5622 TIMEVAL32_TO_TIMEVAL(&tl, 5623 
(struct timeval32 *)optval); 5624 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5625 if (option_name == SO_RCVTIMEO) 5626 so->so_rcvtimeo = drv_usectohz(val); 5627 else 5628 so->so_sndtimeo = drv_usectohz(val); 5629 break; 5630 } 5631 5632 case SO_DEBUG: 5633 tcp->tcp_debug = onoff; 5634 #ifdef SOCK_TEST 5635 if (intvalue & 2) 5636 sock_test_timelimit = 10 * hz; 5637 else 5638 sock_test_timelimit = 0; 5639 5640 if (intvalue & 4) 5641 do_useracc = 0; 5642 else 5643 do_useracc = 1; 5644 #endif /* SOCK_TEST */ 5645 break; 5646 case SO_DONTROUTE: 5647 /* 5648 * SO_DONTROUTE, SO_USELOOPBACK and 5649 * SO_BROADCAST are only of interest to IP. 5650 * We track them here only so 5651 * that we can report their current value. 5652 */ 5653 tcp->tcp_dontroute = onoff; 5654 if (onoff) 5655 so->so_options |= option_name; 5656 else 5657 so->so_options &= ~option_name; 5658 break; 5659 case SO_USELOOPBACK: 5660 tcp->tcp_useloopback = onoff; 5661 if (onoff) 5662 so->so_options |= option_name; 5663 else 5664 so->so_options &= ~option_name; 5665 break; 5666 case SO_BROADCAST: 5667 tcp->tcp_broadcast = onoff; 5668 if (onoff) 5669 so->so_options |= option_name; 5670 else 5671 so->so_options &= ~option_name; 5672 break; 5673 case SO_REUSEADDR: 5674 tcp->tcp_reuseaddr = onoff; 5675 if (onoff) 5676 so->so_options |= option_name; 5677 else 5678 so->so_options &= ~option_name; 5679 break; 5680 case SO_OOBINLINE: 5681 tcp->tcp_oobinline = onoff; 5682 if (onoff) 5683 so->so_options |= option_name; 5684 else 5685 so->so_options &= ~option_name; 5686 break; 5687 case SO_DGRAM_ERRIND: 5688 tcp->tcp_dgram_errind = onoff; 5689 if (onoff) 5690 so->so_options |= option_name; 5691 else 5692 so->so_options &= ~option_name; 5693 break; 5694 } 5695 break; 5696 case IPPROTO_TCP: 5697 switch (option_name) { 5698 case TCP_NODELAY: 5699 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5700 error = EINVAL; 5701 eprintsoline(so, error); 5702 mutex_enter(&so->so_lock); 5703 goto done2; 5704 } 5705 ASSERT(optval); 5706 tcp->tcp_naglim = intvalue ? 1 : tcp->tcp_mss; 5707 handled = B_TRUE; 5708 break; 5709 } 5710 break; 5711 default: 5712 handled = B_FALSE; 5713 break; 5714 } 5715 } 5716 5717 if (handled) { 5718 mutex_enter(&so->so_lock); 5719 goto done2; 5720 } 5721 5722 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5723 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5724 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5725 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5726 5727 oh.level = level; 5728 oh.name = option_name; 5729 oh.len = optlen; 5730 5731 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5732 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 5733 /* Let option management work in the presence of data flow control */ 5734 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5735 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5736 mp = NULL; 5737 mutex_enter(&so->so_lock); 5738 if (error) { 5739 eprintsoline(so, error); 5740 goto done2; 5741 } 5742 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5743 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5744 if (error) { 5745 eprintsoline(so, error); 5746 goto done; 5747 } 5748 ASSERT(mp); 5749 /* No need to verify T_optmgmt_ack */ 5750 freemsg(mp); 5751 done: 5752 /* 5753 * Check for SOL_SOCKET options and record their values. 5754 * If we know about a SOL_SOCKET parameter and the transport 5755 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5756 * EPROTO) we let the setsockopt succeed. 
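 * For example (illustration only): setting SO_REUSEADDR on an
 * AF_UNIX socket is typically NAKed by the transport, but since the
 * option is recognized ("handled") here, the bit is still recorded
 * in so_options below and the caller sees success.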
5757 */ 5758 if (level == SOL_SOCKET) { 5759 /* Check parameters */ 5760 switch (option_name) { 5761 case SO_DEBUG: 5762 case SO_REUSEADDR: 5763 case SO_KEEPALIVE: 5764 case SO_DONTROUTE: 5765 case SO_BROADCAST: 5766 case SO_USELOOPBACK: 5767 case SO_OOBINLINE: 5768 case SO_SNDBUF: 5769 case SO_RCVBUF: 5770 #ifdef notyet 5771 case SO_SNDLOWAT: 5772 case SO_RCVLOWAT: 5773 #endif /* notyet */ 5774 case SO_DGRAM_ERRIND: 5775 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5776 error = EINVAL; 5777 eprintsoline(so, error); 5778 goto done2; 5779 } 5780 ASSERT(optval); 5781 handled = B_TRUE; 5782 break; 5783 case SO_SNDTIMEO: 5784 case SO_RCVTIMEO: 5785 if (get_udatamodel() == DATAMODEL_NATIVE) { 5786 if (optlen != sizeof (struct timeval)) { 5787 error = EINVAL; 5788 eprintsoline(so, error); 5789 goto done2; 5790 } 5791 } else { 5792 if (optlen != sizeof (struct timeval32)) { 5793 error = EINVAL; 5794 eprintsoline(so, error); 5795 goto done2; 5796 } 5797 } 5798 ASSERT(optval); 5799 handled = B_TRUE; 5800 break; 5801 case SO_LINGER: 5802 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5803 error = EINVAL; 5804 eprintsoline(so, error); 5805 goto done2; 5806 } 5807 ASSERT(optval); 5808 handled = B_TRUE; 5809 break; 5810 } 5811 5812 #define intvalue (*(int32_t *)optval) 5813 5814 switch (option_name) { 5815 case SO_TYPE: 5816 case SO_ERROR: 5817 case SO_ACCEPTCONN: 5818 /* Can't be set */ 5819 error = ENOPROTOOPT; 5820 goto done2; 5821 case SO_LINGER: { 5822 struct linger *l = (struct linger *)optval; 5823 5824 so->so_linger.l_linger = l->l_linger; 5825 if (l->l_onoff) { 5826 so->so_linger.l_onoff = SO_LINGER; 5827 so->so_options |= SO_LINGER; 5828 } else { 5829 so->so_linger.l_onoff = 0; 5830 so->so_options &= ~SO_LINGER; 5831 } 5832 break; 5833 } 5834 5835 case SO_DEBUG: 5836 #ifdef SOCK_TEST 5837 if (intvalue & 2) 5838 sock_test_timelimit = 10 * hz; 5839 else 5840 sock_test_timelimit = 0; 5841 5842 if (intvalue & 4) 5843 do_useracc = 0; 5844 else 5845 do_useracc = 1; 5846 #endif /* SOCK_TEST */ 5847 /* FALLTHRU */ 5848 case SO_REUSEADDR: 5849 case SO_KEEPALIVE: 5850 case SO_DONTROUTE: 5851 case SO_BROADCAST: 5852 case SO_USELOOPBACK: 5853 case SO_OOBINLINE: 5854 case SO_DGRAM_ERRIND: 5855 if (intvalue != 0) { 5856 dprintso(so, 1, 5857 ("socket_setsockopt: setting 0x%x\n", 5858 option_name)); 5859 so->so_options |= option_name; 5860 } else { 5861 dprintso(so, 1, 5862 ("socket_setsockopt: clearing 0x%x\n", 5863 option_name)); 5864 so->so_options &= ~option_name; 5865 } 5866 break; 5867 /* 5868 * The following options are only returned by us when the 5869 * transport layer fails. 5870 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5871 * since the transport might adjust the value and not 5872 * return exactly what was set by the application. 5873 */ 5874 case SO_SNDBUF: 5875 so->so_sndbuf = intvalue; 5876 break; 5877 case SO_RCVBUF: 5878 so->so_rcvbuf = intvalue; 5879 break; 5880 case SO_RCVPSH: 5881 so->so_rcv_timer_interval = intvalue; 5882 break; 5883 #ifdef notyet 5884 /* 5885 * We do not implement the semantics of these options 5886 * thus we shouldn't implement the options either. 
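 * (These mirror the #ifdef notyet cases in sotpi_getsockopt(); if
 *  enabled, the assignments below would only cache the values in
 *  the sonode.)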
5887 */ 5888 case SO_SNDLOWAT: 5889 so->so_sndlowat = intvalue; 5890 break; 5891 case SO_RCVLOWAT: 5892 so->so_rcvlowat = intvalue; 5893 break; 5894 #endif /* notyet */ 5895 case SO_SNDTIMEO: 5896 case SO_RCVTIMEO: { 5897 struct timeval tl; 5898 clock_t val; 5899 5900 if (get_udatamodel() == DATAMODEL_NATIVE) 5901 bcopy(&tl, (struct timeval *)optval, 5902 sizeof (struct timeval)); 5903 else 5904 TIMEVAL32_TO_TIMEVAL(&tl, 5905 (struct timeval32 *)optval); 5906 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5907 if (option_name == SO_RCVTIMEO) 5908 so->so_rcvtimeo = drv_usectohz(val); 5909 else 5910 so->so_sndtimeo = drv_usectohz(val); 5911 break; 5912 } 5913 } 5914 #undef intvalue 5915 5916 if (error) { 5917 if ((error == ENOPROTOOPT || error == EPROTO || 5918 error == EINVAL) && handled) { 5919 dprintso(so, 1, 5920 ("setsockopt: ignoring error %d for 0x%x\n", 5921 error, option_name)); 5922 error = 0; 5923 } 5924 } 5925 } 5926 done2: 5927 so_unlock_single(so, SOLOCKED); 5928 mutex_exit(&so->so_lock); 5929 return (error); 5930 } 5931 5932 /* 5933 * sotpi_close() is called when the last open reference goes away. 5934 */ 5935 /* ARGSUSED */ 5936 int 5937 sotpi_close(struct sonode *so, int flag, struct cred *cr) 5938 { 5939 struct vnode *vp = SOTOV(so); 5940 dev_t dev; 5941 int error = 0; 5942 sotpi_info_t *sti = SOTOTPI(so); 5943 5944 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 5945 (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 5946 5947 dev = sti->sti_dev; 5948 5949 ASSERT(STREAMSTAB(getmajor(dev))); 5950 5951 mutex_enter(&so->so_lock); 5952 so_lock_single(so); /* Set SOLOCKED */ 5953 5954 ASSERT(so_verify_oobstate(so)); 5955 5956 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 5957 sti->sti_nl7c_flags = 0; 5958 nl7c_close(so); 5959 } 5960 5961 if (vp->v_stream != NULL) { 5962 vnode_t *ux_vp; 5963 5964 if (so->so_family == AF_UNIX) { 5965 /* Could avoid this when CANTSENDMORE for !dgram */ 5966 so_unix_close(so); 5967 } 5968 5969 mutex_exit(&so->so_lock); 5970 /* 5971 * Disassemble the linkage from the AF_UNIX underlying file 5972 * system vnode to this socket (by atomically clearing 5973 * v_stream in vn_rele_stream) before strclose clears sd_vnode 5974 * and frees the stream head. 5975 */ 5976 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 5977 ASSERT(ux_vp->v_stream); 5978 sti->sti_ux_bound_vp = NULL; 5979 vn_rele_stream(ux_vp); 5980 } 5981 if (so->so_family == AF_INET || so->so_family == AF_INET6) { 5982 strsetrwputdatahooks(SOTOV(so), NULL, NULL); 5983 if (sti->sti_kssl_ent != NULL) { 5984 kssl_release_ent(sti->sti_kssl_ent, so, 5985 sti->sti_kssl_type); 5986 sti->sti_kssl_ent = NULL; 5987 } 5988 if (sti->sti_kssl_ctx != NULL) { 5989 kssl_release_ctx(sti->sti_kssl_ctx); 5990 sti->sti_kssl_ctx = NULL; 5991 } 5992 sti->sti_kssl_type = KSSL_NO_PROXY; 5993 } 5994 error = strclose(vp, flag, cr); 5995 vp->v_stream = NULL; 5996 mutex_enter(&so->so_lock); 5997 } 5998 5999 /* 6000 * Flush the T_DISCON_IND on sti_discon_ind_mp. 6001 */ 6002 so_flush_discon_ind(so); 6003 6004 so_unlock_single(so, SOLOCKED); 6005 mutex_exit(&so->so_lock); 6006 6007 /* 6008 * Needed for STREAMs. 6009 * Decrement the device driver's reference count for streams 6010 * opened via the clone dip. The driver was held in clone_open(). 6011 * The absence of clone_close() forces this asymmetry. 
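 * (That is, the ddi_rele_driver() below undoes the hold taken at
 *  open time for SOCLONE streams; non-clone opens never took such a
 *  hold, so the release is skipped for them.)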
6012 */ 6013 if (so->so_flag & SOCLONE) 6014 ddi_rele_driver(getmajor(dev)); 6015 6016 return (error); 6017 } 6018 6019 static int 6020 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 6021 struct cred *cr, int32_t *rvalp) 6022 { 6023 struct vnode *vp = SOTOV(so); 6024 sotpi_info_t *sti = SOTOTPI(so); 6025 int error = 0; 6026 6027 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 6028 cmd, arg, pr_state(so->so_state, so->so_mode))); 6029 6030 switch (cmd) { 6031 case SIOCSQPTR: 6032 /* 6033 * SIOCSQPTR is valid only when helper stream is created 6034 * by the protocol. 6035 */ 6036 case _I_INSERT: 6037 case _I_REMOVE: 6038 /* 6039 * Since there's no compelling reason to support these ioctls 6040 * on sockets, and doing so would increase the complexity 6041 * markedly, prevent it. 6042 */ 6043 return (EOPNOTSUPP); 6044 6045 case I_FIND: 6046 case I_LIST: 6047 case I_LOOK: 6048 case I_POP: 6049 case I_PUSH: 6050 /* 6051 * To prevent races and inconsistencies between the actual 6052 * state of the stream and the state according to the sonode, 6053 * we serialize all operations which modify or operate on the 6054 * list of modules on the socket's stream. 6055 */ 6056 mutex_enter(&sti->sti_plumb_lock); 6057 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 6058 mutex_exit(&sti->sti_plumb_lock); 6059 return (error); 6060 6061 default: 6062 if (so->so_version != SOV_STREAM) 6063 break; 6064 6065 /* 6066 * The imaginary "sockmod" has been popped; act as a stream. 6067 */ 6068 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6069 } 6070 6071 ASSERT(so->so_version != SOV_STREAM); 6072 6073 /* 6074 * Process socket-specific ioctls. 6075 */ 6076 switch (cmd) { 6077 case FIONBIO: { 6078 int32_t value; 6079 6080 if (so_copyin((void *)arg, &value, sizeof (int32_t), 6081 (mode & (int)FKIOCTL))) 6082 return (EFAULT); 6083 6084 mutex_enter(&so->so_lock); 6085 if (value) { 6086 so->so_state |= SS_NDELAY; 6087 } else { 6088 so->so_state &= ~SS_NDELAY; 6089 } 6090 mutex_exit(&so->so_lock); 6091 return (0); 6092 } 6093 6094 case FIOASYNC: { 6095 int32_t value; 6096 6097 if (so_copyin((void *)arg, &value, sizeof (int32_t), 6098 (mode & (int)FKIOCTL))) 6099 return (EFAULT); 6100 6101 mutex_enter(&so->so_lock); 6102 /* 6103 * SS_ASYNC flag not already set correctly? 6104 * (!value != !(so->so_state & SS_ASYNC)) 6105 * but some engineers find that too hard to read. 6106 */ 6107 if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 6108 value != 0 && (so->so_state & SS_ASYNC) == 0) 6109 error = so_flip_async(so, vp, mode, cr); 6110 mutex_exit(&so->so_lock); 6111 return (error); 6112 } 6113 6114 case SIOCSPGRP: 6115 case FIOSETOWN: { 6116 pid_t pgrp; 6117 6118 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 6119 (mode & (int)FKIOCTL))) 6120 return (EFAULT); 6121 6122 mutex_enter(&so->so_lock); 6123 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 6124 /* Any change? */ 6125 if (pgrp != so->so_pgrp) 6126 error = so_set_siggrp(so, vp, pgrp, mode, cr); 6127 mutex_exit(&so->so_lock); 6128 return (error); 6129 } 6130 case SIOCGPGRP: 6131 case FIOGETOWN: 6132 if (so_copyout(&so->so_pgrp, (void *)arg, 6133 sizeof (pid_t), (mode & (int)FKIOCTL))) 6134 return (EFAULT); 6135 return (0); 6136 6137 case SIOCATMARK: { 6138 int retval; 6139 uint_t so_state; 6140 6141 /* 6142 * strwaitmark has a finite timeout after which it 6143 * returns -1 if the mark state is undetermined. 
6144 * In order to avoid any race between the mark state 6145 * in sockfs and the mark state in the stream head this 6146 * routine loops until the mark state can be determined 6147 * (or the urgent data indication has been removed by some 6148 * other thread). 6149 */ 6150 do { 6151 mutex_enter(&so->so_lock); 6152 so_state = so->so_state; 6153 mutex_exit(&so->so_lock); 6154 if (so_state & SS_RCVATMARK) { 6155 retval = 1; 6156 } else if (!(so_state & SS_OOBPEND)) { 6157 /* 6158 * No SIGURG has been generated -- there is no 6159 * pending or present urgent data. Thus can't 6160 * possibly be at the mark. 6161 */ 6162 retval = 0; 6163 } else { 6164 /* 6165 * Have the stream head wait until there is 6166 * either some messages on the read queue, or 6167 * STRATMARK or STRNOTATMARK gets set. The 6168 * STRNOTATMARK flag is used so that the 6169 * transport can send up a MSGNOTMARKNEXT 6170 * M_DATA to indicate that it is not 6171 * at the mark and additional data is not about 6172 * to be send upstream. 6173 * 6174 * If the mark state is undetermined this will 6175 * return -1 and we will loop rechecking the 6176 * socket state. 6177 */ 6178 retval = strwaitmark(vp); 6179 } 6180 } while (retval == -1); 6181 6182 if (so_copyout(&retval, (void *)arg, sizeof (int), 6183 (mode & (int)FKIOCTL))) 6184 return (EFAULT); 6185 return (0); 6186 } 6187 6188 case I_FDINSERT: 6189 case I_SENDFD: 6190 case I_RECVFD: 6191 case I_ATMARK: 6192 case _SIOCSOCKFALLBACK: 6193 /* 6194 * These ioctls do not apply to sockets. I_FDINSERT can be 6195 * used to send M_PROTO messages without modifying the socket 6196 * state. I_SENDFD/RECVFD should not be used for socket file 6197 * descriptor passing since they assume a twisted stream. 6198 * SIOCATMARK must be used instead of I_ATMARK. 6199 * 6200 * _SIOCSOCKFALLBACK from an application should never be 6201 * processed. It is only generated by socktpi_open() or 6202 * in response to I_POP or I_PUSH. 6203 */ 6204 #ifdef DEBUG 6205 zcmn_err(getzoneid(), CE_WARN, 6206 "Unsupported STREAMS ioctl 0x%x on socket. " 6207 "Pid = %d\n", cmd, curproc->p_pid); 6208 #endif /* DEBUG */ 6209 return (EOPNOTSUPP); 6210 6211 case _I_GETPEERCRED: 6212 if ((mode & FKIOCTL) == 0) 6213 return (EINVAL); 6214 6215 mutex_enter(&so->so_lock); 6216 if ((so->so_mode & SM_CONNREQUIRED) == 0) { 6217 error = ENOTSUP; 6218 } else if ((so->so_state & SS_ISCONNECTED) == 0) { 6219 error = ENOTCONN; 6220 } else if (so->so_peercred != NULL) { 6221 k_peercred_t *kp = (k_peercred_t *)arg; 6222 kp->pc_cr = so->so_peercred; 6223 kp->pc_cpid = so->so_cpid; 6224 crhold(so->so_peercred); 6225 } else { 6226 error = EINVAL; 6227 } 6228 mutex_exit(&so->so_lock); 6229 return (error); 6230 6231 default: 6232 /* 6233 * Do the higher-order bits of the ioctl cmd indicate 6234 * that it is an I_* streams ioctl? 6235 */ 6236 if ((cmd & 0xffffff00U) == STR && 6237 so->so_version == SOV_SOCKBSD) { 6238 #ifdef DEBUG 6239 zcmn_err(getzoneid(), CE_WARN, 6240 "Unsupported STREAMS ioctl 0x%x on socket. " 6241 "Pid = %d\n", cmd, curproc->p_pid); 6242 #endif /* DEBUG */ 6243 return (EOPNOTSUPP); 6244 } 6245 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6246 } 6247 } 6248 6249 /* 6250 * Handle plumbing-related ioctls. 
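 *
 * Illustration only (not compiled): the plumbing requests handled here are
 * issued from userland with the standard STREAMS ioctls.  A minimal sketch,
 * assuming "fd" is an open AF_INET socket and using "bufmod" purely as an
 * example of a pushable module:
 *
 *	#include <stropts.h>
 *	#include <sys/conf.h>			// FMNAMESZ
 *
 *	char top[FMNAMESZ + 1];
 *
 *	(void) ioctl(fd, I_PUSH, "bufmod");	// sti_pushcnt becomes 1
 *	(void) ioctl(fd, I_LOOK, top);		// reports "bufmod"
 *	(void) ioctl(fd, I_POP, 0);		// back to a plain socket
 *
 * An I_POP with nothing pushed (sti_pushcnt == 0) is the special case below
 * that emulates popping the imaginary "sockmod" and turns the socket into a
 * bare stream.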
6251 */ 6252 static int 6253 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 6254 struct cred *cr, int32_t *rvalp) 6255 { 6256 static const char sockmod_name[] = "sockmod"; 6257 struct sonode *so = VTOSO(vp); 6258 char mname[FMNAMESZ + 1]; 6259 int error; 6260 sotpi_info_t *sti = SOTOTPI(so); 6261 6262 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 6263 6264 if (so->so_version == SOV_SOCKBSD) 6265 return (EOPNOTSUPP); 6266 6267 if (so->so_version == SOV_STREAM) { 6268 /* 6269 * The imaginary "sockmod" has been popped - act as a stream. 6270 * If this is a push of sockmod then change back to a socket. 6271 */ 6272 if (cmd == I_PUSH) { 6273 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6274 (void *)arg, mname, sizeof (mname), NULL); 6275 6276 if (error == 0 && strcmp(mname, sockmod_name) == 0) { 6277 dprintso(so, 0, ("socktpi_ioctl: going to " 6278 "socket version\n")); 6279 so_stream2sock(so); 6280 return (0); 6281 } 6282 } 6283 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6284 } 6285 6286 switch (cmd) { 6287 case I_PUSH: 6288 if (sti->sti_direct) { 6289 mutex_enter(&so->so_lock); 6290 so_lock_single(so); 6291 mutex_exit(&so->so_lock); 6292 6293 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 6294 CRED(), rvalp); 6295 6296 mutex_enter(&so->so_lock); 6297 if (error == 0) 6298 sti->sti_direct = 0; 6299 so_unlock_single(so, SOLOCKED); 6300 mutex_exit(&so->so_lock); 6301 6302 if (error != 0) 6303 return (error); 6304 } 6305 6306 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6307 if (error == 0) 6308 sti->sti_pushcnt++; 6309 return (error); 6310 6311 case I_POP: 6312 if (sti->sti_pushcnt == 0) { 6313 /* Emulate sockmod being popped */ 6314 dprintso(so, 0, 6315 ("socktpi_ioctl: going to STREAMS version\n")); 6316 return (so_sock2stream(so)); 6317 } 6318 6319 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6320 if (error == 0) 6321 sti->sti_pushcnt--; 6322 return (error); 6323 6324 case I_LIST: { 6325 struct str_mlist *kmlistp, *umlistp; 6326 struct str_list kstrlist; 6327 ssize_t kstrlistsize; 6328 int i, nmods; 6329 6330 STRUCT_DECL(str_list, ustrlist); 6331 STRUCT_INIT(ustrlist, mode); 6332 6333 if (arg == NULL) { 6334 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6335 if (error == 0) 6336 (*rvalp)++; /* Add one for sockmod */ 6337 return (error); 6338 } 6339 6340 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 6341 STRUCT_SIZE(ustrlist), mode & FKIOCTL); 6342 if (error != 0) 6343 return (error); 6344 6345 nmods = STRUCT_FGET(ustrlist, sl_nmods); 6346 if (nmods <= 0) 6347 return (EINVAL); 6348 /* 6349 * Ceiling nmods at nstrpush to prevent someone from 6350 * maliciously consuming lots of kernel memory. 6351 */ 6352 nmods = MIN(nmods, nstrpush); 6353 6354 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 6355 kstrlist.sl_nmods = nmods; 6356 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 6357 6358 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 6359 cr, rvalp); 6360 if (error != 0) 6361 goto done; 6362 6363 /* 6364 * Considering the module list as a 0-based array of sl_nmods 6365 * modules, sockmod should conceptually exist at slot 6366 * sti_pushcnt. Insert sockmod at this location by sliding all 6367 * of the module names after so_pushcnt over by one. We know 6368 * that there will be room to do this since we allocated 6369 * sl_modlist with an additional slot. 
6370 */ 6371 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--) 6372 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1]; 6373 6374 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name); 6375 kstrlist.sl_nmods++; 6376 6377 /* 6378 * Copy all of the entries out to ustrlist. 6379 */ 6380 kmlistp = kstrlist.sl_modlist; 6381 umlistp = STRUCT_FGETP(ustrlist, sl_modlist); 6382 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) { 6383 error = so_copyout(kmlistp++, umlistp++, 6384 sizeof (struct str_mlist), mode & FKIOCTL); 6385 if (error != 0) 6386 goto done; 6387 } 6388 6389 error = so_copyout(&i, (void *)arg, sizeof (int32_t), 6390 mode & FKIOCTL); 6391 if (error == 0) 6392 *rvalp = 0; 6393 done: 6394 kmem_free(kstrlist.sl_modlist, kstrlistsize); 6395 return (error); 6396 } 6397 case I_LOOK: 6398 if (sti->sti_pushcnt == 0) { 6399 return (so_copyout(sockmod_name, (void *)arg, 6400 sizeof (sockmod_name), mode & FKIOCTL)); 6401 } 6402 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6403 6404 case I_FIND: 6405 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6406 if (error && error != EINVAL) 6407 return (error); 6408 6409 /* if not found and string was sockmod return 1 */ 6410 if (*rvalp == 0 || error == EINVAL) { 6411 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6412 (void *)arg, mname, sizeof (mname), NULL); 6413 if (error == ENAMETOOLONG) 6414 error = EINVAL; 6415 6416 if (error == 0 && strcmp(mname, sockmod_name) == 0) 6417 *rvalp = 1; 6418 } 6419 return (error); 6420 6421 default: 6422 panic("socktpi_plumbioctl: unknown ioctl %d", cmd); 6423 break; 6424 } 6425 6426 return (0); 6427 } 6428 6429 /* 6430 * Wrapper around the streams poll routine that implements socket poll 6431 * semantics. 6432 * The sockfs never calls pollwakeup itself - the stream head take care 6433 * of all pollwakeups. Since sockfs never holds so_lock when calling the 6434 * stream head there can never be a deadlock due to holding so_lock across 6435 * pollwakeup and acquiring so_lock in this routine. 6436 * 6437 * However, since the performance of VOP_POLL is critical we avoid 6438 * acquiring so_lock here. This is based on two assumptions: 6439 * - The poll implementation holds locks to serialize the VOP_POLL call 6440 * and a pollwakeup for the same pollhead. This ensures that should 6441 * e.g. so_state change during a socktpi_poll call the pollwakeup 6442 * (which strsock_* and strrput conspire to issue) is issued after 6443 * the state change. Thus the pollwakeup will block until VOP_POLL has 6444 * returned and then wake up poll and have it call VOP_POLL again. 6445 * - The reading of so_state without holding so_lock does not result in 6446 * stale data that is older than the latest state change that has dropped 6447 * so_lock. This is ensured by the mutex_exit issuing the appropriate 6448 * memory barrier to force the data into the coherency domain. 
6449 */ 6450 static int 6451 sotpi_poll( 6452 struct sonode *so, 6453 short events, 6454 int anyyet, 6455 short *reventsp, 6456 struct pollhead **phpp) 6457 { 6458 short origevents = events; 6459 struct vnode *vp = SOTOV(so); 6460 int error; 6461 int so_state = so->so_state; /* snapshot */ 6462 sotpi_info_t *sti = SOTOTPI(so); 6463 6464 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n", 6465 (void *)vp, pr_state(so_state, so->so_mode), so->so_error)); 6466 6467 ASSERT(vp->v_type == VSOCK); 6468 ASSERT(vp->v_stream != NULL); 6469 6470 if (so->so_version == SOV_STREAM) { 6471 /* The imaginary "sockmod" has been popped - act as a stream */ 6472 return (strpoll(vp->v_stream, events, anyyet, 6473 reventsp, phpp)); 6474 } 6475 6476 if (!(so_state & SS_ISCONNECTED) && 6477 (so->so_mode & SM_CONNREQUIRED)) { 6478 /* Not connected yet - turn off write side events */ 6479 events &= ~(POLLOUT|POLLWRBAND); 6480 } 6481 /* 6482 * Check for errors without calling strpoll if the caller wants them. 6483 * In sockets the errors are represented as input/output events 6484 * and there is no need to ask the stream head for this information. 6485 */ 6486 if (so->so_error != 0 && 6487 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) { 6488 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; 6489 return (0); 6490 } 6491 /* 6492 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages. 6493 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA 6494 * will not trigger a POLLIN event with POLLRDDATA set. 6495 * The handling of urgent data (causing POLLRDBAND) is done by 6496 * inspecting SS_OOBPEND below. 6497 */ 6498 events |= POLLRDDATA; 6499 6500 /* 6501 * After shutdown(output) a stream head write error is set. 6502 * However, we should not return output events. 6503 */ 6504 events |= POLLNOERR; 6505 error = strpoll(vp->v_stream, events, anyyet, 6506 reventsp, phpp); 6507 if (error) 6508 return (error); 6509 6510 ASSERT(!(*reventsp & POLLERR)); 6511 6512 /* 6513 * Notes on T_CONN_IND handling for sockets. 6514 * 6515 * If strpoll() returned without events, SR_POLLIN is guaranteed 6516 * to be set, ensuring any subsequent strrput() runs pollwakeup(). 6517 * 6518 * Since the so_lock is not held, soqueueconnind() may have run 6519 * and a T_CONN_IND may be waiting. We now check for any queued 6520 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events 6521 * to ensure poll returns. 6522 * 6523 * However: 6524 * If the T_CONN_IND hasn't arrived by the time strpoll() returns, 6525 * when strrput() does run for an arriving M_PROTO with T_CONN_IND 6526 * the following actions will occur; taken together they ensure the 6527 * syscall will return. 6528 * 6529 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if 6530 * the accept() was run on a non-blocking socket sowaitconnind() 6531 * may have already returned EWOULDBLOCK, so not be waiting to 6532 * process the message. Additionally socktpi_poll() has probably 6533 * proceeded past the sti_conn_ind_head check below. 6534 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake 6535 * this thread, however that could occur before poll_common() 6536 * has entered cv_wait. 6537 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock. 6538 * 6539 * Before proceeding to cv_wait() in poll_common() for an event, 6540 * poll_common() atomically checks for T_POLLWAKE under the pc_lock, 6541 * and if set, re-calls strpoll() to ensure the late arriving 6542 * T_CONN_IND is recognized, and pollsys() returns. 
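	 *
	 * Illustration only (not compiled): the userland pattern that relies
	 * on the guarantees above is a non-blocking listener polling for
	 * connection indications.  A minimal sketch, assuming "lfd" is a
	 * bound, listening, non-blocking socket:
	 *
	 *	#include <poll.h>
	 *	#include <sys/socket.h>
	 *
	 *	struct pollfd pfd;
	 *
	 *	pfd.fd = lfd;
	 *	pfd.events = POLLIN;
	 *	if (poll(&pfd, 1, -1) == 1 && (pfd.revents & POLLIN)) {
	 *		// a T_CONN_IND is queued on sti_conn_ind_head,
	 *		// so accept() can pick it up
	 *		int afd = accept(lfd, NULL, NULL);
	 *	}
	 *
	 * The checks below on sti_conn_ind_head and SS_OOBPEND are what make
	 * the poll() above return even when the indication arrives after
	 * strpoll() has looked at the stream head.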
6543 */ 6544 6545 if (sti->sti_conn_ind_head != NULL) 6546 *reventsp |= (POLLIN|POLLRDNORM) & events; 6547 6548 if (so->so_state & SS_OOBPEND) 6549 *reventsp |= POLLRDBAND & events; 6550 6551 if (sti->sti_nl7c_rcv_mp != NULL) { 6552 *reventsp |= (POLLIN|POLLRDNORM) & events; 6553 } 6554 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 6555 ((POLLIN|POLLRDNORM) & *reventsp)) { 6556 sti->sti_nl7c_flags |= NL7C_POLLIN; 6557 } 6558 6559 return (0); 6560 } 6561 6562 /*ARGSUSED*/ 6563 static int 6564 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6565 { 6566 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6567 int error = 0; 6568 6569 error = sonode_constructor(buf, cdrarg, kmflags); 6570 if (error != 0) 6571 return (error); 6572 6573 error = i_sotpi_info_constructor(&st->st_info); 6574 if (error != 0) 6575 sonode_destructor(buf, cdrarg); 6576 6577 st->st_sonode.so_priv = &st->st_info; 6578 6579 return (error); 6580 } 6581 6582 /*ARGSUSED1*/ 6583 static void 6584 socktpi_destructor(void *buf, void *cdrarg) 6585 { 6586 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6587 6588 ASSERT(st->st_sonode.so_priv == &st->st_info); 6589 st->st_sonode.so_priv = NULL; 6590 6591 i_sotpi_info_destructor(&st->st_info); 6592 sonode_destructor(buf, cdrarg); 6593 } 6594 6595 static int 6596 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 6597 { 6598 int retval; 6599 6600 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6601 struct sonode *so = (struct sonode *)buf; 6602 sotpi_info_t *sti = SOTOTPI(so); 6603 6604 mutex_enter(&socklist.sl_lock); 6605 6606 sti->sti_next_so = socklist.sl_list; 6607 sti->sti_prev_so = NULL; 6608 if (sti->sti_next_so != NULL) 6609 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6610 socklist.sl_list = so; 6611 6612 mutex_exit(&socklist.sl_lock); 6613 6614 } 6615 return (retval); 6616 } 6617 6618 static void 6619 socktpi_unix_destructor(void *buf, void *cdrarg) 6620 { 6621 struct sonode *so = (struct sonode *)buf; 6622 sotpi_info_t *sti = SOTOTPI(so); 6623 6624 mutex_enter(&socklist.sl_lock); 6625 6626 if (sti->sti_next_so != NULL) 6627 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6628 if (sti->sti_prev_so != NULL) 6629 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6630 else 6631 socklist.sl_list = sti->sti_next_so; 6632 6633 mutex_exit(&socklist.sl_lock); 6634 6635 socktpi_destructor(buf, cdrarg); 6636 } 6637 6638 int 6639 socktpi_init(void) 6640 { 6641 /* 6642 * Create sonode caches. We create a special one for AF_UNIX so 6643 * that we can track them for netstat(1m). 6644 */ 6645 socktpi_cache = kmem_cache_create("socktpi_cache", 6646 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6647 socktpi_destructor, NULL, NULL, NULL, 0); 6648 6649 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6650 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6651 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6652 6653 return (0); 6654 } 6655 6656 /* 6657 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6658 * 6659 * Caller must still update state and mode using sotpi_update_state(). 6660 * 6661 * Returns the STREAM queue that the protocol should use. 
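 *
 * Illustration only (not compiled): a hypothetical fallback caller would
 * use this function together with sotpi_update_state() roughly as sketched
 * below.  The variables "so", "newsp", "cr", "tcap", the address buffers
 * and their lengths are all assumed to be supplied by that caller; this is
 * a sketch, not the actual fallback implementation.
 *
 *	boolean_t direct;
 *	queue_t *q;
 *
 *	q = sotpi_convert_sonode(so, newsp, &direct, cr);
 *	if (q == NULL)
 *		return (B_FALSE);	// failed; so_state was restored
 *
 *	// ... once the protocol has quiesced and handed over its state ...
 *	sotpi_update_state(so, tcap, laddr, laddrlen, faddr, faddrlen, opts);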
 */
queue_t *
sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp,
    boolean_t *direct, struct cred *cr)
{
	sotpi_info_t *sti;
	struct sockparams *origsp = so->so_sockparams;
	sock_lower_handle_t handle = so->so_proto_handle;
	uint_t old_state = so->so_state;
	struct stdata *stp;
	struct vnode *vp;
	queue_t *q;

	*direct = B_FALSE;
	so->so_sockparams = newsp;
	/*
	 * Allocate and initialize fields required by TPI.
	 */
	(void) sotpi_info_create(so, KM_SLEEP);
	sotpi_info_init(so);

	if (sotpi_init(so, NULL, cr, SO_FALLBACK) != 0) {
		sotpi_info_fini(so);
		sotpi_info_destroy(so);
		so->so_state = old_state;
		return (NULL);
	}
	ASSERT(handle == so->so_proto_handle);
	sti = SOTOTPI(so);
	if (sti->sti_direct != 0)
		*direct = B_TRUE;

	/*
	 * Keep the original sp around so we can properly dispose of the
	 * sonode when the socket is being closed.
	 */
	sti->sti_orig_sp = origsp;

	so_basic_strinit(so);	/* skips the T_CAPABILITY_REQ */
	so_alloc_addr(so, so->so_max_addr_len);

	/*
	 * If the application has done a SIOCSPGRP, make sure the
	 * STREAM head is aware. This needs to take place before
	 * the protocol starts sending up messages, otherwise we
	 * might fail to generate a SIGPOLL.
	 *
	 * It is possible that the application will receive duplicate
	 * signals if some were already generated for either data or
	 * connection indications.
	 */
	if (so->so_pgrp != 0) {
		mutex_enter(&so->so_lock);
		if (so_set_events(so, so->so_vnode, cr) != 0)
			so->so_pgrp = 0;
		mutex_exit(&so->so_lock);
	}

	/*
	 * Determine which queue to use.
	 */
	vp = SOTOV(so);
	stp = vp->v_stream;
	ASSERT(stp != NULL);
	q = stp->sd_wrq->q_next;

	/*
	 * Skip any modules that may have been auto pushed when the device
	 * was opened.
	 */
	while (q->q_next != NULL)
		q = q->q_next;
	q = _RD(q);

	return (q);
}

void
sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap,
    struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr,
    socklen_t faddrlen, short opts)
{
	sotpi_info_t *sti = SOTOTPI(so);

	so_proc_tcapability_ack(so, tcap);

	so->so_options |= opts;

	/*
	 * Determine whether the foreign and local addresses are valid.
	 */
	if (laddrlen != 0) {
		ASSERT(laddrlen <= sti->sti_laddr_maxlen);
		sti->sti_laddr_len = laddrlen;
		bcopy(laddr, sti->sti_laddr_sa, laddrlen);
		sti->sti_laddr_valid = (so->so_state & SS_ISBOUND);
	}

	if (faddrlen != 0) {
		ASSERT(faddrlen <= sti->sti_faddr_maxlen);
		sti->sti_faddr_len = faddrlen;
		bcopy(faddr, sti->sti_faddr_sa, faddrlen);
		sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED);
	}
}

/*
 * Allocate enough space to cache the local and foreign addresses.
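 *
 * Both buffers come from a single kmem_alloc() of twice the rounded-up
 * maximum length; sti_faddr_sa simply points at the second half:
 *
 *	sti_laddr_sa                        sti_faddr_sa
 *	|<---- sti_laddr_maxlen ---->|<---- sti_faddr_maxlen ---->|
 *
 * A minimal sketch of the equivalent allocation (made-up local names, same
 * P2ROUNDUP/KMEM_ALIGN rounding as the code below):
 *
 *	size_t len = P2ROUNDUP(maxlen, KMEM_ALIGN);
 *	struct sockaddr *la = kmem_alloc(2 * len, KM_SLEEP);
 *	struct sockaddr *fa = (struct sockaddr *)((caddr_t)la + len);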
6771 */ 6772 void 6773 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6774 { 6775 sotpi_info_t *sti = SOTOTPI(so); 6776 6777 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6778 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6779 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6780 P2ROUNDUP(maxlen, KMEM_ALIGN); 6781 so->so_max_addr_len = sti->sti_laddr_maxlen; 6782 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6783 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6784 + sti->sti_laddr_maxlen); 6785 6786 if (so->so_family == AF_UNIX) { 6787 /* 6788 * Initialize AF_UNIX related fields. 6789 */ 6790 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6791 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6792 } 6793 } 6794 6795 6796 sotpi_info_t * 6797 sotpi_sototpi(struct sonode *so) 6798 { 6799 sotpi_info_t *sti; 6800 6801 if (so == NULL) 6802 return (NULL); 6803 6804 sti = (sotpi_info_t *)so->so_priv; 6805 6806 ASSERT(sti != NULL); 6807 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6808 6809 return (sti); 6810 } 6811 6812 static int 6813 i_sotpi_info_constructor(sotpi_info_t *sti) 6814 { 6815 sti->sti_magic = SOTPI_INFO_MAGIC; 6816 sti->sti_ack_mp = NULL; 6817 sti->sti_discon_ind_mp = NULL; 6818 sti->sti_ux_bound_vp = NULL; 6819 sti->sti_unbind_mp = NULL; 6820 6821 sti->sti_conn_ind_head = NULL; 6822 sti->sti_conn_ind_tail = NULL; 6823 6824 sti->sti_laddr_sa = NULL; 6825 sti->sti_faddr_sa = NULL; 6826 6827 sti->sti_nl7c_flags = 0; 6828 sti->sti_nl7c_uri = NULL; 6829 sti->sti_nl7c_rcv_mp = NULL; 6830 6831 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6832 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6833 6834 return (0); 6835 } 6836 6837 static void 6838 i_sotpi_info_destructor(sotpi_info_t *sti) 6839 { 6840 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6841 ASSERT(sti->sti_ack_mp == NULL); 6842 ASSERT(sti->sti_discon_ind_mp == NULL); 6843 ASSERT(sti->sti_ux_bound_vp == NULL); 6844 ASSERT(sti->sti_unbind_mp == NULL); 6845 6846 ASSERT(sti->sti_conn_ind_head == NULL); 6847 ASSERT(sti->sti_conn_ind_tail == NULL); 6848 6849 ASSERT(sti->sti_laddr_sa == NULL); 6850 ASSERT(sti->sti_faddr_sa == NULL); 6851 6852 ASSERT(sti->sti_nl7c_flags == 0); 6853 ASSERT(sti->sti_nl7c_uri == NULL); 6854 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6855 6856 mutex_destroy(&sti->sti_plumb_lock); 6857 cv_destroy(&sti->sti_ack_cv); 6858 } 6859 6860 /* 6861 * Creates and attaches TPI information to the given sonode 6862 */ 6863 static boolean_t 6864 sotpi_info_create(struct sonode *so, int kmflags) 6865 { 6866 sotpi_info_t *sti; 6867 6868 ASSERT(so->so_priv == NULL); 6869 6870 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6871 return (B_FALSE); 6872 6873 if (i_sotpi_info_constructor(sti) != 0) { 6874 kmem_free(sti, sizeof (*sti)); 6875 return (B_FALSE); 6876 } 6877 6878 so->so_priv = (void *)sti; 6879 return (B_TRUE); 6880 } 6881 6882 /* 6883 * Initializes the TPI information. 
6884 */ 6885 static void 6886 sotpi_info_init(struct sonode *so) 6887 { 6888 struct vnode *vp = SOTOV(so); 6889 sotpi_info_t *sti = SOTOTPI(so); 6890 time_t now; 6891 6892 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6893 vp->v_rdev = sti->sti_dev; 6894 6895 sti->sti_orig_sp = NULL; 6896 6897 sti->sti_pushcnt = 0; 6898 6899 now = gethrestime_sec(); 6900 sti->sti_atime = now; 6901 sti->sti_mtime = now; 6902 sti->sti_ctime = now; 6903 6904 sti->sti_eaddr_mp = NULL; 6905 sti->sti_delayed_error = 0; 6906 6907 sti->sti_provinfo = NULL; 6908 6909 sti->sti_oobcnt = 0; 6910 sti->sti_oobsigcnt = 0; 6911 6912 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6913 6914 sti->sti_laddr_sa = 0; 6915 sti->sti_faddr_sa = 0; 6916 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6917 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6918 6919 sti->sti_laddr_valid = 0; 6920 sti->sti_faddr_valid = 0; 6921 sti->sti_faddr_noxlate = 0; 6922 6923 sti->sti_direct = 0; 6924 6925 ASSERT(sti->sti_ack_mp == NULL); 6926 ASSERT(sti->sti_ux_bound_vp == NULL); 6927 ASSERT(sti->sti_unbind_mp == NULL); 6928 6929 ASSERT(sti->sti_conn_ind_head == NULL); 6930 ASSERT(sti->sti_conn_ind_tail == NULL); 6931 6932 /* Initialize the kernel SSL proxy fields */ 6933 sti->sti_kssl_type = KSSL_NO_PROXY; 6934 sti->sti_kssl_ent = NULL; 6935 sti->sti_kssl_ctx = NULL; 6936 } 6937 6938 /* 6939 * Given a sonode, grab the TPI info and free any data. 6940 */ 6941 static void 6942 sotpi_info_fini(struct sonode *so) 6943 { 6944 sotpi_info_t *sti = SOTOTPI(so); 6945 mblk_t *mp; 6946 6947 ASSERT(sti->sti_discon_ind_mp == NULL); 6948 6949 if ((mp = sti->sti_conn_ind_head) != NULL) { 6950 mblk_t *mp1; 6951 6952 while (mp) { 6953 mp1 = mp->b_next; 6954 mp->b_next = NULL; 6955 freemsg(mp); 6956 mp = mp1; 6957 } 6958 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6959 } 6960 6961 /* 6962 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6963 * indirect them. It also uses so_count as a validity test. 6964 */ 6965 mutex_enter(&so->so_lock); 6966 6967 if (sti->sti_laddr_sa) { 6968 ASSERT((caddr_t)sti->sti_faddr_sa == 6969 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6970 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6971 sti->sti_laddr_valid = 0; 6972 sti->sti_faddr_valid = 0; 6973 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6974 sti->sti_laddr_sa = NULL; 6975 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6976 sti->sti_faddr_sa = NULL; 6977 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6978 } 6979 6980 mutex_exit(&so->so_lock); 6981 6982 if ((mp = sti->sti_eaddr_mp) != NULL) { 6983 freemsg(mp); 6984 sti->sti_eaddr_mp = NULL; 6985 sti->sti_delayed_error = 0; 6986 } 6987 6988 if ((mp = sti->sti_ack_mp) != NULL) { 6989 freemsg(mp); 6990 sti->sti_ack_mp = NULL; 6991 } 6992 6993 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 6994 sti->sti_nl7c_rcv_mp = NULL; 6995 freemsg(mp); 6996 } 6997 sti->sti_nl7c_rcv_rval = 0; 6998 if (sti->sti_nl7c_uri != NULL) { 6999 nl7c_urifree(so); 7000 /* urifree() cleared nl7c_uri */ 7001 } 7002 if (sti->sti_nl7c_flags) { 7003 sti->sti_nl7c_flags = 0; 7004 } 7005 7006 ASSERT(sti->sti_ux_bound_vp == NULL); 7007 if ((mp = sti->sti_unbind_mp) != NULL) { 7008 freemsg(mp); 7009 sti->sti_unbind_mp = NULL; 7010 } 7011 } 7012 7013 /* 7014 * Destroys the TPI information attached to a sonode. 
7015 */ 7016 static void 7017 sotpi_info_destroy(struct sonode *so) 7018 { 7019 sotpi_info_t *sti = SOTOTPI(so); 7020 7021 i_sotpi_info_destructor(sti); 7022 kmem_free(sti, sizeof (*sti)); 7023 7024 so->so_priv = NULL; 7025 } 7026 7027 /* 7028 * Create the global sotpi socket module entry. It will never be freed. 7029 */ 7030 smod_info_t * 7031 sotpi_smod_create(void) 7032 { 7033 smod_info_t *smodp; 7034 7035 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 7036 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 7037 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 7038 /* 7039 * Initialize the smod_refcnt to 1 so it will never be freed. 7040 */ 7041 smodp->smod_refcnt = 1; 7042 smodp->smod_uc_version = SOCK_UC_VERSION; 7043 smodp->smod_dc_version = SOCK_DC_VERSION; 7044 smodp->smod_sock_create_func = &sotpi_create; 7045 smodp->smod_sock_destroy_func = &sotpi_destroy; 7046 return (smodp); 7047 } 7048