1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/t_lock.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/buf.h> 32 #include <sys/conf.h> 33 #include <sys/cred.h> 34 #include <sys/kmem.h> 35 #include <sys/kmem_impl.h> 36 #include <sys/sysmacros.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/debug.h> 40 #include <sys/errno.h> 41 #include <sys/time.h> 42 #include <sys/file.h> 43 #include <sys/open.h> 44 #include <sys/user.h> 45 #include <sys/termios.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/strsun.h> 49 #include <sys/suntpi.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/sockio.h> 61 #include <sys/sodirect.h> 62 #include <netinet/in.h> 63 #include <sys/un.h> 64 #include <sys/strsun.h> 65 66 #include <sys/tiuser.h> 67 #define 
_SUN_TPI_VERSION 2 68 #include <sys/tihdr.h> 69 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 70 71 #include <c2/audit.h> 72 73 #include <inet/common.h> 74 #include <inet/ip.h> 75 #include <inet/ip6.h> 76 #include <inet/tcp.h> 77 #include <inet/udp_impl.h> 78 79 #include <sys/zone.h> 80 81 #include <fs/sockfs/nl7c.h> 82 #include <fs/sockfs/nl7curi.h> 83 84 #include <inet/kssl/ksslapi.h> 85 86 #include <fs/sockfs/sockcommon.h> 87 #include <fs/sockfs/socktpi.h> 88 #include <fs/sockfs/socktpi_impl.h> 89 90 /* 91 * Possible failures when memory can't be allocated. The documented behavior: 92 * 93 * 5.5: 4.X: XNET: 94 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 95 * EINTR 96 * (4.X does not document EINTR but returns it) 97 * bind: ENOSR - ENOBUFS/ENOSR 98 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 99 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 100 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 101 * (4.X getpeername and getsockname do not fail in practice) 102 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 103 * listen: - - ENOBUFS 104 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 105 * EINTR 106 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 107 * EINTR 108 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 109 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 110 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 111 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 112 * 113 * Resolution. When allocation fails: 114 * recv: return EINTR 115 * send: return EINTR 116 * connect, accept: EINTR 117 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 118 * socket, socketpair: ENOBUFS 119 * getpeername, getsockname: sleep 120 * getsockopt, setsockopt: sleep 121 */ 122 123 #ifdef SOCK_TEST 124 /* 125 * Variables that make sockfs do something other than the standard TPI 126 * for the AF_INET transports. 
127 * 128 * solisten_tpi_tcp: 129 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 130 * the transport is already bound. This is needed to avoid losing the 131 * port number should listen() do a T_UNBIND_REQ followed by a 132 * O_T_BIND_REQ. 133 * 134 * soconnect_tpi_udp: 135 * UDP and ICMP can handle a T_CONN_REQ. 136 * This is needed to make the sequence of connect(), getsockname() 137 * return the local IP address used to send packets to the connected to 138 * destination. 139 * 140 * soconnect_tpi_tcp: 141 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 142 * Set this to non-zero to send TPI conformant messages to TCP in this 143 * respect. This is a performance optimization. 144 * 145 * soaccept_tpi_tcp: 146 * TCP can handle a T_CONN_REQ without the acceptor being bound. 147 * This is a performance optimization that has been picked up in XTI. 148 * 149 * soaccept_tpi_multioptions: 150 * When inheriting SOL_SOCKET options from the listener to the accepting 151 * socket send them as a single message for AF_INET{,6}. 152 */ 153 int solisten_tpi_tcp = 0; 154 int soconnect_tpi_udp = 0; 155 int soconnect_tpi_tcp = 0; 156 int soaccept_tpi_tcp = 0; 157 int soaccept_tpi_multioptions = 1; 158 #else /* SOCK_TEST */ 159 #define soconnect_tpi_tcp 0 160 #define soconnect_tpi_udp 0 161 #define solisten_tpi_tcp 0 162 #define soaccept_tpi_tcp 0 163 #define soaccept_tpi_multioptions 1 164 #endif /* SOCK_TEST */ 165 166 #ifdef SOCK_TEST 167 extern int do_useracc; 168 extern clock_t sock_test_timelimit; 169 #endif /* SOCK_TEST */ 170 171 /* 172 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 173 * applications working. Turn on this flag to disable these checks.
174 */ 175 int xnet_skip_checks = 0; 176 int xnet_check_print = 0; 177 int xnet_truncate_print = 0; 178 179 static void sotpi_destroy(struct sonode *); 180 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int, 181 int, int *, cred_t *cr); 182 183 static boolean_t sotpi_info_create(struct sonode *, int); 184 static void sotpi_info_init(struct sonode *); 185 static void sotpi_info_fini(struct sonode *); 186 static void sotpi_info_destroy(struct sonode *); 187 188 /* 189 * Do direct function call to the transport layer below; this would 190 * also allow the transport to utilize read-side synchronous stream 191 * interface if necessary. This is a /etc/system tunable that must 192 * not be modified on a running system. By default this is enabled 193 * for performance reasons and may be disabled for debugging purposes. 194 */ 195 boolean_t socktpi_direct = B_TRUE; 196 197 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 198 199 extern void sigintr(k_sigset_t *, int); 200 extern void sigunintr(k_sigset_t *); 201 202 /* Sockets acting as an in-kernel SSL proxy */ 203 extern mblk_t *strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *, 204 strsigset_t *, strsigset_t *, strpollset_t *); 205 extern mblk_t *strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *, 206 strsigset_t *, strsigset_t *, strpollset_t *); 207 208 static int sotpi_unbind(struct sonode *, int); 209 210 extern int sodput(sodirect_t *, mblk_t *); 211 extern void sodwakeup(sodirect_t *); 212 213 /* TPI sockfs sonode operations */ 214 int sotpi_init(struct sonode *, struct sonode *, struct cred *, 215 int); 216 static int sotpi_accept(struct sonode *, int, struct cred *, 217 struct sonode **); 218 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 219 int, struct cred *); 220 static int sotpi_listen(struct sonode *, int, struct cred *); 221 static int sotpi_connect(struct sonode *, const struct sockaddr *, 222 socklen_t, int, int, struct cred *); 223 
extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 224 struct uio *, struct cred *); 225 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 226 struct uio *, struct cred *); 227 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 228 struct cred *, mblk_t **); 229 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 230 struct uio *, void *, t_uscalar_t, int); 231 static int sodgram_direct(struct sonode *, struct sockaddr *, 232 socklen_t, struct uio *, int); 233 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 234 socklen_t *, boolean_t, struct cred *); 235 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 236 socklen_t *, struct cred *); 237 static int sotpi_shutdown(struct sonode *, int, struct cred *); 238 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 239 socklen_t *, int, struct cred *); 240 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 241 socklen_t, struct cred *); 242 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 243 int32_t *); 244 static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int, 245 struct cred *, int32_t *); 246 static int sotpi_poll(struct sonode *, short, int, short *, 247 struct pollhead **); 248 static int sotpi_close(struct sonode *, int, struct cred *); 249 250 static int i_sotpi_info_constructor(sotpi_info_t *); 251 static void i_sotpi_info_destructor(sotpi_info_t *); 252 253 sonodeops_t sotpi_sonodeops = { 254 sotpi_init, /* sop_init */ 255 sotpi_accept, /* sop_accept */ 256 sotpi_bind, /* sop_bind */ 257 sotpi_listen, /* sop_listen */ 258 sotpi_connect, /* sop_connect */ 259 sotpi_recvmsg, /* sop_recvmsg */ 260 sotpi_sendmsg, /* sop_sendmsg */ 261 sotpi_sendmblk, /* sop_sendmblk */ 262 sotpi_getpeername, /* sop_getpeername */ 263 sotpi_getsockname, /* sop_getsockname */ 264 sotpi_shutdown, /* sop_shutdown */ 265 sotpi_getsockopt, /* sop_getsockopt */ 266 sotpi_setsockopt, /* 
sop_setsockopt */ 267 sotpi_ioctl, /* sop_ioctl */ 268 sotpi_poll, /* sop_poll */ 269 sotpi_close, /* sop_close */ 270 }; 271 272 /* 273 * Return a TPI socket vnode. 274 * 275 * Note that sockets assume that the driver will clone (either itself 276 * or by using the clone driver) i.e. a socket() call will always 277 * result in a new vnode being created. 278 */ 279 280 /* 281 * Common create code for socket and accept. If tso is set the values 282 * from that node is used instead of issuing a T_INFO_REQ. 283 */ 284 285 /* ARGSUSED */ 286 static struct sonode * 287 sotpi_create(struct sockparams *sp, int family, int type, int protocol, 288 int version, int sflags, int *errorp, cred_t *cr) 289 { 290 struct sonode *so; 291 kmem_cache_t *cp; 292 int sfamily = family; 293 294 ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 295 296 if (family == AF_NCA) { 297 /* 298 * The request is for an NCA socket so for NL7C use the 299 * INET domain instead and mark NL7C_AF_NCA below. 300 */ 301 family = AF_INET; 302 /* 303 * NL7C is not supported in the non-global zone, 304 * we enforce this restriction here. 305 */ 306 if (getzoneid() != GLOBAL_ZONEID) { 307 *errorp = ENOTSUP; 308 return (NULL); 309 } 310 } 311 312 /* 313 * to be compatible with old tpi socket implementation ignore 314 * sleep flag (sflags) passed in 315 */ 316 cp = (family == AF_UNIX) ? 
socktpi_unix_cache : socktpi_cache; 317 so = kmem_cache_alloc(cp, KM_SLEEP); 318 if (so == NULL) { 319 *errorp = ENOMEM; 320 return (NULL); 321 } 322 323 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 324 sotpi_info_init(so); 325 326 if (sfamily == AF_NCA) { 327 SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA; 328 } 329 330 if (version == SOV_DEFAULT) 331 version = so_default_version; 332 333 so->so_version = (short)version; 334 *errorp = 0; 335 336 return (so); 337 } 338 339 static void 340 sotpi_destroy(struct sonode *so) 341 { 342 kmem_cache_t *cp; 343 struct sockparams *origsp; 344 345 /* 346 * If there is a new dealloc function (ie. smod_destroy_func), 347 * then it should check the correctness of the ops. 348 */ 349 350 ASSERT(so->so_ops == &sotpi_sonodeops); 351 352 origsp = SOTOTPI(so)->sti_orig_sp; 353 354 sotpi_info_fini(so); 355 356 if (so->so_state & SS_FALLBACK_COMP) { 357 /* 358 * A fallback happend, which means that a sotpi_info_t struct 359 * was allocated (as opposed to being allocated from the TPI 360 * sonode cache. Therefore we explicitly free the struct 361 * here. 362 */ 363 sotpi_info_destroy(so); 364 ASSERT(origsp != NULL); 365 366 origsp->sp_smod_info->smod_sock_destroy_func(so); 367 SOCKPARAMS_DEC_REF(origsp); 368 } else { 369 sonode_fini(so); 370 cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache : 371 socktpi_cache; 372 kmem_cache_free(cp, so); 373 } 374 } 375 376 /* ARGSUSED1 */ 377 int 378 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 379 { 380 major_t maj; 381 dev_t newdev; 382 struct vnode *vp; 383 int error = 0; 384 struct stdata *stp; 385 386 sotpi_info_t *sti = SOTOTPI(so); 387 388 dprint(1, ("sotpi_init()\n")); 389 390 /* 391 * over write the sleep flag passed in but that is ok 392 * as tpi socket does not honor sleep flag. 393 */ 394 flags |= FREAD|FWRITE; 395 396 /* 397 * Record in so_flag that it is a clone. 
398 */ 399 if (getmajor(sti->sti_dev) == clone_major) 400 so->so_flag |= SOCLONE; 401 402 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 403 (so->so_family == AF_INET || so->so_family == AF_INET6) && 404 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 405 so->so_protocol == IPPROTO_IP)) { 406 /* Tell tcp or udp that it's talking to sockets */ 407 flags |= SO_SOCKSTR; 408 409 /* 410 * Here we indicate to socktpi_open() our attempt to 411 * make direct calls between sockfs and transport. 412 * The final decision is left to socktpi_open(). 413 */ 414 sti->sti_direct = 1; 415 416 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 417 if (so->so_type == SOCK_STREAM && tso != NULL) { 418 if (SOTOTPI(tso)->sti_direct) { 419 /* 420 * Inherit sti_direct from listener and pass 421 * SO_ACCEPTOR open flag to tcp, indicating 422 * that this is an accept fast-path instance. 423 */ 424 flags |= SO_ACCEPTOR; 425 } else { 426 /* 427 * sti_direct is not set on listener, meaning 428 * that the listener has been converted from 429 * a socket to a stream. Ensure that the 430 * acceptor inherits these settings. 431 */ 432 sti->sti_direct = 0; 433 flags &= ~SO_SOCKSTR; 434 } 435 } 436 } 437 438 /* 439 * Tell local transport that it is talking to sockets. 440 */ 441 if (so->so_family == AF_UNIX) { 442 flags |= SO_SOCKSTR; 443 } 444 445 vp = SOTOV(so); 446 newdev = vp->v_rdev; 447 maj = getmajor(newdev); 448 ASSERT(STREAMSTAB(maj)); 449 450 error = stropen(vp, &newdev, flags, cr); 451 452 stp = vp->v_stream; 453 if (error == 0) { 454 if (so->so_flag & SOCLONE) 455 ASSERT(newdev != vp->v_rdev); 456 mutex_enter(&so->so_lock); 457 sti->sti_dev = newdev; 458 vp->v_rdev = newdev; 459 mutex_exit(&so->so_lock); 460 461 if (stp->sd_flag & STRISTTY) { 462 /* 463 * this is a post SVR4 tty driver - a socket can not 464 * be a controlling terminal. Fail the open. 
465 */ 466 (void) sotpi_close(so, flags, cr); 467 return (ENOTTY); /* XXX */ 468 } 469 470 ASSERT(stp->sd_wrq != NULL); 471 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 472 473 /* 474 * If caller is interested in doing direct function call 475 * interface to/from transport module, probe the module 476 * directly beneath the streamhead to see if it qualifies. 477 * 478 * We turn off the direct interface when qualifications fail. 479 * In the acceptor case, we simply turn off the sti_direct 480 * flag on the socket. We do the fallback after the accept 481 * has completed, before the new socket is returned to the 482 * application. 483 */ 484 if (sti->sti_direct) { 485 queue_t *tq = stp->sd_wrq->q_next; 486 487 /* 488 * sti_direct is currently supported and tested 489 * only for tcp/udp; this is the main reason to 490 * have the following assertions. 491 */ 492 ASSERT(so->so_family == AF_INET || 493 so->so_family == AF_INET6); 494 ASSERT(so->so_protocol == IPPROTO_UDP || 495 so->so_protocol == IPPROTO_TCP || 496 so->so_protocol == IPPROTO_IP); 497 ASSERT(so->so_type == SOCK_DGRAM || 498 so->so_type == SOCK_STREAM); 499 500 /* 501 * Abort direct call interface if the module directly 502 * underneath the stream head is not defined with the 503 * _D_DIRECT flag. This could happen in the tcp or 504 * udp case, when some other module is autopushed 505 * above it, or for some reasons the expected module 506 * isn't purely D_MP (which is the main requirement). 507 * 508 * Else, SS_DIRECT is valid. If the read-side Q has 509 * _QSODIRECT set then and uioasync is enabled then 510 * set SS_SODIRECT to enable sodirect. 511 */ 512 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 513 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 514 int rval; 515 516 /* Continue on without direct calls */ 517 sti->sti_direct = 0; 518 519 /* 520 * Cannot issue ioctl on fallback socket since 521 * there is no conn associated with the queue. 
522 * The fallback downcall will notify the proto 523 * of the change. 524 */ 525 if (!(flags & SO_ACCEPTOR) && 526 !(flags & SO_FALLBACK)) { 527 if ((error = strioctl(vp, 528 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 529 cr, &rval)) != 0) { 530 (void) sotpi_close(so, flags, 531 cr); 532 return (error); 533 } 534 } 535 } else if ((_OTHERQ(tq)->q_flag & _QSODIRECT) && 536 uioasync.enabled) { 537 /* Enable sodirect */ 538 so->so_state |= SS_SODIRECT; 539 } 540 } 541 542 if (flags & SO_FALLBACK) { 543 /* 544 * The stream created does not have a conn. 545 * do stream set up after conn has been assigned 546 */ 547 return (error); 548 } 549 if (error = so_strinit(so, tso)) { 550 (void) sotpi_close(so, flags, cr); 551 return (error); 552 } 553 554 /* Wildcard */ 555 if (so->so_protocol != so->so_sockparams->sp_protocol) { 556 int protocol = so->so_protocol; 557 /* 558 * Issue SO_PROTOTYPE setsockopt. 559 */ 560 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 561 &protocol, (t_uscalar_t)sizeof (protocol), cr); 562 if (error != 0) { 563 (void) sotpi_close(so, flags, cr); 564 /* 565 * Setsockopt often fails with ENOPROTOOPT but 566 * socket() should fail with 567 * EPROTONOSUPPORT/EPROTOTYPE. 568 */ 569 return (EPROTONOSUPPORT); 570 } 571 } 572 573 } else { 574 /* 575 * While the same socket can not be reopened (unlike specfs) 576 * the stream head sets STREOPENFAIL when the autopush fails. 577 */ 578 if ((stp != NULL) && 579 (stp->sd_flag & STREOPENFAIL)) { 580 /* 581 * Open failed part way through. 582 */ 583 mutex_enter(&stp->sd_lock); 584 stp->sd_flag &= ~STREOPENFAIL; 585 mutex_exit(&stp->sd_lock); 586 (void) sotpi_close(so, flags, cr); 587 return (error); 588 /*NOTREACHED*/ 589 } 590 ASSERT(stp == NULL); 591 } 592 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 593 "sockfs open:maj %d vp %p so %p error %d", 594 maj, vp, so, error); 595 return (error); 596 } 597 598 /* 599 * Bind the socket to an unspecified address in sockfs only. 
600 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 601 * required in all cases. 602 */ 603 static void 604 so_automatic_bind(struct sonode *so) 605 { 606 sotpi_info_t *sti = SOTOTPI(so); 607 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 608 609 ASSERT(MUTEX_HELD(&so->so_lock)); 610 ASSERT(!(so->so_state & SS_ISBOUND)); 611 ASSERT(sti->sti_unbind_mp); 612 613 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 614 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 615 sti->sti_laddr_sa->sa_family = so->so_family; 616 so->so_state |= SS_ISBOUND; 617 } 618 619 620 /* 621 * bind the socket. 622 * 623 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 624 * are passed in we allow rebinding. Note that for backwards compatibility 625 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 626 * Thus the rebinding code is currently not executed. 627 * 628 * The constraints for rebinding are: 629 * - it is a SOCK_DGRAM, or 630 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 631 * and no listen() has been done. 632 * This rebinding code was added based on some language in the XNET book 633 * about not returning EINVAL it the protocol allows rebinding. However, 634 * this language is not present in the Posix socket draft. Thus maybe the 635 * rebinding logic should be deleted from the source. 636 * 637 * A null "name" can be used to unbind the socket if: 638 * - it is a SOCK_DGRAM, or 639 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 640 * and no listen() has been done. 
641 */ 642 /* ARGSUSED */ 643 static int 644 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 645 socklen_t namelen, int backlog, int flags, struct cred *cr) 646 { 647 struct T_bind_req bind_req; 648 struct T_bind_ack *bind_ack; 649 int error = 0; 650 mblk_t *mp; 651 void *addr; 652 t_uscalar_t addrlen; 653 int unbind_on_err = 1; 654 boolean_t clear_acceptconn_on_err = B_FALSE; 655 boolean_t restore_backlog_on_err = B_FALSE; 656 int save_so_backlog; 657 t_scalar_t PRIM_type = O_T_BIND_REQ; 658 boolean_t tcp_udp_xport; 659 void *nl7c = NULL; 660 sotpi_info_t *sti = SOTOTPI(so); 661 662 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 663 (void *)so, (void *)name, namelen, backlog, flags, 664 pr_state(so->so_state, so->so_mode))); 665 666 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 667 668 if (!(flags & _SOBIND_LOCK_HELD)) { 669 mutex_enter(&so->so_lock); 670 so_lock_single(so); /* Set SOLOCKED */ 671 } else { 672 ASSERT(MUTEX_HELD(&so->so_lock)); 673 ASSERT(so->so_flag & SOLOCKED); 674 } 675 676 /* 677 * Make sure that there is a preallocated unbind_req message 678 * before binding. This message allocated when the socket is 679 * created but it might be have been consumed. 680 */ 681 if (sti->sti_unbind_mp == NULL) { 682 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 683 /* NOTE: holding so_lock while sleeping */ 684 sti->sti_unbind_mp = 685 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 686 cr); 687 } 688 689 if (flags & _SOBIND_REBIND) { 690 /* 691 * Called from solisten after doing an sotpi_unbind() or 692 * potentially without the unbind (latter for AF_INET{,6}). 
693 */ 694 ASSERT(name == NULL && namelen == 0); 695 696 if (so->so_family == AF_UNIX) { 697 ASSERT(sti->sti_ux_bound_vp); 698 addr = &sti->sti_ux_laddr; 699 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 700 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 701 "addr 0x%p, vp %p\n", 702 addrlen, 703 (void *)((struct so_ux_addr *)addr)->soua_vp, 704 (void *)sti->sti_ux_bound_vp)); 705 } else { 706 addr = sti->sti_laddr_sa; 707 addrlen = (t_uscalar_t)sti->sti_laddr_len; 708 } 709 } else if (flags & _SOBIND_UNSPEC) { 710 ASSERT(name == NULL && namelen == 0); 711 712 /* 713 * The caller checked SS_ISBOUND but not necessarily 714 * under so_lock 715 */ 716 if (so->so_state & SS_ISBOUND) { 717 /* No error */ 718 goto done; 719 } 720 721 /* Set an initial local address */ 722 switch (so->so_family) { 723 case AF_UNIX: 724 /* 725 * Use an address with same size as struct sockaddr 726 * just like BSD. 727 */ 728 sti->sti_laddr_len = 729 (socklen_t)sizeof (struct sockaddr); 730 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 731 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 732 sti->sti_laddr_sa->sa_family = so->so_family; 733 734 /* 735 * Pass down an address with the implicit bind 736 * magic number and the rest all zeros. 737 * The transport will return a unique address. 738 */ 739 sti->sti_ux_laddr.soua_vp = NULL; 740 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 741 addr = &sti->sti_ux_laddr; 742 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 743 break; 744 745 case AF_INET: 746 case AF_INET6: 747 /* 748 * An unspecified bind in TPI has a NULL address. 749 * Set the address in sockfs to have the sa_family. 750 */ 751 sti->sti_laddr_len = (so->so_family == AF_INET) ? 
752 (socklen_t)sizeof (sin_t) : 753 (socklen_t)sizeof (sin6_t); 754 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 755 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 756 sti->sti_laddr_sa->sa_family = so->so_family; 757 addr = NULL; 758 addrlen = 0; 759 break; 760 761 default: 762 /* 763 * An unspecified bind in TPI has a NULL address. 764 * Set the address in sockfs to be zero length. 765 * 766 * Can not assume there is a sa_family for all 767 * protocol families. For example, AF_X25 does not 768 * have a family field. 769 */ 770 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 771 sti->sti_laddr_len = 0; /* XXX correct? */ 772 addr = NULL; 773 addrlen = 0; 774 break; 775 } 776 777 } else { 778 if (so->so_state & SS_ISBOUND) { 779 /* 780 * If it is ok to rebind the socket, first unbind 781 * with the transport. A rebind to the NULL address 782 * is interpreted as an unbind. 783 * Note that a bind to NULL in BSD does unbind the 784 * socket but it fails with EINVAL. 785 * Note that regular sockets set SOV_SOCKBSD i.e. 786 * _SOBIND_SOCKBSD gets set here hence no type of 787 * socket does currently allow rebinding. 788 * 789 * If the name is NULL just do an unbind. 
790 */ 791 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 792 name != NULL) { 793 error = EINVAL; 794 unbind_on_err = 0; 795 eprintsoline(so, error); 796 goto done; 797 } 798 if ((so->so_mode & SM_CONNREQUIRED) && 799 (so->so_state & SS_CANTREBIND)) { 800 error = EINVAL; 801 unbind_on_err = 0; 802 eprintsoline(so, error); 803 goto done; 804 } 805 error = sotpi_unbind(so, 0); 806 if (error) { 807 eprintsoline(so, error); 808 goto done; 809 } 810 ASSERT(!(so->so_state & SS_ISBOUND)); 811 if (name == NULL) { 812 so->so_state &= 813 ~(SS_ISCONNECTED|SS_ISCONNECTING); 814 goto done; 815 } 816 } 817 818 /* X/Open requires this check */ 819 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 820 if (xnet_check_print) { 821 printf("sockfs: X/Open bind state check " 822 "caused EINVAL\n"); 823 } 824 error = EINVAL; 825 goto done; 826 } 827 828 switch (so->so_family) { 829 case AF_UNIX: 830 /* 831 * All AF_UNIX addresses are nul terminated 832 * when copied (copyin_name) in so the minimum 833 * length is 3 bytes. 834 */ 835 if (name == NULL || 836 (ssize_t)namelen <= sizeof (short) + 1) { 837 error = EISDIR; 838 eprintsoline(so, error); 839 goto done; 840 } 841 /* 842 * Verify so_family matches the bound family. 843 * BSD does not check this for AF_UNIX resulting 844 * in funny mknods. 845 */ 846 if (name->sa_family != so->so_family) { 847 error = EAFNOSUPPORT; 848 goto done; 849 } 850 break; 851 case AF_INET: 852 if (name == NULL) { 853 error = EINVAL; 854 eprintsoline(so, error); 855 goto done; 856 } 857 if ((size_t)namelen != sizeof (sin_t)) { 858 error = name->sa_family != so->so_family ? 859 EAFNOSUPPORT : EINVAL; 860 eprintsoline(so, error); 861 goto done; 862 } 863 if ((flags & _SOBIND_XPG4_2) && 864 (name->sa_family != so->so_family)) { 865 /* 866 * This check has to be made for X/Open 867 * sockets however application failures have 868 * been observed when it is applied to 869 * all sockets. 
870 */ 871 error = EAFNOSUPPORT; 872 eprintsoline(so, error); 873 goto done; 874 } 875 /* 876 * Force a zero sa_family to match so_family. 877 * 878 * Some programs like inetd(1M) don't set the 879 * family field. Other programs leave 880 * sin_family set to garbage - SunOS 4.X does 881 * not check the family field on a bind. 882 * We use the family field that 883 * was passed in to the socket() call. 884 */ 885 name->sa_family = so->so_family; 886 break; 887 888 case AF_INET6: { 889 #ifdef DEBUG 890 sin6_t *sin6 = (sin6_t *)name; 891 #endif /* DEBUG */ 892 893 if (name == NULL) { 894 error = EINVAL; 895 eprintsoline(so, error); 896 goto done; 897 } 898 if ((size_t)namelen != sizeof (sin6_t)) { 899 error = name->sa_family != so->so_family ? 900 EAFNOSUPPORT : EINVAL; 901 eprintsoline(so, error); 902 goto done; 903 } 904 if (name->sa_family != so->so_family) { 905 /* 906 * With IPv6 we require the family to match 907 * unlike in IPv4. 908 */ 909 error = EAFNOSUPPORT; 910 eprintsoline(so, error); 911 goto done; 912 } 913 #ifdef DEBUG 914 /* 915 * Verify that apps don't forget to clear 916 * sin6_scope_id etc 917 */ 918 if (sin6->sin6_scope_id != 0 && 919 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 920 zcmn_err(getzoneid(), CE_WARN, 921 "bind with uninitialized sin6_scope_id " 922 "(%d) on socket. Pid = %d\n", 923 (int)sin6->sin6_scope_id, 924 (int)curproc->p_pid); 925 } 926 if (sin6->__sin6_src_id != 0) { 927 zcmn_err(getzoneid(), CE_WARN, 928 "bind with uninitialized __sin6_src_id " 929 "(%d) on socket. Pid = %d\n", 930 (int)sin6->__sin6_src_id, 931 (int)curproc->p_pid); 932 } 933 #endif /* DEBUG */ 934 break; 935 } 936 default: 937 /* 938 * Don't do any length or sa_family check to allow 939 * non-sockaddr style addresses. 
940 */ 941 if (name == NULL) { 942 error = EINVAL; 943 eprintsoline(so, error); 944 goto done; 945 } 946 break; 947 } 948 949 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 950 error = ENAMETOOLONG; 951 eprintsoline(so, error); 952 goto done; 953 } 954 /* 955 * Save local address. 956 */ 957 sti->sti_laddr_len = (socklen_t)namelen; 958 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 959 bcopy(name, sti->sti_laddr_sa, namelen); 960 961 addr = sti->sti_laddr_sa; 962 addrlen = (t_uscalar_t)sti->sti_laddr_len; 963 switch (so->so_family) { 964 case AF_INET6: 965 case AF_INET: 966 break; 967 case AF_UNIX: { 968 struct sockaddr_un *soun = 969 (struct sockaddr_un *)sti->sti_laddr_sa; 970 struct vnode *vp, *rvp; 971 struct vattr vattr; 972 973 ASSERT(sti->sti_ux_bound_vp == NULL); 974 /* 975 * Create vnode for the specified path name. 976 * Keep vnode held with a reference in sti_ux_bound_vp. 977 * Use the vnode pointer as the address used in the 978 * bind with the transport. 979 * 980 * Use the same mode as in BSD. In particular this does 981 * not observe the umask. 982 */ 983 /* MAXPATHLEN + soun_family + nul termination */ 984 if (sti->sti_laddr_len > 985 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 986 error = ENAMETOOLONG; 987 eprintsoline(so, error); 988 goto done; 989 } 990 vattr.va_type = VSOCK; 991 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 992 vattr.va_mask = AT_TYPE|AT_MODE; 993 /* NOTE: holding so_lock */ 994 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 995 EXCL, 0, &vp, CRMKNOD, 0, 0); 996 if (error) { 997 if (error == EEXIST) 998 error = EADDRINUSE; 999 eprintsoline(so, error); 1000 goto done; 1001 } 1002 /* 1003 * Establish pointer from the underlying filesystem 1004 * vnode to the socket node. 1005 * sti_ux_bound_vp and v_stream->sd_vnode form the 1006 * cross-linkage between the underlying filesystem 1007 * node and the socket node. 
1008 */ 1009 1010 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 1011 VN_HOLD(rvp); 1012 VN_RELE(vp); 1013 vp = rvp; 1014 } 1015 1016 ASSERT(SOTOV(so)->v_stream); 1017 mutex_enter(&vp->v_lock); 1018 vp->v_stream = SOTOV(so)->v_stream; 1019 sti->sti_ux_bound_vp = vp; 1020 mutex_exit(&vp->v_lock); 1021 1022 /* 1023 * Use the vnode pointer value as a unique address 1024 * (together with the magic number to avoid conflicts 1025 * with implicit binds) in the transport provider. 1026 */ 1027 sti->sti_ux_laddr.soua_vp = 1028 (void *)sti->sti_ux_bound_vp; 1029 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1030 addr = &sti->sti_ux_laddr; 1031 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1032 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1033 addrlen, 1034 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1035 break; 1036 } 1037 } /* end switch (so->so_family) */ 1038 } 1039 1040 /* 1041 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1042 * the transport can start passing up T_CONN_IND messages 1043 * as soon as it receives the bind req and strsock_proto() 1044 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1045 */ 1046 if (flags & _SOBIND_LISTEN) { 1047 if ((so->so_state & SS_ACCEPTCONN) == 0) 1048 clear_acceptconn_on_err = B_TRUE; 1049 save_so_backlog = so->so_backlog; 1050 restore_backlog_on_err = B_TRUE; 1051 so->so_state |= SS_ACCEPTCONN; 1052 so->so_backlog = backlog; 1053 } 1054 1055 /* 1056 * If NL7C addr(s) have been configured check for addr/port match, 1057 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 1058 * 1059 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 1060 * family sockets only. If match mark as such. 
1061 */ 1062 if (nl7c_enabled && ((addr != NULL && 1063 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1064 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 1065 sti->sti_nl7c_flags == NL7C_AF_NCA)) { 1066 /* 1067 * NL7C is not supported in non-global zones, 1068 * we enforce this restriction here. 1069 */ 1070 if (so->so_zoneid == GLOBAL_ZONEID) { 1071 /* An NL7C socket, mark it */ 1072 sti->sti_nl7c_flags |= NL7C_ENABLED; 1073 if (nl7c == NULL) { 1074 /* 1075 * Was an AF_NCA bind() so add it to the 1076 * addr list for reporting purposes. 1077 */ 1078 nl7c = nl7c_add_addr(addr, addrlen); 1079 } 1080 } else 1081 nl7c = NULL; 1082 } 1083 1084 /* 1085 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1086 * for other transports we will send in a O_T_BIND_REQ. 1087 */ 1088 if (tcp_udp_xport && 1089 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1090 PRIM_type = T_BIND_REQ; 1091 1092 bind_req.PRIM_type = PRIM_type; 1093 bind_req.ADDR_length = addrlen; 1094 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1095 bind_req.CONIND_number = backlog; 1096 /* NOTE: holding so_lock while sleeping */ 1097 mp = soallocproto2(&bind_req, sizeof (bind_req), 1098 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1099 sti->sti_laddr_valid = 0; 1100 1101 /* Done using sti_laddr_sa - can drop the lock */ 1102 mutex_exit(&so->so_lock); 1103 1104 /* 1105 * Intercept the bind_req message here to check if this <address/port> 1106 * was configured as an SSL proxy server, or if another endpoint was 1107 * already configured to act as a proxy for us. 1108 * 1109 * Note, only if NL7C not enabled for this socket. 
1110 */ 1111 if (nl7c == NULL && 1112 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1113 so->so_type == SOCK_STREAM) { 1114 1115 if (sti->sti_kssl_ent != NULL) { 1116 kssl_release_ent(sti->sti_kssl_ent, so, 1117 sti->sti_kssl_type); 1118 sti->sti_kssl_ent = NULL; 1119 } 1120 1121 sti->sti_kssl_type = kssl_check_proxy(mp, so, 1122 &sti->sti_kssl_ent); 1123 switch (sti->sti_kssl_type) { 1124 case KSSL_NO_PROXY: 1125 break; 1126 1127 case KSSL_HAS_PROXY: 1128 mutex_enter(&so->so_lock); 1129 goto skip_transport; 1130 1131 case KSSL_IS_PROXY: 1132 break; 1133 } 1134 } 1135 1136 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1137 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1138 if (error) { 1139 eprintsoline(so, error); 1140 mutex_enter(&so->so_lock); 1141 goto done; 1142 } 1143 1144 mutex_enter(&so->so_lock); 1145 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1146 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1147 if (error) { 1148 eprintsoline(so, error); 1149 goto done; 1150 } 1151 skip_transport: 1152 ASSERT(mp); 1153 /* 1154 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1155 * strsock_proto while the lock was dropped above, the bind 1156 * is allowed to complete. 1157 */ 1158 1159 /* Mark as bound. This will be undone if we detect errors below. */ 1160 if (flags & _SOBIND_NOXLATE) { 1161 ASSERT(so->so_family == AF_UNIX); 1162 sti->sti_faddr_noxlate = 1; 1163 } 1164 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1165 so->so_state |= SS_ISBOUND; 1166 ASSERT(sti->sti_unbind_mp); 1167 1168 /* note that we've already set SS_ACCEPTCONN above */ 1169 1170 /* 1171 * Recompute addrlen - an unspecied bind sent down an 1172 * address of length zero but we expect the appropriate length 1173 * in return. 1174 */ 1175 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 
1176 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1177 1178 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1179 /* 1180 * The alignment restriction is really too strict but 1181 * we want enough alignment to inspect the fields of 1182 * a sockaddr_in. 1183 */ 1184 addr = sogetoff(mp, bind_ack->ADDR_offset, 1185 bind_ack->ADDR_length, 1186 __TPI_ALIGN_SIZE); 1187 if (addr == NULL) { 1188 freemsg(mp); 1189 error = EPROTO; 1190 eprintsoline(so, error); 1191 goto done; 1192 } 1193 if (!(flags & _SOBIND_UNSPEC)) { 1194 /* 1195 * Verify that the transport didn't return something we 1196 * did not want e.g. an address other than what we asked for. 1197 * 1198 * NOTE: These checks would go away if/when we switch to 1199 * using the new TPI (in which the transport would fail 1200 * the request instead of assigning a different address). 1201 * 1202 * NOTE2: For protocols that we don't know (i.e. any 1203 * other than AF_INET6, AF_INET and AF_UNIX), we 1204 * cannot know if the transport should be expected to 1205 * return the same address as that requested. 1206 * 1207 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1208 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 1209 * 1210 * For example, in the case of netatalk it may be 1211 * inappropriate for the transport to return the 1212 * requested address (as it may have allocated a local 1213 * port number in behaviour similar to that of an 1214 * AF_INET bind request with a port number of zero). 1215 * 1216 * Given the definition of O_T_BIND_REQ, where the 1217 * transport may bind to an address other than the 1218 * requested address, it's not possible to determine 1219 * whether a returned address that differs from the 1220 * requested address is a reason to fail (because the 1221 * requested address was not available) or succeed 1222 * (because the transport allocated an appropriate 1223 * address and/or port). 
1224 * 1225 * sockfs currently requires that the transport return 1226 * the requested address in the T_BIND_ACK, unless 1227 * there is code here to allow for any discrepancy. 1228 * Such code exists for AF_INET and AF_INET6. 1229 * 1230 * Netatalk chooses to return the requested address 1231 * rather than the (correct) allocated address. This 1232 * means that netatalk violates the TPI specification 1233 * (and would not function correctly if used from a 1234 * TLI application), but it does mean that it works 1235 * with sockfs. 1236 * 1237 * As noted above, using the newer XTI bind primitive 1238 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1239 * allow sockfs to be more sure about whether or not 1240 * the bind request had succeeded (as transports are 1241 * not permitted to bind to a different address than 1242 * that requested - they must return failure). 1243 * Unfortunately, support for T_BIND_REQ may not be 1244 * present in all transport implementations (netatalk, 1245 * for example, doesn't have it), making the 1246 * transition difficult. 1247 */ 1248 if (bind_ack->ADDR_length != addrlen) { 1249 /* Assumes that the requested address was in use */ 1250 freemsg(mp); 1251 error = EADDRINUSE; 1252 eprintsoline(so, error); 1253 goto done; 1254 } 1255 1256 switch (so->so_family) { 1257 case AF_INET6: 1258 case AF_INET: { 1259 sin_t *rname, *aname; 1260 1261 rname = (sin_t *)addr; 1262 aname = (sin_t *)sti->sti_laddr_sa; 1263 1264 /* 1265 * Take advantage of the alignment 1266 * of sin_port and sin6_port which fall 1267 * in the same place in their data structures. 1268 * Just use sin_port for either address family. 1269 * 1270 * This may become a problem if (heaven forbid) 1271 * there's a separate ipv6port_reserved... :-P 1272 * 1273 * Binding to port 0 has the semantics of letting 1274 * the transport bind to any port. 
1275 * 1276 * If the transport is TCP or UDP since we had sent 1277 * a T_BIND_REQ we would not get a port other than 1278 * what we asked for. 1279 */ 1280 if (tcp_udp_xport) { 1281 /* 1282 * Pick up the new port number if we bound to 1283 * port 0. 1284 */ 1285 if (aname->sin_port == 0) 1286 aname->sin_port = rname->sin_port; 1287 sti->sti_laddr_valid = 1; 1288 break; 1289 } 1290 if (aname->sin_port != 0 && 1291 aname->sin_port != rname->sin_port) { 1292 freemsg(mp); 1293 error = EADDRINUSE; 1294 eprintsoline(so, error); 1295 goto done; 1296 } 1297 /* 1298 * Pick up the new port number if we bound to port 0. 1299 */ 1300 aname->sin_port = rname->sin_port; 1301 1302 /* 1303 * Unfortunately, addresses aren't _quite_ the same. 1304 */ 1305 if (so->so_family == AF_INET) { 1306 if (aname->sin_addr.s_addr != 1307 rname->sin_addr.s_addr) { 1308 freemsg(mp); 1309 error = EADDRNOTAVAIL; 1310 eprintsoline(so, error); 1311 goto done; 1312 } 1313 } else { 1314 sin6_t *rname6 = (sin6_t *)rname; 1315 sin6_t *aname6 = (sin6_t *)aname; 1316 1317 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1318 &rname6->sin6_addr)) { 1319 freemsg(mp); 1320 error = EADDRNOTAVAIL; 1321 eprintsoline(so, error); 1322 goto done; 1323 } 1324 } 1325 break; 1326 } 1327 case AF_UNIX: 1328 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1329 freemsg(mp); 1330 error = EADDRINUSE; 1331 eprintsoline(so, error); 1332 eprintso(so, 1333 ("addrlen %d, addr 0x%x, vp %p\n", 1334 addrlen, *((int *)addr), 1335 (void *)sti->sti_ux_bound_vp)); 1336 goto done; 1337 } 1338 sti->sti_laddr_valid = 1; 1339 break; 1340 default: 1341 /* 1342 * NOTE: This assumes that addresses can be 1343 * byte-compared for equivalence. 
1344 */ 1345 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1346 freemsg(mp); 1347 error = EADDRINUSE; 1348 eprintsoline(so, error); 1349 goto done; 1350 } 1351 /* 1352 * Don't mark sti_laddr_valid, as we cannot be 1353 * sure that the returned address is the real 1354 * bound address when talking to an unknown 1355 * transport. 1356 */ 1357 break; 1358 } 1359 } else { 1360 /* 1361 * Save for returned address for getsockname. 1362 * Needed for unspecific bind unless transport supports 1363 * the TI_GETMYNAME ioctl. 1364 * Do this for AF_INET{,6} even though they do, as 1365 * caching info here is much better performance than 1366 * a TPI/STREAMS trip to the transport for getsockname. 1367 * Any which can't for some reason _must_ _not_ set 1368 * sti_laddr_valid here for the caching version of 1369 * getsockname to not break; 1370 */ 1371 switch (so->so_family) { 1372 case AF_UNIX: 1373 /* 1374 * Record the address bound with the transport 1375 * for use by socketpair. 1376 */ 1377 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1378 sti->sti_laddr_valid = 1; 1379 break; 1380 case AF_INET: 1381 case AF_INET6: 1382 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1383 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1384 sti->sti_laddr_valid = 1; 1385 break; 1386 default: 1387 /* 1388 * Don't mark sti_laddr_valid, as we cannot be 1389 * sure that the returned address is the real 1390 * bound address when talking to an unknown 1391 * transport. 
1392 */ 1393 break; 1394 } 1395 } 1396 1397 if (nl7c != NULL) { 1398 /* Register listen()er sonode pointer with NL7C */ 1399 nl7c_listener_addr(nl7c, so); 1400 } 1401 1402 freemsg(mp); 1403 1404 done: 1405 if (error) { 1406 /* reset state & backlog to values held on entry */ 1407 if (clear_acceptconn_on_err == B_TRUE) 1408 so->so_state &= ~SS_ACCEPTCONN; 1409 if (restore_backlog_on_err == B_TRUE) 1410 so->so_backlog = save_so_backlog; 1411 1412 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1413 int err; 1414 1415 err = sotpi_unbind(so, 0); 1416 /* LINTED - statement has no consequent: if */ 1417 if (err) { 1418 eprintsoline(so, error); 1419 } else { 1420 ASSERT(!(so->so_state & SS_ISBOUND)); 1421 } 1422 } 1423 } 1424 if (!(flags & _SOBIND_LOCK_HELD)) { 1425 so_unlock_single(so, SOLOCKED); 1426 mutex_exit(&so->so_lock); 1427 } else { 1428 ASSERT(MUTEX_HELD(&so->so_lock)); 1429 ASSERT(so->so_flag & SOLOCKED); 1430 } 1431 return (error); 1432 } 1433 1434 /* bind the socket */ 1435 static int 1436 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1437 int flags, struct cred *cr) 1438 { 1439 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1440 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1441 1442 flags &= ~_SOBIND_SOCKETPAIR; 1443 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1444 } 1445 1446 /* 1447 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1448 * address, or when listen needs to unbind and bind. 1449 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1450 * so that a sobind can pick them up. 
1451 */ 1452 static int 1453 sotpi_unbind(struct sonode *so, int flags) 1454 { 1455 struct T_unbind_req unbind_req; 1456 int error = 0; 1457 mblk_t *mp; 1458 sotpi_info_t *sti = SOTOTPI(so); 1459 1460 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1461 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1462 1463 ASSERT(MUTEX_HELD(&so->so_lock)); 1464 ASSERT(so->so_flag & SOLOCKED); 1465 1466 if (!(so->so_state & SS_ISBOUND)) { 1467 error = EINVAL; 1468 eprintsoline(so, error); 1469 goto done; 1470 } 1471 1472 mutex_exit(&so->so_lock); 1473 1474 /* 1475 * Flush the read and write side (except stream head read queue) 1476 * and send down T_UNBIND_REQ. 1477 */ 1478 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1479 1480 unbind_req.PRIM_type = T_UNBIND_REQ; 1481 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1482 0, _ALLOC_SLEEP, CRED()); 1483 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1484 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1485 mutex_enter(&so->so_lock); 1486 if (error) { 1487 eprintsoline(so, error); 1488 goto done; 1489 } 1490 1491 error = sowaitokack(so, T_UNBIND_REQ); 1492 if (error) { 1493 eprintsoline(so, error); 1494 goto done; 1495 } 1496 1497 /* 1498 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1499 * strsock_proto while the lock was dropped above, the unbind 1500 * is allowed to complete. 1501 */ 1502 if (!(flags & _SOUNBIND_REBIND)) { 1503 /* 1504 * Clear out bound address. 
1505 */ 1506 vnode_t *vp; 1507 1508 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1509 1510 /* Undo any SSL proxy setup */ 1511 if ((so->so_family == AF_INET || 1512 so->so_family == AF_INET6) && 1513 (so->so_type == SOCK_STREAM) && 1514 (sti->sti_kssl_ent != NULL)) { 1515 kssl_release_ent(sti->sti_kssl_ent, so, 1516 sti->sti_kssl_type); 1517 sti->sti_kssl_ent = NULL; 1518 sti->sti_kssl_type = KSSL_NO_PROXY; 1519 } 1520 sti->sti_ux_bound_vp = NULL; 1521 vn_rele_stream(vp); 1522 } 1523 /* Clear out address */ 1524 sti->sti_laddr_len = 0; 1525 } 1526 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1527 sti->sti_laddr_valid = 0; 1528 1529 done: 1530 1531 /* If the caller held the lock don't release it here */ 1532 ASSERT(MUTEX_HELD(&so->so_lock)); 1533 ASSERT(so->so_flag & SOLOCKED); 1534 1535 return (error); 1536 } 1537 1538 /* 1539 * listen on the socket. 1540 * For TPI conforming transports this has to first unbind with the transport 1541 * and then bind again using the new backlog. 1542 */ 1543 /* ARGSUSED */ 1544 int 1545 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1546 { 1547 int error = 0; 1548 sotpi_info_t *sti = SOTOTPI(so); 1549 1550 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1551 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1552 1553 if (sti->sti_serv_type == T_CLTS) 1554 return (EOPNOTSUPP); 1555 1556 /* 1557 * If the socket is ready to accept connections already, then 1558 * return without doing anything. This avoids a problem where 1559 * a second listen() call fails if a connection is pending and 1560 * leaves the socket unbound. Only when we are not unbinding 1561 * with the transport can we safely increase the backlog. 
1562 */ 1563 if (so->so_state & SS_ACCEPTCONN && 1564 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1565 /*CONSTCOND*/ 1566 !solisten_tpi_tcp)) 1567 return (0); 1568 1569 if (so->so_state & SS_ISCONNECTED) 1570 return (EINVAL); 1571 1572 mutex_enter(&so->so_lock); 1573 so_lock_single(so); /* Set SOLOCKED */ 1574 1575 /* 1576 * If the listen doesn't change the backlog we do nothing. 1577 * This avoids an EPROTO error from the transport. 1578 */ 1579 if ((so->so_state & SS_ACCEPTCONN) && 1580 so->so_backlog == backlog) 1581 goto done; 1582 1583 if (!(so->so_state & SS_ISBOUND)) { 1584 /* 1585 * Must have been explicitly bound in the UNIX domain. 1586 */ 1587 if (so->so_family == AF_UNIX) { 1588 error = EINVAL; 1589 goto done; 1590 } 1591 error = sotpi_bindlisten(so, NULL, 0, backlog, 1592 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1593 } else if (backlog > 0) { 1594 /* 1595 * AF_INET{,6} hack to avoid losing the port. 1596 * Assumes that all AF_INET{,6} transports can handle a 1597 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1598 * has already bound thus it is possible to avoid the unbind. 1599 */ 1600 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1601 /*CONSTCOND*/ 1602 !solisten_tpi_tcp)) { 1603 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1604 if (error) 1605 goto done; 1606 } 1607 error = sotpi_bindlisten(so, NULL, 0, backlog, 1608 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1609 } else { 1610 so->so_state |= SS_ACCEPTCONN; 1611 so->so_backlog = backlog; 1612 } 1613 if (error) 1614 goto done; 1615 ASSERT(so->so_state & SS_ACCEPTCONN); 1616 done: 1617 so_unlock_single(so, SOLOCKED); 1618 mutex_exit(&so->so_lock); 1619 return (error); 1620 } 1621 1622 /* 1623 * Disconnect either a specified seqno or all (-1). 1624 * The former is used on listening sockets only. 1625 * 1626 * When seqno == -1 sodisconnect could call sotpi_unbind. 
However, 1627 * the current use of sodisconnect(seqno == -1) is only for shutdown 1628 * so there is no point (and potentially incorrect) to unbind. 1629 */ 1630 static int 1631 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1632 { 1633 struct T_discon_req discon_req; 1634 int error = 0; 1635 mblk_t *mp; 1636 1637 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1638 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1639 1640 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1641 mutex_enter(&so->so_lock); 1642 so_lock_single(so); /* Set SOLOCKED */ 1643 } else { 1644 ASSERT(MUTEX_HELD(&so->so_lock)); 1645 ASSERT(so->so_flag & SOLOCKED); 1646 } 1647 1648 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1649 error = EINVAL; 1650 eprintsoline(so, error); 1651 goto done; 1652 } 1653 1654 mutex_exit(&so->so_lock); 1655 /* 1656 * Flush the write side (unless this is a listener) 1657 * and then send down a T_DISCON_REQ. 1658 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1659 * and other messages.) 1660 */ 1661 if (!(so->so_state & SS_ACCEPTCONN)) 1662 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1663 1664 discon_req.PRIM_type = T_DISCON_REQ; 1665 discon_req.SEQ_number = seqno; 1666 mp = soallocproto1(&discon_req, sizeof (discon_req), 1667 0, _ALLOC_SLEEP, CRED()); 1668 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1669 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1670 mutex_enter(&so->so_lock); 1671 if (error) { 1672 eprintsoline(so, error); 1673 goto done; 1674 } 1675 1676 error = sowaitokack(so, T_DISCON_REQ); 1677 if (error) { 1678 eprintsoline(so, error); 1679 goto done; 1680 } 1681 /* 1682 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1683 * strsock_proto while the lock was dropped above, the disconnect 1684 * is allowed to complete. However, it is not possible to 1685 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 
1686 */ 1687 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1688 SOTOTPI(so)->sti_laddr_valid = 0; 1689 SOTOTPI(so)->sti_faddr_valid = 0; 1690 done: 1691 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1692 so_unlock_single(so, SOLOCKED); 1693 mutex_exit(&so->so_lock); 1694 } else { 1695 /* If the caller held the lock don't release it here */ 1696 ASSERT(MUTEX_HELD(&so->so_lock)); 1697 ASSERT(so->so_flag & SOLOCKED); 1698 } 1699 return (error); 1700 } 1701 1702 /* ARGSUSED */ 1703 int 1704 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1705 struct sonode **nsop) 1706 { 1707 struct T_conn_ind *conn_ind; 1708 struct T_conn_res *conn_res; 1709 int error = 0; 1710 mblk_t *mp, *ctxmp, *ack_mp; 1711 struct sonode *nso; 1712 vnode_t *nvp; 1713 void *src; 1714 t_uscalar_t srclen; 1715 void *opt; 1716 t_uscalar_t optlen; 1717 t_scalar_t PRIM_type; 1718 t_scalar_t SEQ_number; 1719 size_t sinlen; 1720 sotpi_info_t *sti = SOTOTPI(so); 1721 sotpi_info_t *nsti; 1722 1723 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1724 (void *)so, fflag, (void *)nsop, 1725 pr_state(so->so_state, so->so_mode))); 1726 1727 /* 1728 * Defer single-threading the accepting socket until 1729 * the T_CONN_IND has been received and parsed and the 1730 * new sonode has been opened. 1731 */ 1732 1733 /* Check that we are not already connected */ 1734 if ((so->so_state & SS_ACCEPTCONN) == 0) 1735 goto conn_bad; 1736 again: 1737 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1738 goto e_bad; 1739 1740 ASSERT(mp != NULL); 1741 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1742 ctxmp = mp->b_cont; 1743 1744 /* 1745 * Save SEQ_number for error paths. 
1746 */ 1747 SEQ_number = conn_ind->SEQ_number; 1748 1749 srclen = conn_ind->SRC_length; 1750 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1751 if (src == NULL) { 1752 error = EPROTO; 1753 freemsg(mp); 1754 eprintsoline(so, error); 1755 goto disconnect_unlocked; 1756 } 1757 optlen = conn_ind->OPT_length; 1758 switch (so->so_family) { 1759 case AF_INET: 1760 case AF_INET6: 1761 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1762 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1763 &opt, conn_ind->OPT_length); 1764 } else { 1765 /* 1766 * The transport (in this case TCP) hasn't sent up 1767 * a pointer to an instance for the accept fast-path. 1768 * Disable fast-path completely because the call to 1769 * sotpi_create() below would otherwise create an 1770 * incomplete TCP instance, which would lead to 1771 * problems when sockfs sends a normal T_CONN_RES 1772 * message down the new stream. 1773 */ 1774 if (sti->sti_direct) { 1775 int rval; 1776 /* 1777 * For consistency we inform tcp to disable 1778 * direct interface on the listener, though 1779 * we can certainly live without doing this 1780 * because no data will ever travel upstream 1781 * on the listening socket. 1782 */ 1783 sti->sti_direct = 0; 1784 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1785 0, 0, K_TO_K, cr, &rval); 1786 } 1787 opt = NULL; 1788 optlen = 0; 1789 } 1790 break; 1791 case AF_UNIX: 1792 default: 1793 if (optlen != 0) { 1794 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1795 __TPI_ALIGN_SIZE); 1796 if (opt == NULL) { 1797 error = EPROTO; 1798 freemsg(mp); 1799 eprintsoline(so, error); 1800 goto disconnect_unlocked; 1801 } 1802 } 1803 if (so->so_family == AF_UNIX) { 1804 if (!sti->sti_faddr_noxlate) { 1805 src = NULL; 1806 srclen = 0; 1807 } 1808 /* Extract src address from options */ 1809 if (optlen != 0) 1810 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1811 } 1812 break; 1813 } 1814 1815 /* 1816 * Create the new socket. 
1817 */ 1818 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1819 if (nso == NULL) { 1820 ASSERT(error != 0); 1821 /* 1822 * Accept can not fail with ENOBUFS. sotpi_create 1823 * sleeps waiting for memory until a signal is caught 1824 * so return EINTR. 1825 */ 1826 freemsg(mp); 1827 if (error == ENOBUFS) 1828 error = EINTR; 1829 goto e_disc_unl; 1830 } 1831 nvp = SOTOV(nso); 1832 nsti = SOTOTPI(nso); 1833 1834 /* 1835 * If the transport sent up an SSL connection context, then attach 1836 * it the new socket, and set the (sd_wputdatafunc)() and 1837 * (sd_rputdatafunc)() stream head hooks to intercept and process 1838 * SSL records. 1839 */ 1840 if (ctxmp != NULL) { 1841 /* 1842 * This kssl_ctx_t is already held for us by the transport. 1843 * So, we don't need to do a kssl_hold_ctx() here. 1844 */ 1845 nsti->sti_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1846 freemsg(ctxmp); 1847 mp->b_cont = NULL; 1848 strsetrwputdatahooks(nvp, strsock_kssl_input, 1849 strsock_kssl_output); 1850 1851 /* Disable sodirect if any */ 1852 if (nso->so_direct != NULL) { 1853 mutex_enter(nso->so_direct->sod_lockp); 1854 SOD_DISABLE(nso->so_direct); 1855 mutex_exit(nso->so_direct->sod_lockp); 1856 } 1857 } 1858 #ifdef DEBUG 1859 /* 1860 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1861 * it's inherited early to allow debugging of the accept code itself. 1862 */ 1863 nso->so_options |= so->so_options & SO_DEBUG; 1864 #endif /* DEBUG */ 1865 1866 /* 1867 * Save the SRC address from the T_CONN_IND 1868 * for getpeername to work on AF_UNIX and on transports that do not 1869 * support TI_GETPEERNAME. 1870 * 1871 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1872 * copyin_name(). 
1873 */ 1874 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1875 error = EINVAL; 1876 freemsg(mp); 1877 eprintsoline(so, error); 1878 goto disconnect_vp_unlocked; 1879 } 1880 nsti->sti_faddr_len = (socklen_t)srclen; 1881 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1882 bcopy(src, nsti->sti_faddr_sa, srclen); 1883 nsti->sti_faddr_valid = 1; 1884 1885 /* 1886 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1887 */ 1888 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1889 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1890 cred_t *cr; 1891 pid_t cpid; 1892 1893 cr = msg_getcred(mp, &cpid); 1894 if (cr != NULL) { 1895 crhold(cr); 1896 nso->so_peercred = cr; 1897 nso->so_cpid = cpid; 1898 } 1899 freemsg(mp); 1900 1901 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1902 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1903 if (mp == NULL) { 1904 /* 1905 * Accept can not fail with ENOBUFS. 1906 * A signal was caught so return EINTR. 1907 */ 1908 error = EINTR; 1909 eprintsoline(so, error); 1910 goto disconnect_vp_unlocked; 1911 } 1912 conn_res = (struct T_conn_res *)mp->b_rptr; 1913 } else { 1914 /* 1915 * For efficency reasons we use msg_extractcred; no crhold 1916 * needed since db_credp is cleared (i.e., we move the cred 1917 * from the message to so_peercred. 1918 */ 1919 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1920 1921 mp->b_rptr = DB_BASE(mp); 1922 conn_res = (struct T_conn_res *)mp->b_rptr; 1923 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1924 1925 mblk_setcred(mp, cr, curproc->p_pid); 1926 } 1927 1928 /* 1929 * New socket must be bound at least in sockfs and, except for AF_INET, 1930 * (or AF_INET6) it also has to be bound in the transport provider. 1931 * We set the local address in the sonode from the T_OK_ACK of the 1932 * T_CONN_RES. For this reason the address we bind to here isn't 1933 * important. 
1934 */ 1935 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1936 /*CONSTCOND*/ 1937 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1938 /* 1939 * Optimization for AF_INET{,6} transports 1940 * that can handle a T_CONN_RES without being bound. 1941 */ 1942 mutex_enter(&nso->so_lock); 1943 so_automatic_bind(nso); 1944 mutex_exit(&nso->so_lock); 1945 } else { 1946 /* Perform NULL bind with the transport provider. */ 1947 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1948 cr)) != 0) { 1949 ASSERT(error != ENOBUFS); 1950 freemsg(mp); 1951 eprintsoline(nso, error); 1952 goto disconnect_vp_unlocked; 1953 } 1954 } 1955 1956 /* 1957 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1958 * so that any data arriving on the new socket will cause the 1959 * appropriate signals to be delivered for the new socket. 1960 * 1961 * No other thread (except strsock_proto and strsock_misc) 1962 * can access the new socket thus we relax the locking. 1963 */ 1964 nso->so_pgrp = so->so_pgrp; 1965 nso->so_state |= so->so_state & SS_ASYNC; 1966 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1967 1968 if (nso->so_pgrp != 0) { 1969 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1970 eprintsoline(nso, error); 1971 error = 0; 1972 nso->so_pgrp = 0; 1973 } 1974 } 1975 1976 /* 1977 * Make note of the socket level options. TCP and IP level options 1978 * are already inherited. We could do all this after accept is 1979 * successful but doing it here simplifies code and no harm done 1980 * for error case. 
1981 */ 1982 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1983 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1984 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1985 nso->so_sndbuf = so->so_sndbuf; 1986 nso->so_rcvbuf = so->so_rcvbuf; 1987 if (nso->so_options & SO_LINGER) 1988 nso->so_linger = so->so_linger; 1989 1990 /* 1991 * Note that the following sti_direct code path should be 1992 * removed once we are confident that the direct sockets 1993 * do not result in any degradation. 1994 */ 1995 if (sti->sti_direct) { 1996 1997 ASSERT(opt != NULL); 1998 1999 conn_res->OPT_length = optlen; 2000 conn_res->OPT_offset = MBLKL(mp); 2001 bcopy(&opt, mp->b_wptr, optlen); 2002 mp->b_wptr += optlen; 2003 conn_res->PRIM_type = T_CONN_RES; 2004 conn_res->ACCEPTOR_id = 0; 2005 PRIM_type = T_CONN_RES; 2006 2007 /* Send down the T_CONN_RES on acceptor STREAM */ 2008 error = kstrputmsg(SOTOV(nso), mp, NULL, 2009 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2010 if (error) { 2011 mutex_enter(&so->so_lock); 2012 so_lock_single(so); 2013 eprintsoline(so, error); 2014 goto disconnect_vp; 2015 } 2016 mutex_enter(&nso->so_lock); 2017 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 2018 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2019 if (error) { 2020 mutex_exit(&nso->so_lock); 2021 mutex_enter(&so->so_lock); 2022 so_lock_single(so); 2023 eprintsoline(so, error); 2024 goto disconnect_vp; 2025 } 2026 if (nso->so_family == AF_INET) { 2027 sin_t *sin; 2028 2029 sin = (sin_t *)(ack_mp->b_rptr + 2030 sizeof (struct T_ok_ack)); 2031 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 2032 nsti->sti_laddr_len = sizeof (sin_t); 2033 } else { 2034 sin6_t *sin6; 2035 2036 sin6 = (sin6_t *)(ack_mp->b_rptr + 2037 sizeof (struct T_ok_ack)); 2038 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 2039 nsti->sti_laddr_len = sizeof (sin6_t); 2040 } 2041 freemsg(ack_mp); 2042 2043 nso->so_state |= SS_ISCONNECTED; 2044 nso->so_proto_handle = (sock_lower_handle_t)opt; 2045 
nsti->sti_laddr_valid = 1; 2046 2047 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 2048 /* 2049 * A NL7C marked listen()er so the new socket 2050 * inherits the listen()er's NL7C state, except 2051 * for NL7C_POLLIN. 2052 * 2053 * Only call NL7C to process the new socket if 2054 * the listen socket allows blocking i/o. 2055 */ 2056 nsti->sti_nl7c_flags = 2057 sti->sti_nl7c_flags & (~NL7C_POLLIN); 2058 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 2059 /* 2060 * Nonblocking accept() just make it 2061 * persist to defer processing to the 2062 * read-side syscall (e.g. read). 2063 */ 2064 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 2065 } else if (nl7c_process(nso, B_FALSE)) { 2066 /* 2067 * NL7C has completed processing on the 2068 * socket, close the socket and back to 2069 * the top to await the next T_CONN_IND. 2070 */ 2071 mutex_exit(&nso->so_lock); 2072 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 2073 cr, NULL); 2074 VN_RELE(nvp); 2075 goto again; 2076 } 2077 /* Pass the new socket out */ 2078 } 2079 2080 mutex_exit(&nso->so_lock); 2081 2082 /* 2083 * It's possible, through the use of autopush for example, 2084 * that the acceptor stream may not support sti_direct 2085 * semantics. If the new socket does not support sti_direct 2086 * we issue a _SIOCSOCKFALLBACK to inform the transport 2087 * as we would in the I_PUSH case. 2088 */ 2089 if (nsti->sti_direct == 0) { 2090 int rval; 2091 2092 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2093 0, 0, K_TO_K, cr, &rval)) != 0) { 2094 mutex_enter(&so->so_lock); 2095 so_lock_single(so); 2096 eprintsoline(so, error); 2097 goto disconnect_vp; 2098 } 2099 } 2100 2101 /* 2102 * Pass out new socket. 2103 */ 2104 if (nsop != NULL) 2105 *nsop = nso; 2106 2107 return (0); 2108 } 2109 2110 /* 2111 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2112 * which don't support the FireEngine accept fast-path. It is also 2113 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2114 * again. 
Neither sockfs nor TCP attempt to find out if some other 2115 * random module has been inserted in between (in which case we 2116 * should follow TLI accept behaviour). We blindly assume the worst 2117 * case and revert back to old behaviour i.e. TCP will not send us 2118 * any option (eager) and the accept should happen on the listener 2119 * queue. Any queued T_conn_ind have already got their options removed 2120 * by so_sock2_stream() when "sockmod" was I_POP'd. 2121 */ 2122 /* 2123 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2124 */ 2125 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2126 #ifdef _ILP32 2127 queue_t *q; 2128 2129 /* 2130 * Find read queue in driver 2131 * Can safely do this since we "own" nso/nvp. 2132 */ 2133 q = strvp2wq(nvp)->q_next; 2134 while (SAMESTR(q)) 2135 q = q->q_next; 2136 q = RD(q); 2137 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2138 #else 2139 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2140 #endif /* _ILP32 */ 2141 conn_res->PRIM_type = O_T_CONN_RES; 2142 PRIM_type = O_T_CONN_RES; 2143 } else { 2144 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2145 conn_res->PRIM_type = T_CONN_RES; 2146 PRIM_type = T_CONN_RES; 2147 } 2148 conn_res->SEQ_number = SEQ_number; 2149 conn_res->OPT_length = 0; 2150 conn_res->OPT_offset = 0; 2151 2152 mutex_enter(&so->so_lock); 2153 so_lock_single(so); /* Set SOLOCKED */ 2154 mutex_exit(&so->so_lock); 2155 2156 error = kstrputmsg(SOTOV(so), mp, NULL, 2157 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2158 mutex_enter(&so->so_lock); 2159 if (error) { 2160 eprintsoline(so, error); 2161 goto disconnect_vp; 2162 } 2163 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2164 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2165 if (error) { 2166 eprintsoline(so, error); 2167 goto disconnect_vp; 2168 } 2169 /* 2170 * If there is a sin/sin6 appended onto the T_OK_ACK use 2171 * that to set the local address. 
If this is not present 2172 * then we zero out the address and don't set the 2173 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2174 * the pathname from the listening socket. 2175 */ 2176 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 2177 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 2178 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2179 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2180 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2181 nsti->sti_laddr_len = sinlen; 2182 nsti->sti_laddr_valid = 1; 2183 } else if (nso->so_family == AF_UNIX) { 2184 ASSERT(so->so_family == AF_UNIX); 2185 nsti->sti_laddr_len = sti->sti_laddr_len; 2186 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2187 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2188 nsti->sti_laddr_len); 2189 nsti->sti_laddr_valid = 1; 2190 } else { 2191 nsti->sti_laddr_len = sti->sti_laddr_len; 2192 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2193 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2194 nsti->sti_laddr_sa->sa_family = nso->so_family; 2195 } 2196 freemsg(ack_mp); 2197 2198 so_unlock_single(so, SOLOCKED); 2199 mutex_exit(&so->so_lock); 2200 2201 nso->so_state |= SS_ISCONNECTED; 2202 2203 /* 2204 * Pass out new socket. 
*/
	if (nsop != NULL)
		*nsop = nso;

	return (0);


eproto_disc_unl:
	error = EPROTO;
e_disc_unl:
	eprintsoline(so, error);
	goto disconnect_unlocked;

pr_disc_vp_unl:
	eprintsoline(so, error);
disconnect_vp_unlocked:
	(void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
	VN_RELE(nvp);
disconnect_unlocked:
	(void) sodisconnect(so, SEQ_number, 0);
	return (error);

pr_disc_vp:
	eprintsoline(so, error);
disconnect_vp:
	(void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD);
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	(void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL);
	VN_RELE(nvp);
	return (error);

conn_bad:	/* Note: SunOS 4/BSD unconditionally returns EINVAL here */
	error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW)
	    ? EOPNOTSUPP : EINVAL;
e_bad:
	eprintsoline(so, error);
	return (error);
}

/*
 * connect a socket.
 *
 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
 * unconnect (by specifying a null address).
 *
 * Arguments:
 *	so	- socket to connect.
 *	name	- destination address; an address whose sa_family is
 *		  AF_UNSPEC is treated like a NULL address.
 *	namelen	- length of name in bytes.
 *	fflag	- file flags handed to sowaitconnected(); cleared for
 *		  AF_INET{,6} datagram/raw sockets that send a T_CONN_REQ.
 *	flags	- _SOCONNECT_* flags. _SOCONNECT_NOXLATE means name is a
 *		  struct sockaddr_ux, _SOCONNECT_XPG4_2 is passed on to
 *		  so_ux_addr_xlate(). _SOCONNECT_DID_BIND is set locally when
 *		  this routine had to bind the socket itself.
 *	cr	- credentials passed to the allocation, bind and setsockopt
 *		  calls.
 *
 * Returns 0 on success or an errno value. so_lock is taken and released
 * here; SOLOCKED is tracked with need_unlock so that the common exit code
 * at "done" only drops it when it is held.
 */
int
sotpi_connect(struct sonode *so,
	const struct sockaddr *name,
	socklen_t namelen,
	int fflag,
	int flags,
	struct cred *cr)
{
	struct T_conn_req	conn_req;
	int			error = 0;
	mblk_t			*mp;
	void			*src;
	socklen_t		srclen;
	void			*addr;
	socklen_t		addrlen;
	boolean_t		need_unlock;
	sotpi_info_t		*sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
	    (void *)so, (void *)name, namelen, fflag, flags,
	    pr_state(so->so_state, so->so_mode)));

	/*
	 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
	 * avoid sleeping for memory with SOLOCKED held.
	 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen
	 * + sizeof (struct T_opthdr).
	 * (the AF_UNIX so_ux_addr_xlate() does not make the address
	 * exceed sti_faddr_maxlen).
	 */
	mp = soallocproto(sizeof (struct T_conn_req) +
	    2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR,
	    cr);
	if (mp == NULL) {
		/*
		 * Connect can not fail with ENOBUFS. A signal was
		 * caught so return EINTR.
		 */
		error = EINTR;
		eprintsoline(so, error);
		return (error);
	}

	mutex_enter(&so->so_lock);
	/*
	 * Make sure there is a preallocated T_unbind_req message
	 * before any binding. This message is allocated when the
	 * socket is created. Since another thread can consume
	 * so_unbind_mp by the time we return from so_lock_single(),
	 * we should check the availability of so_unbind_mp after
	 * we return from so_lock_single().
	 */

	so_lock_single(so);	/* Set SOLOCKED */
	need_unlock = B_TRUE;

	if (sti->sti_unbind_mp == NULL) {
		dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n"));
		/* NOTE: holding so_lock while sleeping */
		sti->sti_unbind_mp =
		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr);
		if (sti->sti_unbind_mp == NULL) {
			error = EINTR;
			goto done;
		}
	}

	/*
	 * Can't have done a listen before connecting.
	 */
	if (so->so_state & SS_ACCEPTCONN) {
		error = EOPNOTSUPP;
		goto done;
	}

	/*
	 * Must be bound with the transport
	 */
	if (!(so->so_state & SS_ISBOUND)) {
		if ((so->so_family == AF_INET || so->so_family == AF_INET6) &&
		    /*CONSTCOND*/
		    so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) {
			/*
			 * Optimization for AF_INET{,6} transports
			 * that can handle a T_CONN_REQ without being bound.
			 */
			so_automatic_bind(so);
		} else {
			error = sotpi_bind(so, NULL, 0,
			    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr);
			if (error)
				goto done;
		}
		ASSERT(so->so_state & SS_ISBOUND);
		/* Remember the bind so a fatal error below can undo it. */
		flags |= _SOCONNECT_DID_BIND;
	}

	/*
	 * Handle a connect to a name parameter of type AF_UNSPEC like a
	 * connect to a null address. This is the portable method to
	 * unconnect a socket.
	 *
	 * NOTE(review): name is dereferenced here whenever namelen is at
	 * least sizeof (sa_family_t); there is no name != NULL check in this
	 * function, so this presumably relies on callers never passing a
	 * NULL name with a non-zero namelen - confirm against the callers.
	 */
	if ((namelen >= sizeof (sa_family_t)) &&
	    (name->sa_family == AF_UNSPEC)) {
		name = NULL;
		namelen = 0;
	}

	/*
	 * Check that we are not already connected.
	 * A connection-oriented socket cannot be reconnected.
	 * A connected connection-less socket can be
	 * - connected to a different address by a subsequent connect
	 * - "unconnected" by a connect to the NULL address
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) {
		ASSERT(!(flags & _SOCONNECT_DID_BIND));
		if (so->so_mode & SM_CONNREQUIRED) {
			/* Connection-oriented socket */
			error = so->so_state & SS_ISCONNECTED ?
			    EISCONN : EALREADY;
			goto done;
		}
		/* Connection-less socket */
		if (name == NULL) {
			/*
			 * Remove the connected state and clear SO_DGRAM_ERRIND
			 * since it was set when the socket was connected.
			 * If this is UDP also send down a T_DISCON_REQ.
			 */
			int val;

			if ((so->so_family == AF_INET ||
			    so->so_family == AF_INET6) &&
			    (so->so_type == SOCK_DGRAM ||
			    so->so_type == SOCK_RAW) &&
			    /*CONSTCOND*/
			    !soconnect_tpi_udp) {
				/* XXX What about implicitly unbinding here? */
				error = sodisconnect(so, -1,
				    _SODISCONNECT_LOCK_HELD);
			} else {
				so->so_state &=
				    ~(SS_ISCONNECTED | SS_ISCONNECTING);
				sti->sti_faddr_valid = 0;
				sti->sti_faddr_len = 0;
			}

			/* Remove SOLOCKED since setsockopt will grab it */
			so_unlock_single(so, SOLOCKED);
			mutex_exit(&so->so_lock);

			val = 0;
			(void) sotpi_setsockopt(so, SOL_SOCKET,
			    SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val),
			    cr);

			mutex_enter(&so->so_lock);
			so_lock_single(so);	/* Set SOLOCKED */
			goto done;
		}
	}
	ASSERT(so->so_state & SS_ISBOUND);

	if (name == NULL || namelen == 0) {
		error = EINVAL;
		goto done;
	}
	/*
	 * Mark the socket if sti_faddr_sa represents the transport level
	 * address.
	 */
	if (flags & _SOCONNECT_NOXLATE) {
		struct sockaddr_ux	*soaddr_ux;

		ASSERT(so->so_family == AF_UNIX);
		if (namelen != sizeof (struct sockaddr_ux)) {
			error = EINVAL;
			goto done;
		}
		/* Continue with the address embedded in the sockaddr_ux. */
		soaddr_ux = (struct sockaddr_ux *)name;
		name = (struct sockaddr *)&soaddr_ux->sou_addr;
		namelen = sizeof (soaddr_ux->sou_addr);
		sti->sti_faddr_noxlate = 1;
	}

	/*
	 * Length and family checks.
	 */
	error = so_addr_verify(so, name, namelen);
	if (error)
		goto bad;

	/*
	 * Save foreign address. Needed for AF_UNIX as well as
	 * transport providers that do not support TI_GETPEERNAME.
	 * Also used for cached foreign address for TCP and UDP.
	 */
	if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) {
		error = EINVAL;
		goto done;
	}
	sti->sti_faddr_len = (socklen_t)namelen;
	ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
	bcopy(name, sti->sti_faddr_sa, namelen);
	sti->sti_faddr_valid = 1;

	/*
	 * Choose the destination address (addr/addrlen) and the optional
	 * source address (src/srclen) that go into the T_CONN_REQ.
	 */
	if (so->so_family == AF_UNIX) {
		if (sti->sti_faddr_noxlate) {
			/*
			 * Already have a transport internal address. Do not
			 * pass any (transport internal) source address.
			 */
			addr = sti->sti_faddr_sa;
			addrlen = (t_uscalar_t)sti->sti_faddr_len;
			src = NULL;
			srclen = 0;
		} else {
			/*
			 * Pass the sockaddr_un source address as an option
			 * and translate the remote address.
			 * Holding so_lock thus sti_laddr_sa can not change.
			 */
			src = sti->sti_laddr_sa;
			srclen = (t_uscalar_t)sti->sti_laddr_len;
			dprintso(so, 1,
			    ("sotpi_connect UNIX: srclen %d, src %p\n",
			    srclen, src));
			error = so_ux_addr_xlate(so,
			    sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len,
			    (flags & _SOCONNECT_XPG4_2),
			    &addr, &addrlen);
			if (error)
				goto bad;
		}
	} else {
		addr = sti->sti_faddr_sa;
		addrlen = (t_uscalar_t)sti->sti_faddr_len;
		src = NULL;
		srclen = 0;
	}
	/*
	 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND
	 * option which asks the transport provider to send T_UDERR_IND
	 * messages. These T_UDERR_IND messages are used to return connected
	 * style errors (e.g. ECONNRESET) for connected datagram sockets.
	 *
	 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets)
	 * we send down a T_CONN_REQ. This is needed to let the
	 * transport assign a local address that is consistent with
	 * the remote address. Applications depend on a getsockname()
	 * after a connect() to retrieve the "source" IP address for
	 * the connected socket.  Invalidate the cached local address
	 * to force getsockname() to enquire of the transport.
	 */
	if (!(so->so_mode & SM_CONNREQUIRED)) {
		/*
		 * Datagram socket.
		 */
		int32_t val;

		/* SOLOCKED is dropped around the setsockopt call. */
		so_unlock_single(so, SOLOCKED);
		mutex_exit(&so->so_lock);

		val = 1;
		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND,
		    &val, (t_uscalar_t)sizeof (val), cr);

		mutex_enter(&so->so_lock);
		so_lock_single(so);	/* Set SOLOCKED */
		if ((so->so_family != AF_INET && so->so_family != AF_INET6) ||
		    (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) ||
		    soconnect_tpi_udp) {
			/* No T_CONN_REQ is sent; mark connected right away. */
			soisconnected(so);
			goto done;
		}
		/*
		 * Send down T_CONN_REQ etc.
		 * Clear fflag to avoid returning EWOULDBLOCK.
		 */
		fflag = 0;
		ASSERT(so->so_family != AF_UNIX);
		sti->sti_laddr_valid = 0;
	} else if (sti->sti_laddr_len != 0) {
		/*
		 * If the local address or port was "any" then it may be
		 * changed by the transport as a result of the
		 * connect.  Invalidate the cached version if we have one.
		 */
		switch (so->so_family) {
		case AF_INET:
			ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t));
			if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr ==
			    INADDR_ANY ||
			    ((sin_t *)sti->sti_laddr_sa)->sin_port == 0)
				sti->sti_laddr_valid = 0;
			break;

		case AF_INET6:
			ASSERT(sti->sti_laddr_len ==
			    (socklen_t)sizeof (sin6_t));
			if (IN6_IS_ADDR_UNSPECIFIED(
			    &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) ||
			    IN6_IS_ADDR_V4MAPPED_ANY(
			    &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) ||
			    ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0)
				sti->sti_laddr_valid = 0;
			break;

		default:
			break;
		}
	}

	/*
	 * Check for failure of an earlier call
	 */
	if (so->so_error != 0)
		goto so_bad;

	/*
	 * Send down T_CONN_REQ. Message was allocated above.
	 */
	conn_req.PRIM_type = T_CONN_REQ;
	conn_req.DEST_length = addrlen;
	conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req);
	if (srclen == 0) {
		conn_req.OPT_length = 0;
		conn_req.OPT_offset = 0;
		soappendmsg(mp, &conn_req, sizeof (conn_req));
		soappendmsg(mp, addr, addrlen);
	} else {
		/*
		 * There is a AF_UNIX sockaddr_un to include as a source
		 * address option.
		 */
		struct T_opthdr toh;

		toh.level = SOL_SOCKET;
		toh.name = SO_SRCADDR;
		toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr));
		toh.status = 0;
		conn_req.OPT_length =
		    (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen));
		conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) +
		    _TPI_ALIGN_TOPT(addrlen));

		/*
		 * Layout: T_conn_req, destination address padded with
		 * _TPI_ALIGN_TOPT, option header, source address padded
		 * the same way.
		 */
		soappendmsg(mp, &conn_req, sizeof (conn_req));
		soappendmsg(mp, addr, addrlen);
		mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen;
		soappendmsg(mp, &toh, sizeof (toh));
		soappendmsg(mp, src, srclen);
		mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen;
		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
	}
	/*
	 * Set SS_ISCONNECTING before sending down the T_CONN_REQ
	 * in order to have the right state when the T_CONN_CON shows up.
	 */
	soisconnecting(so);
	mutex_exit(&so->so_lock);

	if (audit_active)
		audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0);

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	/* mp was handed to kstrputmsg; keep "done" from freeing it again. */
	mp = NULL;
	mutex_enter(&so->so_lock);
	if (error != 0)
		goto bad;

	if ((error = sowaitokack(so, T_CONN_REQ)) != 0)
		goto bad;

	/* Allow other threads to access the socket */
	so_unlock_single(so, SOLOCKED);
	need_unlock = B_FALSE;

	/*
	 * Wait until we get a T_CONN_CON or an error
	 */
	if ((error = sowaitconnected(so, fflag, 0)) != 0) {
		/* Retake SOLOCKED for the error handling at "done". */
		so_lock_single(so);	/* Set SOLOCKED */
		need_unlock = B_TRUE;
	}

done:
	/* Common exit: so_lock is held; mp is NULL once it has been sent. */
	freemsg(mp);
	switch (error) {
	case EINPROGRESS:
	case EALREADY:
	case EISCONN:
	case EINTR:
		/* Non-fatal errors */
		sti->sti_laddr_valid = 0;
		/* FALLTHRU */
	case 0:
		break;
	default:
		ASSERT(need_unlock);
		/*
		 * Fatal errors: clear SS_ISCONNECTING in case it was set,
		 * and invalidate local-address cache
		 */
		so->so_state &= ~SS_ISCONNECTING;
		sti->sti_laddr_valid = 0;
		/* A discon_ind might have already unbound us */
		if ((flags & _SOCONNECT_DID_BIND) &&
		    (so->so_state & SS_ISBOUND)) {
			int err;

			err = sotpi_unbind(so, 0);
			/* LINTED - statement has no conseq */
			if (err) {
				eprintsoline(so, err);
			}
		}
		break;
	}
	if (need_unlock)
		so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);

	/* Error entry points: record the error, then take the common exit. */
so_bad:	error = sogeterr(so, B_TRUE);
bad:	eprintsoline(so, error);
	goto done;
}

/*
 * Shut down the receive side (how == 0), the send side (how == 1) or both
 * sides (how == 2) of a socket. Any other value of how fails with EINVAL.
 *
 * A socket that is not SS_ISCONNECTED fails with ENOTCONN unless
 * xnet_skip_checks is set, in which case 0 is returned without doing
 * anything. Only the state bits that were newly set by this call cause
 * stream head updates and TPI messages, so repeated calls are harmless.
 *
 * Returns 0 on success or an errno value.
 */
/* ARGSUSED */
int
sotpi_shutdown(struct sonode *so, int how, struct cred *cr)
{
	struct T_ordrel_req	ordrel_req;
	mblk_t			*mp;
	uint_t			old_state, state_change;
	int			error = 0;
	sotpi_info_t		*sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n",
	    (void *)so, how, pr_state(so->so_state, so->so_mode)));

	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	/*
	 * SunOS 4.X has no check for datagram sockets.
	 * 5.X checks that it is connected (ENOTCONN)
	 * X/Open requires that we check the connected state.
	 */
	if (!(so->so_state & SS_ISCONNECTED)) {
		if (!xnet_skip_checks) {
			error = ENOTCONN;
			if (xnet_check_print) {
				printf("sockfs: X/Open shutdown check "
				    "caused ENOTCONN\n");
			}
		}
		goto done;
	}
	/*
	 * Record the current state and then perform any state changes.
	 * Then use the difference between the old and new states to
	 * determine which messages need to be sent.
	 * This prevents e.g. duplicate T_ORDREL_REQ when there are
	 * duplicate calls to shutdown().
	 */
	old_state = so->so_state;

	switch (how) {
	case 0:
		socantrcvmore(so);
		break;
	case 1:
		socantsendmore(so);
		break;
	case 2:
		socantsendmore(so);
		socantrcvmore(so);
		break;
	default:
		error = EINVAL;
		goto done;
	}

	/*
	 * Assumes that the SS_CANT* flags are never cleared in the above code.
	 */
	state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) -
	    (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE));
	ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0);

	/* so_lock is dropped around the stream head calls in each case. */
	switch (state_change) {
	case 0:
		dprintso(so, 1,
		    ("sotpi_shutdown: nothing to send in state 0x%x\n",
		    so->so_state));
		goto done;

	case SS_CANTRCVMORE:
		mutex_exit(&so->so_lock);
		strseteof(SOTOV(so), 1);
		/*
		 * strseteof takes care of read side wakeups,
		 * pollwakeups, and signals.
		 */
		/*
		 * Get the read lock before flushing data to avoid problems
		 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
		 */
		mutex_enter(&so->so_lock);
		(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
		mutex_exit(&so->so_lock);

		/* Flush read side queue */
		strflushrq(SOTOV(so), FLUSHALL);

		mutex_enter(&so->so_lock);
		so_unlock_read(so);		/* Clear SOREADLOCKED */
		break;

	case SS_CANTSENDMORE:
		mutex_exit(&so->so_lock);
		strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
		mutex_enter(&so->so_lock);
		break;

	case SS_CANTSENDMORE|SS_CANTRCVMORE:
		mutex_exit(&so->so_lock);
		strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
		strseteof(SOTOV(so), 1);
		/*
		 * strseteof takes care of read side wakeups,
		 * pollwakeups, and signals.
		 */
		/*
		 * Get the read lock before flushing data to avoid problems
		 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
		 */
		mutex_enter(&so->so_lock);
		(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
		mutex_exit(&so->so_lock);

		/* Flush read side queue */
		strflushrq(SOTOV(so), FLUSHALL);

		mutex_enter(&so->so_lock);
		so_unlock_read(so);		/* Clear SOREADLOCKED */
		break;
	}

	ASSERT(MUTEX_HELD(&so->so_lock));

	/*
	 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them
	 * was set due to this call and the new state has both of them set:
	 *	Send the AF_UNIX close indication
	 *	For T_COTS send a discon_ind
	 *
	 * If cantsend was set due to this call:
	 *	For T_COTSORD send an ordrel_ind
	 *
	 * Note that for T_CLTS there is no message sent here.
	 */
	if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE|SS_CANTSENDMORE)) {
		/*
		 * For SunOS 4.X compatibility we tell the other end
		 * that we are unable to receive at this point.
		 */
		if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS)
			so_unix_close(so);

		if (sti->sti_serv_type == T_COTS)
			error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD);
	}
	if ((state_change & SS_CANTSENDMORE) &&
	    (sti->sti_serv_type == T_COTS_ORD)) {
		/* Send an orderly release */
		ordrel_req.PRIM_type = T_ORDREL_REQ;

		mutex_exit(&so->so_lock);
		mp = soallocproto1(&ordrel_req, sizeof (ordrel_req),
		    0, _ALLOC_SLEEP, cr);
		/*
		 * Send down the T_ORDREL_REQ even if there is flow control.
		 * This prevents shutdown from blocking.
		 * Note that there is no T_OK_ACK for ordrel_req.
		 */
		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
		mutex_enter(&so->so_lock);
		if (error) {
			eprintsoline(so, error);
			goto done;
		}
	}

done:
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send
 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer
 * that we have closed.
 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length
 * T_UNITDATA_REQ containing the same option.
 *
 * For SOCK_DGRAM half-connections (somebody connected to this end
 * but this end is not connected) we don't know where to send any
 * SO_UNIX_CLOSE.
 *
 * We have to ignore stream head errors just in case there has been
 * a shutdown(output).
 * Ignore any flow control to try to get the message more quickly to the peer.
 * While locally ignoring flow control solves the problem when there
 * is only the loopback transport on the stream it would not provide
 * the correct AF_UNIX socket semantics when one or more modules have
 * been pushed.
2874 */ 2875 void 2876 so_unix_close(struct sonode *so) 2877 { 2878 int error; 2879 struct T_opthdr toh; 2880 mblk_t *mp; 2881 sotpi_info_t *sti = SOTOTPI(so); 2882 2883 ASSERT(MUTEX_HELD(&so->so_lock)); 2884 2885 ASSERT(so->so_family == AF_UNIX); 2886 2887 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2888 (SS_ISCONNECTED|SS_ISBOUND)) 2889 return; 2890 2891 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2892 (void *)so, pr_state(so->so_state, so->so_mode))); 2893 2894 toh.level = SOL_SOCKET; 2895 toh.name = SO_UNIX_CLOSE; 2896 2897 /* zero length + header */ 2898 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2899 toh.status = 0; 2900 2901 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2902 struct T_optdata_req tdr; 2903 2904 tdr.PRIM_type = T_OPTDATA_REQ; 2905 tdr.DATA_flag = 0; 2906 2907 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2908 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2909 2910 /* NOTE: holding so_lock while sleeping */ 2911 mp = soallocproto2(&tdr, sizeof (tdr), 2912 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2913 } else { 2914 struct T_unitdata_req tudr; 2915 void *addr; 2916 socklen_t addrlen; 2917 void *src; 2918 socklen_t srclen; 2919 struct T_opthdr toh2; 2920 t_scalar_t size; 2921 2922 /* Connecteded DGRAM socket */ 2923 2924 /* 2925 * For AF_UNIX the destination address is translated to 2926 * an internal name and the source address is passed as 2927 * an option. 2928 */ 2929 /* 2930 * Length and family checks. 2931 */ 2932 error = so_addr_verify(so, sti->sti_faddr_sa, 2933 (t_uscalar_t)sti->sti_faddr_len); 2934 if (error) { 2935 eprintsoline(so, error); 2936 return; 2937 } 2938 if (sti->sti_faddr_noxlate) { 2939 /* 2940 * Already have a transport internal address. Do not 2941 * pass any (transport internal) source address. 
2942 */ 2943 addr = sti->sti_faddr_sa; 2944 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2945 src = NULL; 2946 srclen = 0; 2947 } else { 2948 /* 2949 * Pass the sockaddr_un source address as an option 2950 * and translate the remote address. 2951 * Holding so_lock thus sti_laddr_sa can not change. 2952 */ 2953 src = sti->sti_laddr_sa; 2954 srclen = (socklen_t)sti->sti_laddr_len; 2955 dprintso(so, 1, 2956 ("so_ux_close: srclen %d, src %p\n", 2957 srclen, src)); 2958 error = so_ux_addr_xlate(so, 2959 sti->sti_faddr_sa, 2960 (socklen_t)sti->sti_faddr_len, 0, 2961 &addr, &addrlen); 2962 if (error) { 2963 eprintsoline(so, error); 2964 return; 2965 } 2966 } 2967 tudr.PRIM_type = T_UNITDATA_REQ; 2968 tudr.DEST_length = addrlen; 2969 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2970 if (srclen == 0) { 2971 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2972 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2973 _TPI_ALIGN_TOPT(addrlen)); 2974 2975 size = tudr.OPT_offset + tudr.OPT_length; 2976 /* NOTE: holding so_lock while sleeping */ 2977 mp = soallocproto2(&tudr, sizeof (tudr), 2978 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2979 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2980 soappendmsg(mp, &toh, sizeof (toh)); 2981 } else { 2982 /* 2983 * There is a AF_UNIX sockaddr_un to include as a 2984 * source address option. 
2985 */ 2986 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2987 _TPI_ALIGN_TOPT(srclen)); 2988 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2989 _TPI_ALIGN_TOPT(addrlen)); 2990 2991 toh2.level = SOL_SOCKET; 2992 toh2.name = SO_SRCADDR; 2993 toh2.len = (t_uscalar_t)(srclen + 2994 sizeof (struct T_opthdr)); 2995 toh2.status = 0; 2996 2997 size = tudr.OPT_offset + tudr.OPT_length; 2998 2999 /* NOTE: holding so_lock while sleeping */ 3000 mp = soallocproto2(&tudr, sizeof (tudr), 3001 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 3002 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3003 soappendmsg(mp, &toh, sizeof (toh)); 3004 soappendmsg(mp, &toh2, sizeof (toh2)); 3005 soappendmsg(mp, src, srclen); 3006 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3007 } 3008 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3009 } 3010 mutex_exit(&so->so_lock); 3011 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 3012 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 3013 mutex_enter(&so->so_lock); 3014 } 3015 3016 /* 3017 * Called by sotpi_recvmsg when reading a non-zero amount of data. 3018 * In addition, the caller typically verifies that there is some 3019 * potential state to clear by checking 3020 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 3021 * before calling this routine. 3022 * Note that such a check can be made without holding so_lock since 3023 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 3024 * decrements sti_oobsigcnt. 3025 * 3026 * When data is read *after* the point that all pending 3027 * oob data has been consumed the oob indication is cleared. 3028 * 3029 * This logic keeps select/poll returning POLLRDBAND and 3030 * SIOCATMARK returning true until we have read past 3031 * the mark. 
3032 */ 3033 static void 3034 sorecv_update_oobstate(struct sonode *so) 3035 { 3036 sotpi_info_t *sti = SOTOTPI(so); 3037 3038 mutex_enter(&so->so_lock); 3039 ASSERT(so_verify_oobstate(so)); 3040 dprintso(so, 1, 3041 ("sorecv_update_oobstate: counts %d/%d state %s\n", 3042 sti->sti_oobsigcnt, 3043 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 3044 if (sti->sti_oobsigcnt == 0) { 3045 /* No more pending oob indications */ 3046 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 3047 freemsg(so->so_oobmsg); 3048 so->so_oobmsg = NULL; 3049 } 3050 ASSERT(so_verify_oobstate(so)); 3051 mutex_exit(&so->so_lock); 3052 } 3053 3054 /* 3055 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 3056 */ 3057 static int 3058 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 3059 { 3060 sotpi_info_t *sti = SOTOTPI(so); 3061 int error = 0; 3062 mblk_t *tmp = NULL; 3063 mblk_t *pmp = NULL; 3064 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 3065 3066 ASSERT(nmp != NULL); 3067 3068 while (nmp != NULL && uiop->uio_resid > 0) { 3069 ssize_t n; 3070 3071 if (DB_TYPE(nmp) == M_DATA) { 3072 /* 3073 * We have some data, uiomove up to resid bytes. 3074 */ 3075 n = MIN(MBLKL(nmp), uiop->uio_resid); 3076 if (n > 0) 3077 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 3078 nmp->b_rptr += n; 3079 if (nmp->b_rptr == nmp->b_wptr) { 3080 pmp = nmp; 3081 nmp = nmp->b_cont; 3082 } 3083 if (error) 3084 break; 3085 } else { 3086 /* 3087 * We only handle data, save for caller to handle. 
3088 */ 3089 if (pmp != NULL) { 3090 pmp->b_cont = nmp->b_cont; 3091 } 3092 nmp->b_cont = NULL; 3093 if (*rmp == NULL) { 3094 *rmp = nmp; 3095 } else { 3096 tmp->b_cont = nmp; 3097 } 3098 nmp = nmp->b_cont; 3099 tmp = nmp; 3100 } 3101 } 3102 if (pmp != NULL) { 3103 /* Free any mblk_t(s) which we have consumed */ 3104 pmp->b_cont = NULL; 3105 freemsg(sti->sti_nl7c_rcv_mp); 3106 } 3107 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3108 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3109 if (error == 0) { 3110 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3111 3112 error = p->r_v.r_v2; 3113 p->r_v.r_v2 = 0; 3114 } 3115 rp->r_vals = sti->sti_nl7c_rcv_rval; 3116 sti->sti_nl7c_rcv_rval = 0; 3117 } else { 3118 /* More mblk_t(s) to process so no rval to return */ 3119 rp->r_vals = 0; 3120 } 3121 return (error); 3122 } 3123 /* 3124 * Receive the next message on the queue. 3125 * If msg_controllen is non-zero when called the caller is interested in 3126 * any received control info (options). 3127 * If msg_namelen is non-zero when called the caller is interested in 3128 * any received source address. 3129 * The routine returns with msg_control and msg_name pointing to 3130 * kmem_alloc'ed memory which the caller has to free. 
3131 */ 3132 /* ARGSUSED */ 3133 int 3134 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3135 struct cred *cr) 3136 { 3137 union T_primitives *tpr; 3138 mblk_t *mp; 3139 uchar_t pri; 3140 int pflag, opflag; 3141 void *control; 3142 t_uscalar_t controllen; 3143 t_uscalar_t namelen; 3144 int so_state = so->so_state; /* Snapshot */ 3145 ssize_t saved_resid; 3146 rval_t rval; 3147 int flags; 3148 clock_t timout; 3149 int error = 0; 3150 int reterr = 0; 3151 struct uio *suiop = NULL; 3152 sotpi_info_t *sti = SOTOTPI(so); 3153 3154 flags = msg->msg_flags; 3155 msg->msg_flags = 0; 3156 3157 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3158 (void *)so, (void *)msg, flags, 3159 pr_state(so->so_state, so->so_mode), so->so_error)); 3160 3161 if (so->so_version == SOV_STREAM) { 3162 so_update_attrs(so, SOACC); 3163 /* The imaginary "sockmod" has been popped - act as a stream */ 3164 return (strread(SOTOV(so), uiop, cr)); 3165 } 3166 3167 /* 3168 * If we are not connected because we have never been connected 3169 * we return ENOTCONN. If we have been connected (but are no longer 3170 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3171 * the EOF. 3172 * 3173 * An alternative would be to post an ENOTCONN error in stream head 3174 * (read+write) and clear it when we're connected. However, that error 3175 * would cause incorrect poll/select behavior! 3176 */ 3177 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3178 (so->so_mode & SM_CONNREQUIRED)) { 3179 return (ENOTCONN); 3180 } 3181 3182 /* 3183 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3184 * after checking that the read queue is empty) and returns zero. 3185 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3186 * is zero. 
3187 */ 3188 3189 if (flags & MSG_OOB) { 3190 /* Check that the transport supports OOB */ 3191 if (!(so->so_mode & SM_EXDATA)) 3192 return (EOPNOTSUPP); 3193 so_update_attrs(so, SOACC); 3194 return (sorecvoob(so, msg, uiop, flags, 3195 (so->so_options & SO_OOBINLINE))); 3196 } 3197 3198 so_update_attrs(so, SOACC); 3199 3200 /* 3201 * Set msg_controllen and msg_namelen to zero here to make it 3202 * simpler in the cases that no control or name is returned. 3203 */ 3204 controllen = msg->msg_controllen; 3205 namelen = msg->msg_namelen; 3206 msg->msg_controllen = 0; 3207 msg->msg_namelen = 0; 3208 3209 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3210 namelen, controllen)); 3211 3212 mutex_enter(&so->so_lock); 3213 /* 3214 * If an NL7C enabled socket and not waiting for write data. 3215 */ 3216 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3217 NL7C_ENABLED) { 3218 if (sti->sti_nl7c_uri) { 3219 /* Close uri processing for a previous request */ 3220 nl7c_close(so); 3221 } 3222 if ((so_state & SS_CANTRCVMORE) && 3223 sti->sti_nl7c_rcv_mp == NULL) { 3224 /* Nothing to process, EOF */ 3225 mutex_exit(&so->so_lock); 3226 return (0); 3227 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3228 /* Persistent NL7C socket, try to process request */ 3229 boolean_t ret; 3230 3231 ret = nl7c_process(so, 3232 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3233 rval.r_vals = sti->sti_nl7c_rcv_rval; 3234 error = rval.r_v.r_v2; 3235 if (error) { 3236 /* Error of some sort, return it */ 3237 mutex_exit(&so->so_lock); 3238 return (error); 3239 } 3240 if (sti->sti_nl7c_flags && 3241 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3242 /* 3243 * Still an NL7C socket and no data 3244 * to pass up to the caller. 3245 */ 3246 mutex_exit(&so->so_lock); 3247 if (ret) { 3248 /* EOF */ 3249 return (0); 3250 } else { 3251 /* Need more data */ 3252 return (EAGAIN); 3253 } 3254 } 3255 } else { 3256 /* 3257 * Not persistent so no further NL7C processing. 
3258 */ 3259 sti->sti_nl7c_flags = 0; 3260 } 3261 } 3262 /* 3263 * Only one reader is allowed at any given time. This is needed 3264 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3265 * 3266 * This is slightly different that BSD behavior in that it fails with 3267 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3268 * is single-threaded using sblock(), which is dropped while waiting 3269 * for data to appear. The difference shows up e.g. if one 3270 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3271 * does use nonblocking io and different threads are reading each 3272 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3273 * in this case as long as the read queue doesn't get empty. 3274 * In this implementation the thread using nonblocking io can 3275 * get an EWOULDBLOCK error due to the blocking thread executing 3276 * e.g. in the uiomove in kstrgetmsg. 3277 * This difference is not believed to be significant. 3278 */ 3279 /* Set SOREADLOCKED */ 3280 error = so_lock_read_intr(so, 3281 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3282 mutex_exit(&so->so_lock); 3283 if (error) 3284 return (error); 3285 3286 /* 3287 * Tell kstrgetmsg to not inspect the stream head errors until all 3288 * queued data has been consumed. 3289 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3290 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3291 * 3292 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3293 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3294 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 
3295 */ 3296 pflag = MSG_ANY | MSG_DELAYERROR; 3297 if (flags & MSG_PEEK) { 3298 pflag |= MSG_IPEEK; 3299 flags &= ~MSG_WAITALL; 3300 } 3301 if (so->so_mode & SM_ATOMIC) 3302 pflag |= MSG_DISCARDTAIL; 3303 3304 if (flags & MSG_DONTWAIT) 3305 timout = 0; 3306 else 3307 timout = -1; 3308 opflag = pflag; 3309 3310 suiop = sod_rcv_init(so, flags, &uiop); 3311 retry: 3312 saved_resid = uiop->uio_resid; 3313 pri = 0; 3314 mp = NULL; 3315 if (sti->sti_nl7c_rcv_mp != NULL) { 3316 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3317 error = nl7c_sorecv(so, &mp, uiop, &rval); 3318 } else { 3319 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3320 timout, &rval); 3321 } 3322 if (error != 0) { 3323 /* kstrgetmsg returns ETIME when timeout expires */ 3324 if (error == ETIME) 3325 error = EWOULDBLOCK; 3326 goto out; 3327 } 3328 /* 3329 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3330 * For non-datagrams MOREDATA is used to set MSG_EOR. 3331 */ 3332 ASSERT(!(rval.r_val1 & MORECTL)); 3333 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3334 msg->msg_flags |= MSG_TRUNC; 3335 3336 if (mp == NULL) { 3337 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3338 /* 3339 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3340 * The draft Posix socket spec states that the mark should 3341 * not be cleared when peeking. We follow the latter. 3342 */ 3343 if ((so->so_state & 3344 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3345 (uiop->uio_resid != saved_resid) && 3346 !(flags & MSG_PEEK)) { 3347 sorecv_update_oobstate(so); 3348 } 3349 3350 mutex_enter(&so->so_lock); 3351 /* Set MSG_EOR based on MOREDATA */ 3352 if (!(rval.r_val1 & MOREDATA)) { 3353 if (so->so_state & SS_SAVEDEOR) { 3354 msg->msg_flags |= MSG_EOR; 3355 so->so_state &= ~SS_SAVEDEOR; 3356 } 3357 } 3358 /* 3359 * If some data was received (i.e. not EOF) and the 3360 * read/recv* has not been satisfied wait for some more. 
3361 */ 3362 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3363 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3364 mutex_exit(&so->so_lock); 3365 pflag = opflag | MSG_NOMARK; 3366 goto retry; 3367 } 3368 goto out_locked; 3369 } 3370 3371 /* strsock_proto has already verified length and alignment */ 3372 tpr = (union T_primitives *)mp->b_rptr; 3373 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3374 3375 switch (tpr->type) { 3376 case T_DATA_IND: { 3377 if ((so->so_state & 3378 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3379 (uiop->uio_resid != saved_resid) && 3380 !(flags & MSG_PEEK)) { 3381 sorecv_update_oobstate(so); 3382 } 3383 3384 /* 3385 * Set msg_flags to MSG_EOR based on 3386 * MORE_flag and MOREDATA. 3387 */ 3388 mutex_enter(&so->so_lock); 3389 so->so_state &= ~SS_SAVEDEOR; 3390 if (!(tpr->data_ind.MORE_flag & 1)) { 3391 if (!(rval.r_val1 & MOREDATA)) 3392 msg->msg_flags |= MSG_EOR; 3393 else 3394 so->so_state |= SS_SAVEDEOR; 3395 } 3396 freemsg(mp); 3397 /* 3398 * If some data was received (i.e. not EOF) and the 3399 * read/recv* has not been satisfied wait for some more. 
3400 */ 3401 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3402 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3403 mutex_exit(&so->so_lock); 3404 pflag = opflag | MSG_NOMARK; 3405 goto retry; 3406 } 3407 goto out_locked; 3408 } 3409 case T_UNITDATA_IND: { 3410 void *addr; 3411 t_uscalar_t addrlen; 3412 void *abuf; 3413 t_uscalar_t optlen; 3414 void *opt; 3415 3416 if ((so->so_state & 3417 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3418 (uiop->uio_resid != saved_resid) && 3419 !(flags & MSG_PEEK)) { 3420 sorecv_update_oobstate(so); 3421 } 3422 3423 if (namelen != 0) { 3424 /* Caller wants source address */ 3425 addrlen = tpr->unitdata_ind.SRC_length; 3426 addr = sogetoff(mp, 3427 tpr->unitdata_ind.SRC_offset, 3428 addrlen, 1); 3429 if (addr == NULL) { 3430 freemsg(mp); 3431 error = EPROTO; 3432 eprintsoline(so, error); 3433 goto out; 3434 } 3435 if (so->so_family == AF_UNIX) { 3436 /* 3437 * Can not use the transport level address. 3438 * If there is a SO_SRCADDR option carrying 3439 * the socket level address it will be 3440 * extracted below. 3441 */ 3442 addr = NULL; 3443 addrlen = 0; 3444 } 3445 } 3446 optlen = tpr->unitdata_ind.OPT_length; 3447 if (optlen != 0) { 3448 t_uscalar_t ncontrollen; 3449 3450 /* 3451 * Extract any source address option. 3452 * Determine how large cmsg buffer is needed. 3453 */ 3454 opt = sogetoff(mp, 3455 tpr->unitdata_ind.OPT_offset, 3456 optlen, __TPI_ALIGN_SIZE); 3457 3458 if (opt == NULL) { 3459 freemsg(mp); 3460 error = EPROTO; 3461 eprintsoline(so, error); 3462 goto out; 3463 } 3464 if (so->so_family == AF_UNIX) 3465 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3466 ncontrollen = so_cmsglen(mp, opt, optlen, 3467 !(flags & MSG_XPG4_2)); 3468 if (controllen != 0) 3469 controllen = ncontrollen; 3470 else if (ncontrollen != 0) 3471 msg->msg_flags |= MSG_CTRUNC; 3472 } else { 3473 controllen = 0; 3474 } 3475 3476 if (namelen != 0) { 3477 /* 3478 * Return address to caller. 
3479 * Caller handles truncation if length 3480 * exceeds msg_namelen. 3481 * NOTE: AF_UNIX NUL termination is ensured by 3482 * the sender's copyin_name(). 3483 */ 3484 abuf = kmem_alloc(addrlen, KM_SLEEP); 3485 3486 bcopy(addr, abuf, addrlen); 3487 msg->msg_name = abuf; 3488 msg->msg_namelen = addrlen; 3489 } 3490 3491 if (controllen != 0) { 3492 /* 3493 * Return control msg to caller. 3494 * Caller handles truncation if length 3495 * exceeds msg_controllen. 3496 */ 3497 control = kmem_zalloc(controllen, KM_SLEEP); 3498 3499 error = so_opt2cmsg(mp, opt, optlen, 3500 !(flags & MSG_XPG4_2), 3501 control, controllen); 3502 if (error) { 3503 freemsg(mp); 3504 if (msg->msg_namelen != 0) 3505 kmem_free(msg->msg_name, 3506 msg->msg_namelen); 3507 kmem_free(control, controllen); 3508 eprintsoline(so, error); 3509 goto out; 3510 } 3511 msg->msg_control = control; 3512 msg->msg_controllen = controllen; 3513 } 3514 3515 freemsg(mp); 3516 goto out; 3517 } 3518 case T_OPTDATA_IND: { 3519 struct T_optdata_req *tdr; 3520 void *opt; 3521 t_uscalar_t optlen; 3522 3523 if ((so->so_state & 3524 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3525 (uiop->uio_resid != saved_resid) && 3526 !(flags & MSG_PEEK)) { 3527 sorecv_update_oobstate(so); 3528 } 3529 3530 tdr = (struct T_optdata_req *)mp->b_rptr; 3531 optlen = tdr->OPT_length; 3532 if (optlen != 0) { 3533 t_uscalar_t ncontrollen; 3534 /* 3535 * Determine how large cmsg buffer is needed. 3536 */ 3537 opt = sogetoff(mp, 3538 tpr->optdata_ind.OPT_offset, 3539 optlen, __TPI_ALIGN_SIZE); 3540 3541 if (opt == NULL) { 3542 freemsg(mp); 3543 error = EPROTO; 3544 eprintsoline(so, error); 3545 goto out; 3546 } 3547 3548 ncontrollen = so_cmsglen(mp, opt, optlen, 3549 !(flags & MSG_XPG4_2)); 3550 if (controllen != 0) 3551 controllen = ncontrollen; 3552 else if (ncontrollen != 0) 3553 msg->msg_flags |= MSG_CTRUNC; 3554 } else { 3555 controllen = 0; 3556 } 3557 3558 if (controllen != 0) { 3559 /* 3560 * Return control msg to caller. 
3561 * Caller handles truncation if length 3562 * exceeds msg_controllen. 3563 */ 3564 control = kmem_zalloc(controllen, KM_SLEEP); 3565 3566 error = so_opt2cmsg(mp, opt, optlen, 3567 !(flags & MSG_XPG4_2), 3568 control, controllen); 3569 if (error) { 3570 freemsg(mp); 3571 kmem_free(control, controllen); 3572 eprintsoline(so, error); 3573 goto out; 3574 } 3575 msg->msg_control = control; 3576 msg->msg_controllen = controllen; 3577 } 3578 3579 /* 3580 * Set msg_flags to MSG_EOR based on 3581 * DATA_flag and MOREDATA. 3582 */ 3583 mutex_enter(&so->so_lock); 3584 so->so_state &= ~SS_SAVEDEOR; 3585 if (!(tpr->data_ind.MORE_flag & 1)) { 3586 if (!(rval.r_val1 & MOREDATA)) 3587 msg->msg_flags |= MSG_EOR; 3588 else 3589 so->so_state |= SS_SAVEDEOR; 3590 } 3591 freemsg(mp); 3592 /* 3593 * If some data was received (i.e. not EOF) and the 3594 * read/recv* has not been satisfied wait for some more. 3595 * Not possible to wait if control info was received. 3596 */ 3597 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3598 controllen == 0 && 3599 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3600 mutex_exit(&so->so_lock); 3601 pflag = opflag | MSG_NOMARK; 3602 goto retry; 3603 } 3604 goto out_locked; 3605 } 3606 case T_EXDATA_IND: { 3607 dprintso(so, 1, 3608 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3609 "state %s\n", 3610 sti->sti_oobsigcnt, sti->sti_oobcnt, 3611 saved_resid - uiop->uio_resid, 3612 pr_state(so->so_state, so->so_mode))); 3613 /* 3614 * kstrgetmsg handles MSGMARK so there is nothing to 3615 * inspect in the T_EXDATA_IND. 3616 * strsock_proto makes the stream head queue the T_EXDATA_IND 3617 * as a separate message with no M_DATA component. Furthermore, 3618 * the stream head does not consolidate M_DATA messages onto 3619 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3620 * remains a message by itself. This is needed since MSGMARK 3621 * marks both the whole message as well as the last byte 3622 * of the message. 
3623 */ 3624 freemsg(mp); 3625 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3626 if (flags & MSG_PEEK) { 3627 /* 3628 * Even though we are peeking we consume the 3629 * T_EXDATA_IND thereby moving the mark information 3630 * to SS_RCVATMARK. Then the oob code below will 3631 * retry the peeking kstrgetmsg. 3632 * Note that the stream head read queue is 3633 * never flushed without holding SOREADLOCKED 3634 * thus the T_EXDATA_IND can not disappear 3635 * underneath us. 3636 */ 3637 dprintso(so, 1, 3638 ("sotpi_recvmsg: consume EXDATA_IND " 3639 "counts %d/%d state %s\n", 3640 sti->sti_oobsigcnt, 3641 sti->sti_oobcnt, 3642 pr_state(so->so_state, so->so_mode))); 3643 3644 pflag = MSG_ANY | MSG_DELAYERROR; 3645 if (so->so_mode & SM_ATOMIC) 3646 pflag |= MSG_DISCARDTAIL; 3647 3648 pri = 0; 3649 mp = NULL; 3650 3651 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3652 &pri, &pflag, (clock_t)-1, &rval); 3653 ASSERT(uiop->uio_resid == saved_resid); 3654 3655 if (error) { 3656 #ifdef SOCK_DEBUG 3657 if (error != EWOULDBLOCK && error != EINTR) { 3658 eprintsoline(so, error); 3659 } 3660 #endif /* SOCK_DEBUG */ 3661 goto out; 3662 } 3663 ASSERT(mp); 3664 tpr = (union T_primitives *)mp->b_rptr; 3665 ASSERT(tpr->type == T_EXDATA_IND); 3666 freemsg(mp); 3667 } /* end "if (flags & MSG_PEEK)" */ 3668 3669 /* 3670 * Decrement the number of queued and pending oob. 3671 * 3672 * SS_RCVATMARK is cleared when we read past a mark. 3673 * SS_HAVEOOBDATA is cleared when we've read past the 3674 * last mark. 3675 * SS_OOBPEND is cleared if we've read past the last 3676 * mark and no (new) SIGURG has been posted. 
3677 */ 3678 mutex_enter(&so->so_lock); 3679 ASSERT(so_verify_oobstate(so)); 3680 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3681 ASSERT(sti->sti_oobsigcnt > 0); 3682 sti->sti_oobsigcnt--; 3683 ASSERT(sti->sti_oobcnt > 0); 3684 sti->sti_oobcnt--; 3685 /* 3686 * Since the T_EXDATA_IND has been removed from the stream 3687 * head, but we have not read data past the mark, 3688 * sockfs needs to track that the socket is still at the mark. 3689 * 3690 * Since no data was received call kstrgetmsg again to wait 3691 * for data. 3692 */ 3693 so->so_state |= SS_RCVATMARK; 3694 mutex_exit(&so->so_lock); 3695 dprintso(so, 1, 3696 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3697 sti->sti_oobsigcnt, sti->sti_oobcnt, 3698 pr_state(so->so_state, so->so_mode))); 3699 pflag = opflag; 3700 goto retry; 3701 } 3702 default: 3703 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3704 (void *)so, tpr->type, (void *)mp); 3705 ASSERT(0); 3706 freemsg(mp); 3707 error = EPROTO; 3708 eprintsoline(so, error); 3709 goto out; 3710 } 3711 /* NOTREACHED */ 3712 out: 3713 mutex_enter(&so->so_lock); 3714 out_locked: 3715 reterr = sod_rcv_done(so, suiop, uiop); 3716 if (reterr != 0 && error == 0) 3717 error = reterr; 3718 so_unlock_read(so); /* Clear SOREADLOCKED */ 3719 mutex_exit(&so->so_lock); 3720 return (error); 3721 } 3722 3723 /* 3724 * Sending data with options on a datagram socket. 3725 * Assumes caller has verified that SS_ISBOUND etc. are set. 
3726 */ 3727 static int 3728 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3729 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3730 { 3731 struct T_unitdata_req tudr; 3732 mblk_t *mp; 3733 int error; 3734 void *addr; 3735 socklen_t addrlen; 3736 void *src; 3737 socklen_t srclen; 3738 ssize_t len; 3739 int size; 3740 struct T_opthdr toh; 3741 struct fdbuf *fdbuf; 3742 t_uscalar_t optlen; 3743 void *fds; 3744 int fdlen; 3745 sotpi_info_t *sti = SOTOTPI(so); 3746 3747 ASSERT(name && namelen); 3748 ASSERT(control && controllen); 3749 3750 len = uiop->uio_resid; 3751 if (len > (ssize_t)sti->sti_tidu_size) { 3752 return (EMSGSIZE); 3753 } 3754 3755 /* 3756 * For AF_UNIX the destination address is translated to an internal 3757 * name and the source address is passed as an option. 3758 * Also, file descriptors are passed as file pointers in an 3759 * option. 3760 */ 3761 3762 /* 3763 * Length and family checks. 3764 */ 3765 error = so_addr_verify(so, name, namelen); 3766 if (error) { 3767 eprintsoline(so, error); 3768 return (error); 3769 } 3770 if (so->so_family == AF_UNIX) { 3771 if (sti->sti_faddr_noxlate) { 3772 /* 3773 * Already have a transport internal address. Do not 3774 * pass any (transport internal) source address. 3775 */ 3776 addr = name; 3777 addrlen = namelen; 3778 src = NULL; 3779 srclen = 0; 3780 } else { 3781 /* 3782 * Pass the sockaddr_un source address as an option 3783 * and translate the remote address. 3784 * 3785 * Note that this code does not prevent sti_laddr_sa 3786 * from changing while it is being used. Thus 3787 * if an unbind+bind occurs concurrently with this 3788 * send the peer might see a partially new and a 3789 * partially old "from" address. 
3790 */ 3791 src = sti->sti_laddr_sa; 3792 srclen = (t_uscalar_t)sti->sti_laddr_len; 3793 dprintso(so, 1, 3794 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3795 srclen, src)); 3796 error = so_ux_addr_xlate(so, name, namelen, 3797 (flags & MSG_XPG4_2), 3798 &addr, &addrlen); 3799 if (error) { 3800 eprintsoline(so, error); 3801 return (error); 3802 } 3803 } 3804 } else { 3805 addr = name; 3806 addrlen = namelen; 3807 src = NULL; 3808 srclen = 0; 3809 } 3810 optlen = so_optlen(control, controllen, 3811 !(flags & MSG_XPG4_2)); 3812 tudr.PRIM_type = T_UNITDATA_REQ; 3813 tudr.DEST_length = addrlen; 3814 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3815 if (srclen != 0) 3816 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3817 _TPI_ALIGN_TOPT(srclen)); 3818 else 3819 tudr.OPT_length = optlen; 3820 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3821 _TPI_ALIGN_TOPT(addrlen)); 3822 3823 size = tudr.OPT_offset + tudr.OPT_length; 3824 3825 /* 3826 * File descriptors only when SM_FDPASSING set. 3827 */ 3828 error = so_getfdopt(control, controllen, 3829 !(flags & MSG_XPG4_2), &fds, &fdlen); 3830 if (error) 3831 return (error); 3832 if (fdlen != -1) { 3833 if (!(so->so_mode & SM_FDPASSING)) 3834 return (EOPNOTSUPP); 3835 3836 error = fdbuf_create(fds, fdlen, &fdbuf); 3837 if (error) 3838 return (error); 3839 mp = fdbuf_allocmsg(size, fdbuf); 3840 } else { 3841 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3842 if (mp == NULL) { 3843 /* 3844 * Caught a signal waiting for memory. 3845 * Let send* return EINTR. 
3846 */ 3847 return (EINTR); 3848 } 3849 } 3850 soappendmsg(mp, &tudr, sizeof (tudr)); 3851 soappendmsg(mp, addr, addrlen); 3852 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3853 3854 if (fdlen != -1) { 3855 ASSERT(fdbuf != NULL); 3856 toh.level = SOL_SOCKET; 3857 toh.name = SO_FILEP; 3858 toh.len = fdbuf->fd_size + 3859 (t_uscalar_t)sizeof (struct T_opthdr); 3860 toh.status = 0; 3861 soappendmsg(mp, &toh, sizeof (toh)); 3862 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3863 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3864 } 3865 if (srclen != 0) { 3866 /* 3867 * There is a AF_UNIX sockaddr_un to include as a source 3868 * address option. 3869 */ 3870 toh.level = SOL_SOCKET; 3871 toh.name = SO_SRCADDR; 3872 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3873 toh.status = 0; 3874 soappendmsg(mp, &toh, sizeof (toh)); 3875 soappendmsg(mp, src, srclen); 3876 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3877 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3878 } 3879 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3880 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3881 /* At most 3 bytes left in the message */ 3882 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3883 ASSERT(MBLKL(mp) <= (ssize_t)size); 3884 3885 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3886 if (audit_active) 3887 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3888 3889 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3890 #ifdef SOCK_DEBUG 3891 if (error) { 3892 eprintsoline(so, error); 3893 } 3894 #endif /* SOCK_DEBUG */ 3895 return (error); 3896 } 3897 3898 /* 3899 * Sending data with options on a connected stream socket. 3900 * Assumes caller has verified that SS_ISCONNECTED is set. 
3901 */ 3902 static int 3903 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3904 t_uscalar_t controllen, int flags) 3905 { 3906 struct T_optdata_req tdr; 3907 mblk_t *mp; 3908 int error; 3909 ssize_t iosize; 3910 int size; 3911 struct fdbuf *fdbuf; 3912 t_uscalar_t optlen; 3913 void *fds; 3914 int fdlen; 3915 struct T_opthdr toh; 3916 sotpi_info_t *sti = SOTOTPI(so); 3917 3918 dprintso(so, 1, 3919 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3920 3921 /* 3922 * Has to be bound and connected. However, since no locks are 3923 * held the state could have changed after sotpi_sendmsg checked it 3924 * thus it is not possible to ASSERT on the state. 3925 */ 3926 3927 /* Options on connection-oriented only when SM_OPTDATA set. */ 3928 if (!(so->so_mode & SM_OPTDATA)) 3929 return (EOPNOTSUPP); 3930 3931 do { 3932 /* 3933 * Set the MORE flag if uio_resid does not fit in this 3934 * message or if the caller passed in "more". 3935 * Error for transports with zero tidu_size. 3936 */ 3937 tdr.PRIM_type = T_OPTDATA_REQ; 3938 iosize = sti->sti_tidu_size; 3939 if (iosize <= 0) 3940 return (EMSGSIZE); 3941 if (uiop->uio_resid > iosize) { 3942 tdr.DATA_flag = 1; 3943 } else { 3944 if (more) 3945 tdr.DATA_flag = 1; 3946 else 3947 tdr.DATA_flag = 0; 3948 iosize = uiop->uio_resid; 3949 } 3950 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3951 tdr.DATA_flag, iosize)); 3952 3953 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3954 tdr.OPT_length = optlen; 3955 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3956 3957 size = (int)sizeof (tdr) + optlen; 3958 /* 3959 * File descriptors only when SM_FDPASSING set. 
3960 */ 3961 error = so_getfdopt(control, controllen, 3962 !(flags & MSG_XPG4_2), &fds, &fdlen); 3963 if (error) 3964 return (error); 3965 if (fdlen != -1) { 3966 if (!(so->so_mode & SM_FDPASSING)) 3967 return (EOPNOTSUPP); 3968 3969 error = fdbuf_create(fds, fdlen, &fdbuf); 3970 if (error) 3971 return (error); 3972 mp = fdbuf_allocmsg(size, fdbuf); 3973 } else { 3974 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3975 if (mp == NULL) { 3976 /* 3977 * Caught a signal waiting for memory. 3978 * Let send* return EINTR. 3979 */ 3980 return (EINTR); 3981 } 3982 } 3983 soappendmsg(mp, &tdr, sizeof (tdr)); 3984 3985 if (fdlen != -1) { 3986 ASSERT(fdbuf != NULL); 3987 toh.level = SOL_SOCKET; 3988 toh.name = SO_FILEP; 3989 toh.len = fdbuf->fd_size + 3990 (t_uscalar_t)sizeof (struct T_opthdr); 3991 toh.status = 0; 3992 soappendmsg(mp, &toh, sizeof (toh)); 3993 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3994 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3995 } 3996 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3997 /* At most 3 bytes left in the message */ 3998 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3999 ASSERT(MBLKL(mp) <= (ssize_t)size); 4000 4001 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4002 4003 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4004 0, MSG_BAND, 0); 4005 if (error) { 4006 eprintsoline(so, error); 4007 return (error); 4008 } 4009 control = NULL; 4010 if (uiop->uio_resid > 0) { 4011 /* 4012 * Recheck for fatal errors. Fail write even though 4013 * some data have been written. This is consistent 4014 * with strwrite semantics and BSD sockets semantics. 
4015 */ 4016 if (so->so_state & SS_CANTSENDMORE) { 4017 eprintsoline(so, error); 4018 return (EPIPE); 4019 } 4020 if (so->so_error != 0) { 4021 mutex_enter(&so->so_lock); 4022 error = sogeterr(so, B_TRUE); 4023 mutex_exit(&so->so_lock); 4024 if (error != 0) { 4025 eprintsoline(so, error); 4026 return (error); 4027 } 4028 } 4029 } 4030 } while (uiop->uio_resid > 0); 4031 return (0); 4032 } 4033 4034 /* 4035 * Sending data on a datagram socket. 4036 * Assumes caller has verified that SS_ISBOUND etc. are set. 4037 * 4038 * For AF_UNIX the destination address is translated to an internal 4039 * name and the source address is passed as an option. 4040 */ 4041 int 4042 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 4043 struct uio *uiop, int flags) 4044 { 4045 struct T_unitdata_req tudr; 4046 mblk_t *mp; 4047 int error; 4048 void *addr; 4049 socklen_t addrlen; 4050 void *src; 4051 socklen_t srclen; 4052 ssize_t len; 4053 sotpi_info_t *sti = SOTOTPI(so); 4054 4055 ASSERT(name != NULL && namelen != 0); 4056 4057 len = uiop->uio_resid; 4058 if (len > sti->sti_tidu_size) { 4059 error = EMSGSIZE; 4060 goto done; 4061 } 4062 4063 /* Length and family checks */ 4064 error = so_addr_verify(so, name, namelen); 4065 if (error != 0) 4066 goto done; 4067 4068 if (sti->sti_direct) 4069 return (sodgram_direct(so, name, namelen, uiop, flags)); 4070 4071 if (so->so_family == AF_UNIX) { 4072 if (sti->sti_faddr_noxlate) { 4073 /* 4074 * Already have a transport internal address. Do not 4075 * pass any (transport internal) source address. 4076 */ 4077 addr = name; 4078 addrlen = namelen; 4079 src = NULL; 4080 srclen = 0; 4081 } else { 4082 /* 4083 * Pass the sockaddr_un source address as an option 4084 * and translate the remote address. 4085 * 4086 * Note that this code does not prevent sti_laddr_sa 4087 * from changing while it is being used. 
Thus 4088 * if an unbind+bind occurs concurrently with this 4089 * send the peer might see a partially new and a 4090 * partially old "from" address. 4091 */ 4092 src = sti->sti_laddr_sa; 4093 srclen = (socklen_t)sti->sti_laddr_len; 4094 dprintso(so, 1, 4095 ("sosend_dgram UNIX: srclen %d, src %p\n", 4096 srclen, src)); 4097 error = so_ux_addr_xlate(so, name, namelen, 4098 (flags & MSG_XPG4_2), 4099 &addr, &addrlen); 4100 if (error) { 4101 eprintsoline(so, error); 4102 goto done; 4103 } 4104 } 4105 } else { 4106 addr = name; 4107 addrlen = namelen; 4108 src = NULL; 4109 srclen = 0; 4110 } 4111 tudr.PRIM_type = T_UNITDATA_REQ; 4112 tudr.DEST_length = addrlen; 4113 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4114 if (srclen == 0) { 4115 tudr.OPT_length = 0; 4116 tudr.OPT_offset = 0; 4117 4118 mp = soallocproto2(&tudr, sizeof (tudr), 4119 addr, addrlen, 0, _ALLOC_INTR, CRED()); 4120 if (mp == NULL) { 4121 /* 4122 * Caught a signal waiting for memory. 4123 * Let send* return EINTR. 4124 */ 4125 error = EINTR; 4126 goto done; 4127 } 4128 } else { 4129 /* 4130 * There is a AF_UNIX sockaddr_un to include as a source 4131 * address option. 4132 */ 4133 struct T_opthdr toh; 4134 ssize_t size; 4135 4136 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4137 _TPI_ALIGN_TOPT(srclen)); 4138 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4139 _TPI_ALIGN_TOPT(addrlen)); 4140 4141 toh.level = SOL_SOCKET; 4142 toh.name = SO_SRCADDR; 4143 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4144 toh.status = 0; 4145 4146 size = tudr.OPT_offset + tudr.OPT_length; 4147 mp = soallocproto2(&tudr, sizeof (tudr), 4148 addr, addrlen, size, _ALLOC_INTR, CRED()); 4149 if (mp == NULL) { 4150 /* 4151 * Caught a signal waiting for memory. 4152 * Let send* return EINTR. 
4153 */ 4154 error = EINTR; 4155 goto done; 4156 } 4157 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4158 soappendmsg(mp, &toh, sizeof (toh)); 4159 soappendmsg(mp, src, srclen); 4160 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4161 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4162 } 4163 4164 if (audit_active) 4165 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4166 4167 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4168 done: 4169 #ifdef SOCK_DEBUG 4170 if (error) { 4171 eprintsoline(so, error); 4172 } 4173 #endif /* SOCK_DEBUG */ 4174 return (error); 4175 } 4176 4177 /* 4178 * Sending data on a connected stream socket. 4179 * Assumes caller has verified that SS_ISCONNECTED is set. 4180 */ 4181 int 4182 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4183 int sflag) 4184 { 4185 struct T_data_req tdr; 4186 mblk_t *mp; 4187 int error; 4188 ssize_t iosize; 4189 sotpi_info_t *sti = SOTOTPI(so); 4190 4191 dprintso(so, 1, 4192 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4193 (void *)so, uiop->uio_resid, prim, sflag)); 4194 4195 /* 4196 * Has to be bound and connected. However, since no locks are 4197 * held the state could have changed after sotpi_sendmsg checked it 4198 * thus it is not possible to ASSERT on the state. 4199 */ 4200 4201 do { 4202 /* 4203 * Set the MORE flag if uio_resid does not fit in this 4204 * message or if the caller passed in "more". 4205 * Error for transports with zero tidu_size. 
4206 */ 4207 tdr.PRIM_type = prim; 4208 iosize = sti->sti_tidu_size; 4209 if (iosize <= 0) 4210 return (EMSGSIZE); 4211 if (uiop->uio_resid > iosize) { 4212 tdr.MORE_flag = 1; 4213 } else { 4214 if (more) 4215 tdr.MORE_flag = 1; 4216 else 4217 tdr.MORE_flag = 0; 4218 iosize = uiop->uio_resid; 4219 } 4220 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4221 prim, tdr.MORE_flag, iosize)); 4222 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4223 if (mp == NULL) { 4224 /* 4225 * Caught a signal waiting for memory. 4226 * Let send* return EINTR. 4227 */ 4228 return (EINTR); 4229 } 4230 4231 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4232 0, sflag | MSG_BAND, 0); 4233 if (error) { 4234 eprintsoline(so, error); 4235 return (error); 4236 } 4237 if (uiop->uio_resid > 0) { 4238 /* 4239 * Recheck for fatal errors. Fail write even though 4240 * some data have been written. This is consistent 4241 * with strwrite semantics and BSD sockets semantics. 4242 */ 4243 if (so->so_state & SS_CANTSENDMORE) { 4244 eprintsoline(so, error); 4245 return (EPIPE); 4246 } 4247 if (so->so_error != 0) { 4248 mutex_enter(&so->so_lock); 4249 error = sogeterr(so, B_TRUE); 4250 mutex_exit(&so->so_lock); 4251 if (error != 0) { 4252 eprintsoline(so, error); 4253 return (error); 4254 } 4255 } 4256 } 4257 } while (uiop->uio_resid > 0); 4258 return (0); 4259 } 4260 4261 /* 4262 * Check the state for errors and call the appropriate send function. 4263 * 4264 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4265 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4266 * after sending the message. 
4267 */ 4268 static int 4269 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4270 struct cred *cr) 4271 { 4272 int so_state; 4273 int so_mode; 4274 int error; 4275 struct sockaddr *name; 4276 t_uscalar_t namelen; 4277 int dontroute; 4278 int flags; 4279 sotpi_info_t *sti = SOTOTPI(so); 4280 4281 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4282 (void *)so, (void *)msg, msg->msg_flags, 4283 pr_state(so->so_state, so->so_mode), so->so_error)); 4284 4285 if (so->so_version == SOV_STREAM) { 4286 /* The imaginary "sockmod" has been popped - act as a stream */ 4287 so_update_attrs(so, SOMOD); 4288 return (strwrite(SOTOV(so), uiop, cr)); 4289 } 4290 4291 mutex_enter(&so->so_lock); 4292 so_state = so->so_state; 4293 4294 if (so_state & SS_CANTSENDMORE) { 4295 mutex_exit(&so->so_lock); 4296 return (EPIPE); 4297 } 4298 4299 if (so->so_error != 0) { 4300 error = sogeterr(so, B_TRUE); 4301 if (error != 0) { 4302 mutex_exit(&so->so_lock); 4303 return (error); 4304 } 4305 } 4306 4307 name = (struct sockaddr *)msg->msg_name; 4308 namelen = msg->msg_namelen; 4309 4310 so_mode = so->so_mode; 4311 4312 if (name == NULL) { 4313 if (!(so_state & SS_ISCONNECTED)) { 4314 mutex_exit(&so->so_lock); 4315 if (so_mode & SM_CONNREQUIRED) 4316 return (ENOTCONN); 4317 else 4318 return (EDESTADDRREQ); 4319 } 4320 if (so_mode & SM_CONNREQUIRED) { 4321 name = NULL; 4322 namelen = 0; 4323 } else { 4324 /* 4325 * Note that this code does not prevent sti_faddr_sa 4326 * from changing while it is being used. Thus 4327 * if an "unconnect"+connect occurs concurrently with 4328 * this send the datagram might be delivered to a 4329 * garbaled address. 
4330 */ 4331 ASSERT(sti->sti_faddr_sa); 4332 name = sti->sti_faddr_sa; 4333 namelen = (t_uscalar_t)sti->sti_faddr_len; 4334 } 4335 } else { 4336 if (!(so_state & SS_ISCONNECTED) && 4337 (so_mode & SM_CONNREQUIRED)) { 4338 /* Required but not connected */ 4339 mutex_exit(&so->so_lock); 4340 return (ENOTCONN); 4341 } 4342 /* 4343 * Ignore the address on connection-oriented sockets. 4344 * Just like BSD this code does not generate an error for 4345 * TCP (a CONNREQUIRED socket) when sending to an address 4346 * passed in with sendto/sendmsg. Instead the data is 4347 * delivered on the connection as if no address had been 4348 * supplied. 4349 */ 4350 if ((so_state & SS_ISCONNECTED) && 4351 !(so_mode & SM_CONNREQUIRED)) { 4352 mutex_exit(&so->so_lock); 4353 return (EISCONN); 4354 } 4355 if (!(so_state & SS_ISBOUND)) { 4356 so_lock_single(so); /* Set SOLOCKED */ 4357 error = sotpi_bind(so, NULL, 0, 4358 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4359 so_unlock_single(so, SOLOCKED); 4360 if (error) { 4361 mutex_exit(&so->so_lock); 4362 eprintsoline(so, error); 4363 return (error); 4364 } 4365 } 4366 /* 4367 * Handle delayed datagram errors. These are only queued 4368 * when the application sets SO_DGRAM_ERRIND. 4369 * Return the error if we are sending to the address 4370 * that was returned in the last T_UDERROR_IND. 4371 * If sending to some other address discard the delayed 4372 * error indication. 
4373 */ 4374 if (sti->sti_delayed_error) { 4375 struct T_uderror_ind *tudi; 4376 void *addr; 4377 t_uscalar_t addrlen; 4378 boolean_t match = B_FALSE; 4379 4380 ASSERT(sti->sti_eaddr_mp); 4381 error = sti->sti_delayed_error; 4382 sti->sti_delayed_error = 0; 4383 tudi = 4384 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4385 addrlen = tudi->DEST_length; 4386 addr = sogetoff(sti->sti_eaddr_mp, 4387 tudi->DEST_offset, addrlen, 1); 4388 ASSERT(addr); /* Checked by strsock_proto */ 4389 switch (so->so_family) { 4390 case AF_INET: { 4391 /* Compare just IP address and port */ 4392 sin_t *sin1 = (sin_t *)name; 4393 sin_t *sin2 = (sin_t *)addr; 4394 4395 if (addrlen == sizeof (sin_t) && 4396 namelen == addrlen && 4397 sin1->sin_port == sin2->sin_port && 4398 sin1->sin_addr.s_addr == 4399 sin2->sin_addr.s_addr) 4400 match = B_TRUE; 4401 break; 4402 } 4403 case AF_INET6: { 4404 /* Compare just IP address and port. Not flow */ 4405 sin6_t *sin1 = (sin6_t *)name; 4406 sin6_t *sin2 = (sin6_t *)addr; 4407 4408 if (addrlen == sizeof (sin6_t) && 4409 namelen == addrlen && 4410 sin1->sin6_port == sin2->sin6_port && 4411 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4412 &sin2->sin6_addr)) 4413 match = B_TRUE; 4414 break; 4415 } 4416 case AF_UNIX: 4417 default: 4418 if (namelen == addrlen && 4419 bcmp(name, addr, namelen) == 0) 4420 match = B_TRUE; 4421 } 4422 if (match) { 4423 freemsg(sti->sti_eaddr_mp); 4424 sti->sti_eaddr_mp = NULL; 4425 mutex_exit(&so->so_lock); 4426 #ifdef DEBUG 4427 dprintso(so, 0, 4428 ("sockfs delayed error %d for %s\n", 4429 error, 4430 pr_addr(so->so_family, name, namelen))); 4431 #endif /* DEBUG */ 4432 return (error); 4433 } 4434 freemsg(sti->sti_eaddr_mp); 4435 sti->sti_eaddr_mp = NULL; 4436 } 4437 } 4438 mutex_exit(&so->so_lock); 4439 4440 flags = msg->msg_flags; 4441 dontroute = 0; 4442 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4443 uint32_t val; 4444 4445 val = 1; 4446 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4447 
&val, (t_uscalar_t)sizeof (val), cr); 4448 if (error) 4449 return (error); 4450 dontroute = 1; 4451 } 4452 4453 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4454 error = EOPNOTSUPP; 4455 goto done; 4456 } 4457 if (msg->msg_controllen != 0) { 4458 if (!(so_mode & SM_CONNREQUIRED)) { 4459 so_update_attrs(so, SOMOD); 4460 error = sosend_dgramcmsg(so, name, namelen, uiop, 4461 msg->msg_control, msg->msg_controllen, flags); 4462 } else { 4463 if (flags & MSG_OOB) { 4464 /* Can't generate T_EXDATA_REQ with options */ 4465 error = EOPNOTSUPP; 4466 goto done; 4467 } 4468 so_update_attrs(so, SOMOD); 4469 error = sosend_svccmsg(so, uiop, 4470 !(flags & MSG_EOR), 4471 msg->msg_control, msg->msg_controllen, 4472 flags); 4473 } 4474 goto done; 4475 } 4476 4477 so_update_attrs(so, SOMOD); 4478 if (!(so_mode & SM_CONNREQUIRED)) { 4479 /* 4480 * If there is no SO_DONTROUTE to turn off return immediately 4481 * from send_dgram. This can allow tail-call optimizations. 4482 */ 4483 if (!dontroute) { 4484 return (sosend_dgram(so, name, namelen, uiop, flags)); 4485 } 4486 error = sosend_dgram(so, name, namelen, uiop, flags); 4487 } else { 4488 t_scalar_t prim; 4489 int sflag; 4490 4491 /* Ignore msg_name in the connected state */ 4492 if (flags & MSG_OOB) { 4493 prim = T_EXDATA_REQ; 4494 /* 4495 * Send down T_EXDATA_REQ even if there is flow 4496 * control for data. 4497 */ 4498 sflag = MSG_IGNFLOW; 4499 } else { 4500 if (so_mode & SM_BYTESTREAM) { 4501 /* Byte stream transport - use write */ 4502 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4503 4504 /* Send M_DATA messages */ 4505 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4506 (error = nl7c_data(so, uiop)) >= 0) { 4507 /* NL7C consumed the data */ 4508 return (error); 4509 } 4510 /* 4511 * If there is no SO_DONTROUTE to turn off, 4512 * sti_direct is on, and there is no flow 4513 * control, we can take the fast path. 
4514 */ 4515 if (!dontroute && sti->sti_direct != 0 && 4516 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4517 return (sostream_direct(so, uiop, 4518 NULL, cr)); 4519 } 4520 error = strwrite(SOTOV(so), uiop, cr); 4521 goto done; 4522 } 4523 prim = T_DATA_REQ; 4524 sflag = 0; 4525 } 4526 /* 4527 * If there is no SO_DONTROUTE to turn off return immediately 4528 * from sosend_svc. This can allow tail-call optimizations. 4529 */ 4530 if (!dontroute) 4531 return (sosend_svc(so, uiop, prim, 4532 !(flags & MSG_EOR), sflag)); 4533 error = sosend_svc(so, uiop, prim, 4534 !(flags & MSG_EOR), sflag); 4535 } 4536 ASSERT(dontroute); 4537 done: 4538 if (dontroute) { 4539 uint32_t val; 4540 4541 val = 0; 4542 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4543 &val, (t_uscalar_t)sizeof (val), cr); 4544 } 4545 return (error); 4546 } 4547 4548 /* 4549 * kstrwritemp() has very similar semantics as that of strwrite(). 4550 * The main difference is it obtains mblks from the caller and also 4551 * does not do any copy as done in strwrite() from user buffers to 4552 * kernel buffers. 4553 * 4554 * Currently, this routine is used by sendfile to send data allocated 4555 * within the kernel without any copying. This interface does not use the 4556 * synchronous stream interface as synch. stream interface implies 4557 * copying. 4558 */ 4559 int 4560 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4561 { 4562 struct stdata *stp; 4563 struct queue *wqp; 4564 mblk_t *newmp; 4565 char waitflag; 4566 int tempmode; 4567 int error = 0; 4568 int done = 0; 4569 struct sonode *so; 4570 boolean_t direct; 4571 4572 ASSERT(vp->v_stream); 4573 stp = vp->v_stream; 4574 4575 so = VTOSO(vp); 4576 direct = _SOTOTPI(so)->sti_direct; 4577 4578 /* 4579 * This is the sockfs direct fast path. canputnext() need 4580 * not be accurate so we don't grab the sd_lock here. If 4581 * we get flow-controlled, we grab sd_lock just before the 4582 * do..while loop below to emulate what strwrite() does. 
4583 */ 4584 wqp = stp->sd_wrq; 4585 if (canputnext(wqp) && direct && 4586 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4587 return (sostream_direct(so, NULL, mp, CRED())); 4588 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4589 /* Fast check of flags before acquiring the lock */ 4590 mutex_enter(&stp->sd_lock); 4591 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4592 mutex_exit(&stp->sd_lock); 4593 if (error != 0) { 4594 if (!(stp->sd_flag & STPLEX) && 4595 (stp->sd_wput_opt & SW_SIGPIPE)) { 4596 error = EPIPE; 4597 } 4598 return (error); 4599 } 4600 } 4601 4602 waitflag = WRITEWAIT; 4603 if (stp->sd_flag & OLDNDELAY) 4604 tempmode = fmode & ~FNDELAY; 4605 else 4606 tempmode = fmode; 4607 4608 mutex_enter(&stp->sd_lock); 4609 do { 4610 if (canputnext(wqp)) { 4611 mutex_exit(&stp->sd_lock); 4612 if (stp->sd_wputdatafunc != NULL) { 4613 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4614 NULL, NULL, NULL); 4615 if (newmp == NULL) { 4616 /* The caller will free mp */ 4617 return (ECOMM); 4618 } 4619 mp = newmp; 4620 } 4621 putnext(wqp, mp); 4622 return (0); 4623 } 4624 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4625 &done); 4626 } while (error == 0 && !done); 4627 4628 mutex_exit(&stp->sd_lock); 4629 /* 4630 * EAGAIN tells the application to try again. ENOMEM 4631 * is returned only if the memory allocation size 4632 * exceeds the physical limits of the system. ENOMEM 4633 * can't be true here. 
4634 */ 4635 if (error == ENOMEM) 4636 error = EAGAIN; 4637 return (error); 4638 } 4639 4640 /* ARGSUSED */ 4641 static int 4642 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4643 struct cred *cr, mblk_t **mpp) 4644 { 4645 int error; 4646 4647 if (so->so_family != AF_INET && so->so_family != AF_INET6) 4648 return (EAFNOSUPPORT); 4649 4650 if (so->so_state & SS_CANTSENDMORE) 4651 return (EPIPE); 4652 4653 if (so->so_type != SOCK_STREAM) 4654 return (EOPNOTSUPP); 4655 4656 if ((so->so_state & SS_ISCONNECTED) == 0) 4657 return (ENOTCONN); 4658 4659 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4660 if (error == 0) 4661 *mpp = NULL; 4662 return (error); 4663 } 4664 4665 /* 4666 * Sending data on a datagram socket. 4667 * Assumes caller has verified that SS_ISBOUND etc. are set. 4668 */ 4669 /* ARGSUSED */ 4670 static int 4671 sodgram_direct(struct sonode *so, struct sockaddr *name, 4672 socklen_t namelen, struct uio *uiop, int flags) 4673 { 4674 struct T_unitdata_req tudr; 4675 mblk_t *mp = NULL; 4676 int error = 0; 4677 void *addr; 4678 socklen_t addrlen; 4679 ssize_t len; 4680 struct stdata *stp = SOTOV(so)->v_stream; 4681 int so_state; 4682 queue_t *udp_wq; 4683 boolean_t connected; 4684 mblk_t *mpdata = NULL; 4685 sotpi_info_t *sti = SOTOTPI(so); 4686 4687 ASSERT(name != NULL && namelen != 0); 4688 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4689 ASSERT(!(so->so_mode & SM_EXDATA)); 4690 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4691 ASSERT(SOTOV(so)->v_type == VSOCK); 4692 4693 /* Caller checked for proper length */ 4694 len = uiop->uio_resid; 4695 ASSERT(len <= sti->sti_tidu_size); 4696 4697 /* Length and family checks have been done by caller */ 4698 ASSERT(name->sa_family == so->so_family); 4699 ASSERT(so->so_family == AF_INET || 4700 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4701 ASSERT(so->so_family == AF_INET6 || 4702 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4703 4704 addr = name; 4705 addrlen 
/*
 * Sending data on a datagram socket.
 * Assumes caller has verified that SS_ISBOUND etc. are set.
 *
 * Direct-call send: when the queue below the stream head accepts data the
 * user data is copied in with mcopyinuio() and passed straight to
 * udp_wput(). Otherwise the send falls back to strwrite() (connected
 * socket) or kstrputmsg() (unconnected socket, with the T_UNITDATA_REQ
 * header built here).
 *
 * The ASSERTs below state what the caller is expected to have checked:
 * a non-NULL name of the right length and family, an AF_INET or AF_INET6
 * socket that needs no connection and has no expedited data, and
 * uio_resid no larger than sti_tidu_size.
 *
 * Returns 0 or an errno value; EINTR when the header allocation was
 * interrupted by a signal.
 */
/* ARGSUSED */
static int
sodgram_direct(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, struct uio *uiop, int flags)
{
	struct T_unitdata_req tudr;
	mblk_t *mp = NULL;
	int error = 0;
	void *addr;
	socklen_t addrlen;
	ssize_t len;
	struct stdata *stp = SOTOV(so)->v_stream;
	int so_state;
	queue_t *udp_wq;
	boolean_t connected;
	mblk_t *mpdata = NULL;
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(name != NULL && namelen != 0);
	ASSERT(!(so->so_mode & SM_CONNREQUIRED));
	ASSERT(!(so->so_mode & SM_EXDATA));
	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
	ASSERT(SOTOV(so)->v_type == VSOCK);

	/* Caller checked for proper length */
	len = uiop->uio_resid;
	ASSERT(len <= sti->sti_tidu_size);

	/* Length and family checks have been done by caller */
	ASSERT(name->sa_family == so->so_family);
	ASSERT(so->so_family == AF_INET ||
	    (namelen == (socklen_t)sizeof (struct sockaddr_in6)));
	ASSERT(so->so_family == AF_INET6 ||
	    (namelen == (socklen_t)sizeof (struct sockaddr_in)));

	addr = name;
	addrlen = namelen;

	/* Access check, only made when sd_sidp is set */
	if (stp->sd_sidp != NULL &&
	    (error = straccess(stp, JCWRITE)) != 0)
		goto done;

	/* so_state is sampled once, without taking so_lock */
	so_state = so->so_state;

	connected = so_state & SS_ISCONNECTED;
	if (!connected) {
		/*
		 * Unconnected: build a T_UNITDATA_REQ header carrying the
		 * destination address (no options). A connected socket
		 * sends the data without such a header.
		 */
		tudr.PRIM_type = T_UNITDATA_REQ;
		tudr.DEST_length = addrlen;
		tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
		tudr.OPT_length = 0;
		tudr.OPT_offset = 0;

		mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0,
		    _ALLOC_INTR, CRED());
		if (mp == NULL) {
			/*
			 * Caught a signal waiting for memory.
			 * Let send* return EINTR.
			 */
			error = EINTR;
			goto done;
		}
	}

	/*
	 * For UDP we don't break up the copyin into smaller pieces
	 * as in the TCP case. That means if ENOMEM is returned by
	 * mcopyinuio() then the uio vector has not been modified at
	 * all and we fallback to either strwrite() or kstrputmsg()
	 * below. Note also that we never generate priority messages
	 * from here.
	 */
	udp_wq = stp->sd_wrq->q_next;
	if (canput(udp_wq) &&
	    (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) {
		ASSERT(DB_TYPE(mpdata) == M_DATA);
		ASSERT(uiop->uio_resid == 0);
		/* Append the data to the header, or send the data alone */
		if (!connected)
			linkb(mp, mpdata);
		else
			mp = mpdata;
		if (audit_active)
			audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);

		udp_wput(udp_wq, mp);
		return (0);
	}

	/*
	 * Fast path not taken: either the queue is flow controlled
	 * (error is still 0) or mcopyinuio() failed.
	 */
	ASSERT(mpdata == NULL);
	if (error != 0 && error != ENOMEM) {
		/* mp is NULL for a connected socket; freemsg is given it as is */
		freemsg(mp);
		return (error);
	}

	/*
	 * For connected, let strwrite() handle the blocking case.
	 * Otherwise we fall thru and use kstrputmsg().
	 */
	if (connected)
		return (strwrite(SOTOV(so), uiop, CRED()));

	if (audit_active)
		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);

	/* Pass the header plus len bytes of user data through kstrputmsg() */
	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
done:
#ifdef SOCK_DEBUG
	if (error != 0) {
		eprintsoline(so, error);
	}
#endif /* SOCK_DEBUG */
	return (error);
}
4766 */ 4767 if (connected) 4768 return (strwrite(SOTOV(so), uiop, CRED())); 4769 4770 if (audit_active) 4771 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4772 4773 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4774 done: 4775 #ifdef SOCK_DEBUG 4776 if (error != 0) { 4777 eprintsoline(so, error); 4778 } 4779 #endif /* SOCK_DEBUG */ 4780 return (error); 4781 } 4782 4783 int 4784 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4785 { 4786 struct stdata *stp = SOTOV(so)->v_stream; 4787 ssize_t iosize, rmax, maxblk; 4788 queue_t *tcp_wq = stp->sd_wrq->q_next; 4789 mblk_t *newmp; 4790 int error = 0, wflag = 0; 4791 4792 ASSERT(so->so_mode & SM_BYTESTREAM); 4793 ASSERT(SOTOV(so)->v_type == VSOCK); 4794 4795 if (stp->sd_sidp != NULL && 4796 (error = straccess(stp, JCWRITE)) != 0) 4797 return (error); 4798 4799 if (uiop == NULL) { 4800 /* 4801 * kstrwritemp() should have checked sd_flag and 4802 * flow-control before coming here. If we end up 4803 * here it means that we can simply pass down the 4804 * data to tcp. 
4805 */ 4806 ASSERT(mp != NULL); 4807 if (stp->sd_wputdatafunc != NULL) { 4808 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4809 NULL, NULL, NULL); 4810 if (newmp == NULL) { 4811 /* The caller will free mp */ 4812 return (ECOMM); 4813 } 4814 mp = newmp; 4815 } 4816 tcp_wput(tcp_wq, mp); 4817 return (0); 4818 } 4819 4820 /* Fallback to strwrite() to do proper error handling */ 4821 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4822 return (strwrite(SOTOV(so), uiop, cr)); 4823 4824 rmax = stp->sd_qn_maxpsz; 4825 ASSERT(rmax >= 0 || rmax == INFPSZ); 4826 if (rmax == 0 || uiop->uio_resid <= 0) 4827 return (0); 4828 4829 if (rmax == INFPSZ) 4830 rmax = uiop->uio_resid; 4831 4832 maxblk = stp->sd_maxblk; 4833 4834 for (;;) { 4835 iosize = MIN(uiop->uio_resid, rmax); 4836 4837 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4838 if (mp == NULL) { 4839 /* 4840 * Fallback to strwrite() for ENOMEM; if this 4841 * is our first time in this routine and the uio 4842 * vector has not been modified, we will end up 4843 * calling strwrite() without any flag set. 4844 */ 4845 if (error == ENOMEM) 4846 goto slow_send; 4847 else 4848 return (error); 4849 } 4850 ASSERT(uiop->uio_resid >= 0); 4851 /* 4852 * If mp is non-NULL and ENOMEM is set, it means that 4853 * mcopyinuio() was able to break down some of the user 4854 * data into one or more mblks. Send the partial data 4855 * to tcp and let the rest be handled in strwrite(). 
4856 */ 4857 ASSERT(error == 0 || error == ENOMEM); 4858 if (stp->sd_wputdatafunc != NULL) { 4859 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4860 NULL, NULL, NULL); 4861 if (newmp == NULL) { 4862 /* The caller will free mp */ 4863 return (ECOMM); 4864 } 4865 mp = newmp; 4866 } 4867 tcp_wput(tcp_wq, mp); 4868 4869 wflag |= NOINTR; 4870 4871 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4872 ASSERT(error == 0); 4873 break; 4874 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4875 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4876 slow_send: 4877 /* 4878 * We were able to send down partial data using 4879 * the direct call interface, but are now relying 4880 * on strwrite() to handle the non-fastpath cases. 4881 * If the socket is blocking we will sleep in 4882 * strwaitq() until write is permitted, otherwise, 4883 * we will need to return the amount of bytes 4884 * written so far back to the app. This is the 4885 * reason why we pass NOINTR flag to strwrite() 4886 * for non-blocking socket, because we don't want 4887 * to return EAGAIN when portion of the user data 4888 * has actually been sent down. 4889 */ 4890 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4891 } 4892 } 4893 return (0); 4894 } 4895 4896 /* 4897 * Update sti_faddr by asking the transport (unless AF_UNIX). 
4898 */ 4899 /* ARGSUSED */ 4900 int 4901 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4902 boolean_t accept, struct cred *cr) 4903 { 4904 struct strbuf strbuf; 4905 int error = 0, res; 4906 void *addr; 4907 t_uscalar_t addrlen; 4908 k_sigset_t smask; 4909 sotpi_info_t *sti = SOTOTPI(so); 4910 4911 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4912 (void *)so, pr_state(so->so_state, so->so_mode))); 4913 4914 ASSERT(*namelen > 0); 4915 mutex_enter(&so->so_lock); 4916 so_lock_single(so); /* Set SOLOCKED */ 4917 4918 if (accept) { 4919 bcopy(sti->sti_faddr_sa, name, 4920 MIN(*namelen, sti->sti_faddr_len)); 4921 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4922 goto done; 4923 } 4924 4925 if (!(so->so_state & SS_ISCONNECTED)) { 4926 error = ENOTCONN; 4927 goto done; 4928 } 4929 /* Added this check for X/Open */ 4930 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4931 error = EINVAL; 4932 if (xnet_check_print) { 4933 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4934 } 4935 goto done; 4936 } 4937 4938 if (sti->sti_faddr_valid) { 4939 bcopy(sti->sti_faddr_sa, name, 4940 MIN(*namelen, sti->sti_faddr_len)); 4941 *namelen = sti->sti_faddr_noxlate ? 
0: sti->sti_faddr_len; 4942 goto done; 4943 } 4944 4945 #ifdef DEBUG 4946 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4947 pr_addr(so->so_family, sti->sti_faddr_sa, 4948 (t_uscalar_t)sti->sti_faddr_len))); 4949 #endif /* DEBUG */ 4950 4951 if (so->so_family == AF_UNIX) { 4952 /* Transport has different name space - return local info */ 4953 if (sti->sti_faddr_noxlate) 4954 *namelen = 0; 4955 error = 0; 4956 goto done; 4957 } 4958 4959 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4960 4961 ASSERT(sti->sti_faddr_sa); 4962 /* Allocate local buffer to use with ioctl */ 4963 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4964 mutex_exit(&so->so_lock); 4965 addr = kmem_alloc(addrlen, KM_SLEEP); 4966 4967 /* 4968 * Issue TI_GETPEERNAME with signals masked. 4969 * Put the result in sti_faddr_sa so that getpeername works after 4970 * a shutdown(output). 4971 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4972 * back to the socket. 4973 */ 4974 strbuf.buf = addr; 4975 strbuf.maxlen = addrlen; 4976 strbuf.len = 0; 4977 4978 sigintr(&smask, 0); 4979 res = 0; 4980 ASSERT(cr); 4981 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4982 0, K_TO_K, cr, &res); 4983 sigunintr(&smask); 4984 4985 mutex_enter(&so->so_lock); 4986 /* 4987 * If there is an error record the error in so_error put don't fail 4988 * the getpeername. Instead fallback on the recorded 4989 * sti->sti_faddr_sa. 4990 */ 4991 if (error) { 4992 /* 4993 * Various stream head errors can be returned to the ioctl. 4994 * However, it is impossible to determine which ones of 4995 * these are really socket level errors that were incorrectly 4996 * consumed by the ioctl. Thus this code silently ignores the 4997 * error - to code explicitly does not reinstate the error 4998 * using soseterror(). 4999 * Experiments have shows that at least this set of 5000 * errors are reported and should not be reinstated on the 5001 * socket: 5002 * EINVAL E.g. 
if an I_LINK was in effect when 5003 * getpeername was called. 5004 * EPIPE The ioctl error semantics prefer the write 5005 * side error over the read side error. 5006 * ENOTCONN The transport just got disconnected but 5007 * sockfs had not yet seen the T_DISCON_IND 5008 * when issuing the ioctl. 5009 */ 5010 error = 0; 5011 } else if (res == 0 && strbuf.len > 0 && 5012 (so->so_state & SS_ISCONNECTED)) { 5013 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 5014 sti->sti_faddr_len = (socklen_t)strbuf.len; 5015 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 5016 sti->sti_faddr_valid = 1; 5017 5018 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 5019 *namelen = sti->sti_faddr_len; 5020 } 5021 kmem_free(addr, addrlen); 5022 #ifdef DEBUG 5023 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 5024 pr_addr(so->so_family, sti->sti_faddr_sa, 5025 (t_uscalar_t)sti->sti_faddr_len))); 5026 #endif /* DEBUG */ 5027 done: 5028 so_unlock_single(so, SOLOCKED); 5029 mutex_exit(&so->so_lock); 5030 return (error); 5031 } 5032 5033 /* 5034 * Update sti_laddr by asking the transport (unless AF_UNIX). 
5035 */ 5036 int 5037 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 5038 struct cred *cr) 5039 { 5040 struct strbuf strbuf; 5041 int error = 0, res; 5042 void *addr; 5043 t_uscalar_t addrlen; 5044 k_sigset_t smask; 5045 sotpi_info_t *sti = SOTOTPI(so); 5046 5047 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 5048 (void *)so, pr_state(so->so_state, so->so_mode))); 5049 5050 ASSERT(*namelen > 0); 5051 mutex_enter(&so->so_lock); 5052 so_lock_single(so); /* Set SOLOCKED */ 5053 5054 #ifdef DEBUG 5055 5056 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 5057 pr_addr(so->so_family, sti->sti_laddr_sa, 5058 (t_uscalar_t)sti->sti_laddr_len))); 5059 #endif /* DEBUG */ 5060 if (sti->sti_laddr_valid) { 5061 bcopy(sti->sti_laddr_sa, name, 5062 MIN(*namelen, sti->sti_laddr_len)); 5063 *namelen = sti->sti_laddr_len; 5064 goto done; 5065 } 5066 5067 if (so->so_family == AF_UNIX) { 5068 /* Transport has different name space - return local info */ 5069 error = 0; 5070 *namelen = 0; 5071 goto done; 5072 } 5073 if (!(so->so_state & SS_ISBOUND)) { 5074 /* If not bound, then nothing to return. */ 5075 error = 0; 5076 goto done; 5077 } 5078 5079 /* Allocate local buffer to use with ioctl */ 5080 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 5081 mutex_exit(&so->so_lock); 5082 addr = kmem_alloc(addrlen, KM_SLEEP); 5083 5084 /* 5085 * Issue TI_GETMYNAME with signals masked. 5086 * Put the result in sti_laddr_sa so that getsockname works after 5087 * a shutdown(output). 5088 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5089 * back to the socket. 
5090 */ 5091 strbuf.buf = addr; 5092 strbuf.maxlen = addrlen; 5093 strbuf.len = 0; 5094 5095 sigintr(&smask, 0); 5096 res = 0; 5097 ASSERT(cr); 5098 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5099 0, K_TO_K, cr, &res); 5100 sigunintr(&smask); 5101 5102 mutex_enter(&so->so_lock); 5103 /* 5104 * If there is an error record the error in so_error put don't fail 5105 * the getsockname. Instead fallback on the recorded 5106 * sti->sti_laddr_sa. 5107 */ 5108 if (error) { 5109 /* 5110 * Various stream head errors can be returned to the ioctl. 5111 * However, it is impossible to determine which ones of 5112 * these are really socket level errors that were incorrectly 5113 * consumed by the ioctl. Thus this code silently ignores the 5114 * error - to code explicitly does not reinstate the error 5115 * using soseterror(). 5116 * Experiments have shows that at least this set of 5117 * errors are reported and should not be reinstated on the 5118 * socket: 5119 * EINVAL E.g. if an I_LINK was in effect when 5120 * getsockname was called. 5121 * EPIPE The ioctl error semantics prefer the write 5122 * side error over the read side error. 5123 */ 5124 error = 0; 5125 } else if (res == 0 && strbuf.len > 0 && 5126 (so->so_state & SS_ISBOUND)) { 5127 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5128 sti->sti_laddr_len = (socklen_t)strbuf.len; 5129 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5130 sti->sti_laddr_valid = 1; 5131 5132 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5133 *namelen = sti->sti_laddr_len; 5134 } 5135 kmem_free(addr, addrlen); 5136 #ifdef DEBUG 5137 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5138 pr_addr(so->so_family, sti->sti_laddr_sa, 5139 (t_uscalar_t)sti->sti_laddr_len))); 5140 #endif /* DEBUG */ 5141 done: 5142 so_unlock_single(so, SOLOCKED); 5143 mutex_exit(&so->so_lock); 5144 return (error); 5145 } 5146 5147 /* 5148 * Get socket options. 
For SOL_SOCKET options some options are handled 5149 * by the sockfs while others use the value recorded in the sonode as a 5150 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5151 * 5152 * On the return most *optlenp bytes are copied to optval. 5153 */ 5154 /* ARGSUSED */ 5155 int 5156 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5157 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5158 { 5159 struct T_optmgmt_req optmgmt_req; 5160 struct T_optmgmt_ack *optmgmt_ack; 5161 struct opthdr oh; 5162 struct opthdr *opt_res; 5163 mblk_t *mp = NULL; 5164 int error = 0; 5165 void *option = NULL; /* Set if fallback value */ 5166 t_uscalar_t maxlen = *optlenp; 5167 t_uscalar_t len; 5168 uint32_t value; 5169 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5170 struct timeval32 tmo_val32; 5171 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5172 5173 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5174 (void *)so, level, option_name, optval, (void *)optlenp, 5175 pr_state(so->so_state, so->so_mode))); 5176 5177 mutex_enter(&so->so_lock); 5178 so_lock_single(so); /* Set SOLOCKED */ 5179 5180 /* 5181 * Check for SOL_SOCKET options. 5182 * Certain SOL_SOCKET options are returned directly whereas 5183 * others only provide a default (fallback) value should 5184 * the T_SVR4_OPTMGMT_REQ fail. 
5185 */ 5186 if (level == SOL_SOCKET) { 5187 /* Check parameters */ 5188 switch (option_name) { 5189 case SO_TYPE: 5190 case SO_ERROR: 5191 case SO_DEBUG: 5192 case SO_ACCEPTCONN: 5193 case SO_REUSEADDR: 5194 case SO_KEEPALIVE: 5195 case SO_DONTROUTE: 5196 case SO_BROADCAST: 5197 case SO_USELOOPBACK: 5198 case SO_OOBINLINE: 5199 case SO_SNDBUF: 5200 case SO_RCVBUF: 5201 #ifdef notyet 5202 case SO_SNDLOWAT: 5203 case SO_RCVLOWAT: 5204 #endif /* notyet */ 5205 case SO_DOMAIN: 5206 case SO_DGRAM_ERRIND: 5207 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5208 error = EINVAL; 5209 eprintsoline(so, error); 5210 goto done2; 5211 } 5212 break; 5213 case SO_RCVTIMEO: 5214 case SO_SNDTIMEO: 5215 if (get_udatamodel() == DATAMODEL_NONE || 5216 get_udatamodel() == DATAMODEL_NATIVE) { 5217 if (maxlen < sizeof (struct timeval)) { 5218 error = EINVAL; 5219 eprintsoline(so, error); 5220 goto done2; 5221 } 5222 } else { 5223 if (maxlen < sizeof (struct timeval32)) { 5224 error = EINVAL; 5225 eprintsoline(so, error); 5226 goto done2; 5227 } 5228 5229 } 5230 break; 5231 case SO_LINGER: 5232 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5233 error = EINVAL; 5234 eprintsoline(so, error); 5235 goto done2; 5236 } 5237 break; 5238 case SO_SND_BUFINFO: 5239 if (maxlen < (t_uscalar_t) 5240 sizeof (struct so_snd_bufinfo)) { 5241 error = EINVAL; 5242 eprintsoline(so, error); 5243 goto done2; 5244 } 5245 break; 5246 } 5247 5248 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5249 5250 switch (option_name) { 5251 case SO_TYPE: 5252 value = so->so_type; 5253 option = &value; 5254 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5255 5256 case SO_ERROR: 5257 value = sogeterr(so, B_TRUE); 5258 option = &value; 5259 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5260 5261 case SO_ACCEPTCONN: 5262 if (so->so_state & SS_ACCEPTCONN) 5263 value = SO_ACCEPTCONN; 5264 else 5265 value = 0; 5266 #ifdef DEBUG 5267 if (value) { 5268 dprintso(so, 1, 5269 ("sotpi_getsockopt: 0x%x is 
set\n", 5270 option_name)); 5271 } else { 5272 dprintso(so, 1, 5273 ("sotpi_getsockopt: 0x%x not set\n", 5274 option_name)); 5275 } 5276 #endif /* DEBUG */ 5277 option = &value; 5278 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5279 5280 case SO_DEBUG: 5281 case SO_REUSEADDR: 5282 case SO_KEEPALIVE: 5283 case SO_DONTROUTE: 5284 case SO_BROADCAST: 5285 case SO_USELOOPBACK: 5286 case SO_OOBINLINE: 5287 case SO_DGRAM_ERRIND: 5288 value = (so->so_options & option_name); 5289 #ifdef DEBUG 5290 if (value) { 5291 dprintso(so, 1, 5292 ("sotpi_getsockopt: 0x%x is set\n", 5293 option_name)); 5294 } else { 5295 dprintso(so, 1, 5296 ("sotpi_getsockopt: 0x%x not set\n", 5297 option_name)); 5298 } 5299 #endif /* DEBUG */ 5300 option = &value; 5301 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5302 5303 /* 5304 * The following options are only returned by sockfs when the 5305 * T_SVR4_OPTMGMT_REQ fails. 5306 */ 5307 case SO_LINGER: 5308 option = &so->so_linger; 5309 len = (t_uscalar_t)sizeof (struct linger); 5310 break; 5311 case SO_SNDBUF: { 5312 ssize_t lvalue; 5313 5314 /* 5315 * If the option has not been set then get a default 5316 * value from the read queue. This value is 5317 * returned if the transport fails 5318 * the T_SVR4_OPTMGMT_REQ. 5319 */ 5320 lvalue = so->so_sndbuf; 5321 if (lvalue == 0) { 5322 mutex_exit(&so->so_lock); 5323 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5324 QHIWAT, 0, &lvalue); 5325 mutex_enter(&so->so_lock); 5326 dprintso(so, 1, 5327 ("got SO_SNDBUF %ld from q\n", lvalue)); 5328 } 5329 value = (int)lvalue; 5330 option = &value; 5331 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5332 break; 5333 } 5334 case SO_RCVBUF: { 5335 ssize_t lvalue; 5336 5337 /* 5338 * If the option has not been set then get a default 5339 * value from the read queue. This value is 5340 * returned if the transport fails 5341 * the T_SVR4_OPTMGMT_REQ. 
5342 * 5343 * XXX If SO_RCVBUF has been set and this is an 5344 * XPG 4.2 application then do not ask the transport 5345 * since the transport might adjust the value and not 5346 * return exactly what was set by the application. 5347 * For non-XPG 4.2 application we return the value 5348 * that the transport is actually using. 5349 */ 5350 lvalue = so->so_rcvbuf; 5351 if (lvalue == 0) { 5352 mutex_exit(&so->so_lock); 5353 (void) strqget(RD(strvp2wq(SOTOV(so))), 5354 QHIWAT, 0, &lvalue); 5355 mutex_enter(&so->so_lock); 5356 dprintso(so, 1, 5357 ("got SO_RCVBUF %ld from q\n", lvalue)); 5358 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5359 value = (int)lvalue; 5360 option = &value; 5361 goto copyout; /* skip asking transport */ 5362 } 5363 value = (int)lvalue; 5364 option = &value; 5365 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5366 break; 5367 } 5368 case SO_DOMAIN: 5369 value = so->so_family; 5370 option = &value; 5371 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5372 5373 #ifdef notyet 5374 /* 5375 * We do not implement the semantics of these options 5376 * thus we shouldn't implement the options either. 
5377 */ 5378 case SO_SNDLOWAT: 5379 value = so->so_sndlowat; 5380 option = &value; 5381 break; 5382 case SO_RCVLOWAT: 5383 value = so->so_rcvlowat; 5384 option = &value; 5385 break; 5386 #endif /* notyet */ 5387 case SO_SNDTIMEO: 5388 case SO_RCVTIMEO: { 5389 clock_t val; 5390 5391 if (option_name == SO_RCVTIMEO) 5392 val = drv_hztousec(so->so_rcvtimeo); 5393 else 5394 val = drv_hztousec(so->so_sndtimeo); 5395 tmo_val.tv_sec = val / (1000 * 1000); 5396 tmo_val.tv_usec = val % (1000 * 1000); 5397 if (get_udatamodel() == DATAMODEL_NONE || 5398 get_udatamodel() == DATAMODEL_NATIVE) { 5399 option = &tmo_val; 5400 len = sizeof (struct timeval); 5401 } else { 5402 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5403 option = &tmo_val32; 5404 len = sizeof (struct timeval32); 5405 } 5406 break; 5407 } 5408 case SO_SND_BUFINFO: { 5409 snd_bufinfo.sbi_wroff = 5410 (so->so_proto_props).sopp_wroff; 5411 snd_bufinfo.sbi_maxblk = 5412 (so->so_proto_props).sopp_maxblk; 5413 snd_bufinfo.sbi_maxpsz = 5414 (so->so_proto_props).sopp_maxpsz; 5415 snd_bufinfo.sbi_tail = 5416 (so->so_proto_props).sopp_tail; 5417 option = &snd_bufinfo; 5418 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5419 break; 5420 } 5421 } 5422 } 5423 5424 mutex_exit(&so->so_lock); 5425 5426 /* Send request */ 5427 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5428 optmgmt_req.MGMT_flags = T_CHECK; 5429 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5430 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5431 5432 oh.level = level; 5433 oh.name = option_name; 5434 oh.len = maxlen; 5435 5436 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5437 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5438 /* Let option management work in the presence of data flow control */ 5439 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5440 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5441 mp = NULL; 5442 mutex_enter(&so->so_lock); 5443 if (error) { 5444 eprintsoline(so, error); 5445 goto done2; 
5446 } 5447 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5448 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5449 if (error) { 5450 if (option != NULL) { 5451 /* We have a fallback value */ 5452 error = 0; 5453 goto copyout; 5454 } 5455 eprintsoline(so, error); 5456 goto done2; 5457 } 5458 ASSERT(mp); 5459 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5460 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5461 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5462 if (opt_res == NULL) { 5463 if (option != NULL) { 5464 /* We have a fallback value */ 5465 error = 0; 5466 goto copyout; 5467 } 5468 error = EPROTO; 5469 eprintsoline(so, error); 5470 goto done; 5471 } 5472 option = &opt_res[1]; 5473 5474 /* check to ensure that the option is within bounds */ 5475 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5476 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5477 if (option != NULL) { 5478 /* We have a fallback value */ 5479 error = 0; 5480 goto copyout; 5481 } 5482 error = EPROTO; 5483 eprintsoline(so, error); 5484 goto done; 5485 } 5486 5487 len = opt_res->len; 5488 5489 copyout: { 5490 t_uscalar_t size = MIN(len, maxlen); 5491 bcopy(option, optval, size); 5492 bcopy(&size, optlenp, sizeof (size)); 5493 } 5494 done: 5495 freemsg(mp); 5496 done2: 5497 so_unlock_single(so, SOLOCKED); 5498 mutex_exit(&so->so_lock); 5499 5500 return (error); 5501 } 5502 5503 /* 5504 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5505 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5506 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5507 * setsockopt has to work even if the transport does not support the option. 
5508 */ 5509 /* ARGSUSED */ 5510 int 5511 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5512 const void *optval, t_uscalar_t optlen, struct cred *cr) 5513 { 5514 struct T_optmgmt_req optmgmt_req; 5515 struct opthdr oh; 5516 mblk_t *mp; 5517 int error = 0; 5518 boolean_t handled = B_FALSE; 5519 5520 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5521 (void *)so, level, option_name, optval, optlen, 5522 pr_state(so->so_state, so->so_mode))); 5523 5524 /* X/Open requires this check */ 5525 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5526 if (xnet_check_print) 5527 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5528 return (EINVAL); 5529 } 5530 5531 mutex_enter(&so->so_lock); 5532 so_lock_single(so); /* Set SOLOCKED */ 5533 mutex_exit(&so->so_lock); 5534 5535 /* 5536 * For SOCKET or TCP level options, try to set it here itself 5537 * provided socket has not been popped and we know the tcp 5538 * structure (stored in so_priv). 5539 */ 5540 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5541 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5542 (so->so_version == SOV_SOCKSTREAM) && 5543 (so->so_proto_handle != NULL)) { 5544 tcp_t *tcp = (tcp_t *)so->so_proto_handle; 5545 boolean_t onoff; 5546 5547 #define intvalue (*(int32_t *)optval) 5548 5549 switch (level) { 5550 case SOL_SOCKET: 5551 switch (option_name) { /* Check length param */ 5552 case SO_DEBUG: 5553 case SO_REUSEADDR: 5554 case SO_DONTROUTE: 5555 case SO_BROADCAST: 5556 case SO_USELOOPBACK: 5557 case SO_OOBINLINE: 5558 case SO_DGRAM_ERRIND: 5559 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5560 error = EINVAL; 5561 eprintsoline(so, error); 5562 mutex_enter(&so->so_lock); 5563 goto done2; 5564 } 5565 ASSERT(optval); 5566 onoff = intvalue != 0; 5567 handled = B_TRUE; 5568 break; 5569 case SO_SNDTIMEO: 5570 case SO_RCVTIMEO: 5571 if (get_udatamodel() == DATAMODEL_NONE || 5572 get_udatamodel() == DATAMODEL_NATIVE) { 5573 if (optlen != 
5574 sizeof (struct timeval)) { 5575 error = EINVAL; 5576 eprintsoline(so, error); 5577 mutex_enter(&so->so_lock); 5578 goto done2; 5579 } 5580 } else { 5581 if (optlen != 5582 sizeof (struct timeval32)) { 5583 error = EINVAL; 5584 eprintsoline(so, error); 5585 mutex_enter(&so->so_lock); 5586 goto done2; 5587 } 5588 } 5589 ASSERT(optval); 5590 handled = B_TRUE; 5591 break; 5592 case SO_LINGER: 5593 if (optlen != 5594 (t_uscalar_t)sizeof (struct linger)) { 5595 error = EINVAL; 5596 eprintsoline(so, error); 5597 mutex_enter(&so->so_lock); 5598 goto done2; 5599 } 5600 ASSERT(optval); 5601 handled = B_TRUE; 5602 break; 5603 } 5604 5605 switch (option_name) { /* Do actions */ 5606 case SO_LINGER: { 5607 struct linger *lgr = (struct linger *)optval; 5608 5609 if (lgr->l_onoff) { 5610 tcp->tcp_linger = 1; 5611 tcp->tcp_lingertime = lgr->l_linger; 5612 so->so_linger.l_onoff = SO_LINGER; 5613 so->so_options |= SO_LINGER; 5614 } else { 5615 tcp->tcp_linger = 0; 5616 tcp->tcp_lingertime = 0; 5617 so->so_linger.l_onoff = 0; 5618 so->so_options &= ~SO_LINGER; 5619 } 5620 so->so_linger.l_linger = lgr->l_linger; 5621 handled = B_TRUE; 5622 break; 5623 } 5624 case SO_SNDTIMEO: 5625 case SO_RCVTIMEO: { 5626 struct timeval tl; 5627 clock_t val; 5628 5629 if (get_udatamodel() == DATAMODEL_NONE || 5630 get_udatamodel() == DATAMODEL_NATIVE) 5631 bcopy(&tl, (struct timeval *)optval, 5632 sizeof (struct timeval)); 5633 else 5634 TIMEVAL32_TO_TIMEVAL(&tl, 5635 (struct timeval32 *)optval); 5636 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5637 if (option_name == SO_RCVTIMEO) 5638 so->so_rcvtimeo = drv_usectohz(val); 5639 else 5640 so->so_sndtimeo = drv_usectohz(val); 5641 break; 5642 } 5643 5644 case SO_DEBUG: 5645 tcp->tcp_debug = onoff; 5646 #ifdef SOCK_TEST 5647 if (intvalue & 2) 5648 sock_test_timelimit = 10 * hz; 5649 else 5650 sock_test_timelimit = 0; 5651 5652 if (intvalue & 4) 5653 do_useracc = 0; 5654 else 5655 do_useracc = 1; 5656 #endif /* SOCK_TEST */ 5657 break; 5658 case 
SO_DONTROUTE: 5659 /* 5660 * SO_DONTROUTE, SO_USELOOPBACK and 5661 * SO_BROADCAST are only of interest to IP. 5662 * We track them here only so 5663 * that we can report their current value. 5664 */ 5665 tcp->tcp_dontroute = onoff; 5666 if (onoff) 5667 so->so_options |= option_name; 5668 else 5669 so->so_options &= ~option_name; 5670 break; 5671 case SO_USELOOPBACK: 5672 tcp->tcp_useloopback = onoff; 5673 if (onoff) 5674 so->so_options |= option_name; 5675 else 5676 so->so_options &= ~option_name; 5677 break; 5678 case SO_BROADCAST: 5679 tcp->tcp_broadcast = onoff; 5680 if (onoff) 5681 so->so_options |= option_name; 5682 else 5683 so->so_options &= ~option_name; 5684 break; 5685 case SO_REUSEADDR: 5686 tcp->tcp_reuseaddr = onoff; 5687 if (onoff) 5688 so->so_options |= option_name; 5689 else 5690 so->so_options &= ~option_name; 5691 break; 5692 case SO_OOBINLINE: 5693 tcp->tcp_oobinline = onoff; 5694 if (onoff) 5695 so->so_options |= option_name; 5696 else 5697 so->so_options &= ~option_name; 5698 break; 5699 case SO_DGRAM_ERRIND: 5700 tcp->tcp_dgram_errind = onoff; 5701 if (onoff) 5702 so->so_options |= option_name; 5703 else 5704 so->so_options &= ~option_name; 5705 break; 5706 } 5707 break; 5708 case IPPROTO_TCP: 5709 switch (option_name) { 5710 case TCP_NODELAY: 5711 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5712 error = EINVAL; 5713 eprintsoline(so, error); 5714 mutex_enter(&so->so_lock); 5715 goto done2; 5716 } 5717 ASSERT(optval); 5718 tcp->tcp_naglim = intvalue ? 
1 : tcp->tcp_mss; 5719 handled = B_TRUE; 5720 break; 5721 } 5722 break; 5723 default: 5724 handled = B_FALSE; 5725 break; 5726 } 5727 } 5728 5729 if (handled) { 5730 mutex_enter(&so->so_lock); 5731 goto done2; 5732 } 5733 5734 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5735 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5736 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5737 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5738 5739 oh.level = level; 5740 oh.name = option_name; 5741 oh.len = optlen; 5742 5743 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5744 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5745 /* Let option management work in the presence of data flow control */ 5746 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5747 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5748 mp = NULL; 5749 mutex_enter(&so->so_lock); 5750 if (error) { 5751 eprintsoline(so, error); 5752 goto done2; 5753 } 5754 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5755 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5756 if (error) { 5757 eprintsoline(so, error); 5758 goto done; 5759 } 5760 ASSERT(mp); 5761 /* No need to verify T_optmgmt_ack */ 5762 freemsg(mp); 5763 done: 5764 /* 5765 * Check for SOL_SOCKET options and record their values. 5766 * If we know about a SOL_SOCKET parameter and the transport 5767 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5768 * EPROTO) we let the setsockopt succeed. 
5769 */ 5770 if (level == SOL_SOCKET) { 5771 /* Check parameters */ 5772 switch (option_name) { 5773 case SO_DEBUG: 5774 case SO_REUSEADDR: 5775 case SO_KEEPALIVE: 5776 case SO_DONTROUTE: 5777 case SO_BROADCAST: 5778 case SO_USELOOPBACK: 5779 case SO_OOBINLINE: 5780 case SO_SNDBUF: 5781 case SO_RCVBUF: 5782 #ifdef notyet 5783 case SO_SNDLOWAT: 5784 case SO_RCVLOWAT: 5785 #endif /* notyet */ 5786 case SO_DGRAM_ERRIND: 5787 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5788 error = EINVAL; 5789 eprintsoline(so, error); 5790 goto done2; 5791 } 5792 ASSERT(optval); 5793 handled = B_TRUE; 5794 break; 5795 case SO_SNDTIMEO: 5796 case SO_RCVTIMEO: 5797 if (get_udatamodel() == DATAMODEL_NONE || 5798 get_udatamodel() == DATAMODEL_NATIVE) { 5799 if (optlen != sizeof (struct timeval)) { 5800 error = EINVAL; 5801 eprintsoline(so, error); 5802 goto done2; 5803 } 5804 } else { 5805 if (optlen != sizeof (struct timeval32)) { 5806 error = EINVAL; 5807 eprintsoline(so, error); 5808 goto done2; 5809 } 5810 } 5811 ASSERT(optval); 5812 handled = B_TRUE; 5813 break; 5814 case SO_LINGER: 5815 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5816 error = EINVAL; 5817 eprintsoline(so, error); 5818 goto done2; 5819 } 5820 ASSERT(optval); 5821 handled = B_TRUE; 5822 break; 5823 } 5824 5825 #define intvalue (*(int32_t *)optval) 5826 5827 switch (option_name) { 5828 case SO_TYPE: 5829 case SO_ERROR: 5830 case SO_ACCEPTCONN: 5831 /* Can't be set */ 5832 error = ENOPROTOOPT; 5833 goto done2; 5834 case SO_LINGER: { 5835 struct linger *l = (struct linger *)optval; 5836 5837 so->so_linger.l_linger = l->l_linger; 5838 if (l->l_onoff) { 5839 so->so_linger.l_onoff = SO_LINGER; 5840 so->so_options |= SO_LINGER; 5841 } else { 5842 so->so_linger.l_onoff = 0; 5843 so->so_options &= ~SO_LINGER; 5844 } 5845 break; 5846 } 5847 5848 case SO_DEBUG: 5849 #ifdef SOCK_TEST 5850 if (intvalue & 2) 5851 sock_test_timelimit = 10 * hz; 5852 else 5853 sock_test_timelimit = 0; 5854 5855 if (intvalue & 4) 5856 
do_useracc = 0; 5857 else 5858 do_useracc = 1; 5859 #endif /* SOCK_TEST */ 5860 /* FALLTHRU */ 5861 case SO_REUSEADDR: 5862 case SO_KEEPALIVE: 5863 case SO_DONTROUTE: 5864 case SO_BROADCAST: 5865 case SO_USELOOPBACK: 5866 case SO_OOBINLINE: 5867 case SO_DGRAM_ERRIND: 5868 if (intvalue != 0) { 5869 dprintso(so, 1, 5870 ("socket_setsockopt: setting 0x%x\n", 5871 option_name)); 5872 so->so_options |= option_name; 5873 } else { 5874 dprintso(so, 1, 5875 ("socket_setsockopt: clearing 0x%x\n", 5876 option_name)); 5877 so->so_options &= ~option_name; 5878 } 5879 break; 5880 /* 5881 * The following options are only returned by us when the 5882 * transport layer fails. 5883 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5884 * since the transport might adjust the value and not 5885 * return exactly what was set by the application. 5886 */ 5887 case SO_SNDBUF: 5888 so->so_sndbuf = intvalue; 5889 break; 5890 case SO_RCVBUF: 5891 so->so_rcvbuf = intvalue; 5892 break; 5893 case SO_RCVPSH: 5894 so->so_rcv_timer_interval = intvalue; 5895 break; 5896 #ifdef notyet 5897 /* 5898 * We do not implement the semantics of these options 5899 * thus we shouldn't implement the options either. 
5900 */ 5901 case SO_SNDLOWAT: 5902 so->so_sndlowat = intvalue; 5903 break; 5904 case SO_RCVLOWAT: 5905 so->so_rcvlowat = intvalue; 5906 break; 5907 #endif /* notyet */ 5908 case SO_SNDTIMEO: 5909 case SO_RCVTIMEO: { 5910 struct timeval tl; 5911 clock_t val; 5912 5913 if (get_udatamodel() == DATAMODEL_NONE || 5914 get_udatamodel() == DATAMODEL_NATIVE) 5915 bcopy(&tl, (struct timeval *)optval, 5916 sizeof (struct timeval)); 5917 else 5918 TIMEVAL32_TO_TIMEVAL(&tl, 5919 (struct timeval32 *)optval); 5920 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5921 if (option_name == SO_RCVTIMEO) 5922 so->so_rcvtimeo = drv_usectohz(val); 5923 else 5924 so->so_sndtimeo = drv_usectohz(val); 5925 break; 5926 } 5927 } 5928 #undef intvalue 5929 5930 if (error) { 5931 if ((error == ENOPROTOOPT || error == EPROTO || 5932 error == EINVAL) && handled) { 5933 dprintso(so, 1, 5934 ("setsockopt: ignoring error %d for 0x%x\n", 5935 error, option_name)); 5936 error = 0; 5937 } 5938 } 5939 } 5940 done2: 5941 so_unlock_single(so, SOLOCKED); 5942 mutex_exit(&so->so_lock); 5943 return (error); 5944 } 5945 5946 /* 5947 * sotpi_close() is called when the last open reference goes away. 
5948 */ 5949 /* ARGSUSED */ 5950 int 5951 sotpi_close(struct sonode *so, int flag, struct cred *cr) 5952 { 5953 struct vnode *vp = SOTOV(so); 5954 dev_t dev; 5955 int error = 0; 5956 sotpi_info_t *sti = SOTOTPI(so); 5957 5958 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 5959 (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 5960 5961 dev = sti->sti_dev; 5962 5963 ASSERT(STREAMSTAB(getmajor(dev))); 5964 5965 mutex_enter(&so->so_lock); 5966 so_lock_single(so); /* Set SOLOCKED */ 5967 5968 ASSERT(so_verify_oobstate(so)); 5969 5970 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 5971 sti->sti_nl7c_flags = 0; 5972 nl7c_close(so); 5973 } 5974 5975 if (vp->v_stream != NULL) { 5976 vnode_t *ux_vp; 5977 5978 if (so->so_family == AF_UNIX) { 5979 /* Could avoid this when CANTSENDMORE for !dgram */ 5980 so_unix_close(so); 5981 } 5982 5983 mutex_exit(&so->so_lock); 5984 /* 5985 * Disassemble the linkage from the AF_UNIX underlying file 5986 * system vnode to this socket (by atomically clearing 5987 * v_stream in vn_rele_stream) before strclose clears sd_vnode 5988 * and frees the stream head. 5989 */ 5990 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 5991 ASSERT(ux_vp->v_stream); 5992 sti->sti_ux_bound_vp = NULL; 5993 vn_rele_stream(ux_vp); 5994 } 5995 if (so->so_family == AF_INET || so->so_family == AF_INET6) { 5996 strsetrwputdatahooks(SOTOV(so), NULL, NULL); 5997 if (sti->sti_kssl_ent != NULL) { 5998 kssl_release_ent(sti->sti_kssl_ent, so, 5999 sti->sti_kssl_type); 6000 sti->sti_kssl_ent = NULL; 6001 } 6002 if (sti->sti_kssl_ctx != NULL) { 6003 kssl_release_ctx(sti->sti_kssl_ctx); 6004 sti->sti_kssl_ctx = NULL; 6005 } 6006 sti->sti_kssl_type = KSSL_NO_PROXY; 6007 } 6008 error = strclose(vp, flag, cr); 6009 vp->v_stream = NULL; 6010 mutex_enter(&so->so_lock); 6011 } 6012 6013 /* 6014 * Flush the T_DISCON_IND on sti_discon_ind_mp. 
6015 */ 6016 so_flush_discon_ind(so); 6017 6018 so_unlock_single(so, SOLOCKED); 6019 mutex_exit(&so->so_lock); 6020 6021 /* 6022 * Needed for STREAMs. 6023 * Decrement the device driver's reference count for streams 6024 * opened via the clone dip. The driver was held in clone_open(). 6025 * The absence of clone_close() forces this asymmetry. 6026 */ 6027 if (so->so_flag & SOCLONE) 6028 ddi_rele_driver(getmajor(dev)); 6029 6030 return (error); 6031 } 6032 6033 static int 6034 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 6035 struct cred *cr, int32_t *rvalp) 6036 { 6037 struct vnode *vp = SOTOV(so); 6038 sotpi_info_t *sti = SOTOTPI(so); 6039 int error = 0; 6040 6041 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 6042 cmd, arg, pr_state(so->so_state, so->so_mode))); 6043 6044 switch (cmd) { 6045 case SIOCSQPTR: 6046 /* 6047 * SIOCSQPTR is valid only when helper stream is created 6048 * by the protocol. 6049 */ 6050 case _I_INSERT: 6051 case _I_REMOVE: 6052 /* 6053 * Since there's no compelling reason to support these ioctls 6054 * on sockets, and doing so would increase the complexity 6055 * markedly, prevent it. 6056 */ 6057 return (EOPNOTSUPP); 6058 6059 case I_FIND: 6060 case I_LIST: 6061 case I_LOOK: 6062 case I_POP: 6063 case I_PUSH: 6064 /* 6065 * To prevent races and inconsistencies between the actual 6066 * state of the stream and the state according to the sonode, 6067 * we serialize all operations which modify or operate on the 6068 * list of modules on the socket's stream. 6069 */ 6070 mutex_enter(&sti->sti_plumb_lock); 6071 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 6072 mutex_exit(&sti->sti_plumb_lock); 6073 return (error); 6074 6075 default: 6076 if (so->so_version != SOV_STREAM) 6077 break; 6078 6079 /* 6080 * The imaginary "sockmod" has been popped; act as a stream. 
6081 */ 6082 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6083 } 6084 6085 ASSERT(so->so_version != SOV_STREAM); 6086 6087 /* 6088 * Process socket-specific ioctls. 6089 */ 6090 switch (cmd) { 6091 case FIONBIO: { 6092 int32_t value; 6093 6094 if (so_copyin((void *)arg, &value, sizeof (int32_t), 6095 (mode & (int)FKIOCTL))) 6096 return (EFAULT); 6097 6098 mutex_enter(&so->so_lock); 6099 if (value) { 6100 so->so_state |= SS_NDELAY; 6101 } else { 6102 so->so_state &= ~SS_NDELAY; 6103 } 6104 mutex_exit(&so->so_lock); 6105 return (0); 6106 } 6107 6108 case FIOASYNC: { 6109 int32_t value; 6110 6111 if (so_copyin((void *)arg, &value, sizeof (int32_t), 6112 (mode & (int)FKIOCTL))) 6113 return (EFAULT); 6114 6115 mutex_enter(&so->so_lock); 6116 /* 6117 * SS_ASYNC flag not already set correctly? 6118 * (!value != !(so->so_state & SS_ASYNC)) 6119 * but some engineers find that too hard to read. 6120 */ 6121 if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 6122 value != 0 && (so->so_state & SS_ASYNC) == 0) 6123 error = so_flip_async(so, vp, mode, cr); 6124 mutex_exit(&so->so_lock); 6125 return (error); 6126 } 6127 6128 case SIOCSPGRP: 6129 case FIOSETOWN: { 6130 pid_t pgrp; 6131 6132 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 6133 (mode & (int)FKIOCTL))) 6134 return (EFAULT); 6135 6136 mutex_enter(&so->so_lock); 6137 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 6138 /* Any change? */ 6139 if (pgrp != so->so_pgrp) 6140 error = so_set_siggrp(so, vp, pgrp, mode, cr); 6141 mutex_exit(&so->so_lock); 6142 return (error); 6143 } 6144 case SIOCGPGRP: 6145 case FIOGETOWN: 6146 if (so_copyout(&so->so_pgrp, (void *)arg, 6147 sizeof (pid_t), (mode & (int)FKIOCTL))) 6148 return (EFAULT); 6149 return (0); 6150 6151 case SIOCATMARK: { 6152 int retval; 6153 uint_t so_state; 6154 6155 /* 6156 * strwaitmark has a finite timeout after which it 6157 * returns -1 if the mark state is undetermined. 
6158 * In order to avoid any race between the mark state 6159 * in sockfs and the mark state in the stream head this 6160 * routine loops until the mark state can be determined 6161 * (or the urgent data indication has been removed by some 6162 * other thread). 6163 */ 6164 do { 6165 mutex_enter(&so->so_lock); 6166 so_state = so->so_state; 6167 mutex_exit(&so->so_lock); 6168 if (so_state & SS_RCVATMARK) { 6169 retval = 1; 6170 } else if (!(so_state & SS_OOBPEND)) { 6171 /* 6172 * No SIGURG has been generated -- there is no 6173 * pending or present urgent data. Thus can't 6174 * possibly be at the mark. 6175 */ 6176 retval = 0; 6177 } else { 6178 /* 6179 * Have the stream head wait until there is 6180 * either some messages on the read queue, or 6181 * STRATMARK or STRNOTATMARK gets set. The 6182 * STRNOTATMARK flag is used so that the 6183 * transport can send up a MSGNOTMARKNEXT 6184 * M_DATA to indicate that it is not 6185 * at the mark and additional data is not about 6186 * to be send upstream. 6187 * 6188 * If the mark state is undetermined this will 6189 * return -1 and we will loop rechecking the 6190 * socket state. 6191 */ 6192 retval = strwaitmark(vp); 6193 } 6194 } while (retval == -1); 6195 6196 if (so_copyout(&retval, (void *)arg, sizeof (int), 6197 (mode & (int)FKIOCTL))) 6198 return (EFAULT); 6199 return (0); 6200 } 6201 6202 case I_FDINSERT: 6203 case I_SENDFD: 6204 case I_RECVFD: 6205 case I_ATMARK: 6206 case _SIOCSOCKFALLBACK: 6207 /* 6208 * These ioctls do not apply to sockets. I_FDINSERT can be 6209 * used to send M_PROTO messages without modifying the socket 6210 * state. I_SENDFD/RECVFD should not be used for socket file 6211 * descriptor passing since they assume a twisted stream. 6212 * SIOCATMARK must be used instead of I_ATMARK. 6213 * 6214 * _SIOCSOCKFALLBACK from an application should never be 6215 * processed. It is only generated by socktpi_open() or 6216 * in response to I_POP or I_PUSH. 
6217 */ 6218 #ifdef DEBUG 6219 zcmn_err(getzoneid(), CE_WARN, 6220 "Unsupported STREAMS ioctl 0x%x on socket. " 6221 "Pid = %d\n", cmd, curproc->p_pid); 6222 #endif /* DEBUG */ 6223 return (EOPNOTSUPP); 6224 6225 case _I_GETPEERCRED: 6226 if ((mode & FKIOCTL) == 0) 6227 return (EINVAL); 6228 6229 mutex_enter(&so->so_lock); 6230 if ((so->so_mode & SM_CONNREQUIRED) == 0) { 6231 error = ENOTSUP; 6232 } else if ((so->so_state & SS_ISCONNECTED) == 0) { 6233 error = ENOTCONN; 6234 } else if (so->so_peercred != NULL) { 6235 k_peercred_t *kp = (k_peercred_t *)arg; 6236 kp->pc_cr = so->so_peercred; 6237 kp->pc_cpid = so->so_cpid; 6238 crhold(so->so_peercred); 6239 } else { 6240 error = EINVAL; 6241 } 6242 mutex_exit(&so->so_lock); 6243 return (error); 6244 6245 default: 6246 /* 6247 * Do the higher-order bits of the ioctl cmd indicate 6248 * that it is an I_* streams ioctl? 6249 */ 6250 if ((cmd & 0xffffff00U) == STR && 6251 so->so_version == SOV_SOCKBSD) { 6252 #ifdef DEBUG 6253 zcmn_err(getzoneid(), CE_WARN, 6254 "Unsupported STREAMS ioctl 0x%x on socket. " 6255 "Pid = %d\n", cmd, curproc->p_pid); 6256 #endif /* DEBUG */ 6257 return (EOPNOTSUPP); 6258 } 6259 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6260 } 6261 } 6262 6263 /* 6264 * Handle plumbing-related ioctls. 6265 */ 6266 static int 6267 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 6268 struct cred *cr, int32_t *rvalp) 6269 { 6270 static const char sockmod_name[] = "sockmod"; 6271 struct sonode *so = VTOSO(vp); 6272 char mname[FMNAMESZ + 1]; 6273 int error; 6274 sotpi_info_t *sti = SOTOTPI(so); 6275 6276 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 6277 6278 if (so->so_version == SOV_SOCKBSD) 6279 return (EOPNOTSUPP); 6280 6281 if (so->so_version == SOV_STREAM) { 6282 /* 6283 * The imaginary "sockmod" has been popped - act as a stream. 6284 * If this is a push of sockmod then change back to a socket. 6285 */ 6286 if (cmd == I_PUSH) { 6287 error = ((mode & FKIOCTL) ? 
copystr : copyinstr)( 6288 (void *)arg, mname, sizeof (mname), NULL); 6289 6290 if (error == 0 && strcmp(mname, sockmod_name) == 0) { 6291 dprintso(so, 0, ("socktpi_ioctl: going to " 6292 "socket version\n")); 6293 so_stream2sock(so); 6294 return (0); 6295 } 6296 } 6297 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6298 } 6299 6300 switch (cmd) { 6301 case I_PUSH: 6302 if (sti->sti_direct) { 6303 mutex_enter(&so->so_lock); 6304 so_lock_single(so); 6305 mutex_exit(&so->so_lock); 6306 6307 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 6308 cr, rvalp); 6309 6310 mutex_enter(&so->so_lock); 6311 if (error == 0) 6312 sti->sti_direct = 0; 6313 so_unlock_single(so, SOLOCKED); 6314 mutex_exit(&so->so_lock); 6315 6316 if (error != 0) 6317 return (error); 6318 } 6319 6320 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6321 if (error == 0) 6322 sti->sti_pushcnt++; 6323 return (error); 6324 6325 case I_POP: 6326 if (sti->sti_pushcnt == 0) { 6327 /* Emulate sockmod being popped */ 6328 dprintso(so, 0, 6329 ("socktpi_ioctl: going to STREAMS version\n")); 6330 return (so_sock2stream(so)); 6331 } 6332 6333 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6334 if (error == 0) 6335 sti->sti_pushcnt--; 6336 return (error); 6337 6338 case I_LIST: { 6339 struct str_mlist *kmlistp, *umlistp; 6340 struct str_list kstrlist; 6341 ssize_t kstrlistsize; 6342 int i, nmods; 6343 6344 STRUCT_DECL(str_list, ustrlist); 6345 STRUCT_INIT(ustrlist, mode); 6346 6347 if (arg == NULL) { 6348 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6349 if (error == 0) 6350 (*rvalp)++; /* Add one for sockmod */ 6351 return (error); 6352 } 6353 6354 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 6355 STRUCT_SIZE(ustrlist), mode & FKIOCTL); 6356 if (error != 0) 6357 return (error); 6358 6359 nmods = STRUCT_FGET(ustrlist, sl_nmods); 6360 if (nmods <= 0) 6361 return (EINVAL); 6362 /* 6363 * Ceiling nmods at nstrpush to prevent someone from 6364 * maliciously 
consuming lots of kernel memory. 6365 */ 6366 nmods = MIN(nmods, nstrpush); 6367 6368 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 6369 kstrlist.sl_nmods = nmods; 6370 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 6371 6372 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 6373 cr, rvalp); 6374 if (error != 0) 6375 goto done; 6376 6377 /* 6378 * Considering the module list as a 0-based array of sl_nmods 6379 * modules, sockmod should conceptually exist at slot 6380 * sti_pushcnt. Insert sockmod at this location by sliding all 6381 * of the module names after so_pushcnt over by one. We know 6382 * that there will be room to do this since we allocated 6383 * sl_modlist with an additional slot. 6384 */ 6385 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--) 6386 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1]; 6387 6388 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name); 6389 kstrlist.sl_nmods++; 6390 6391 /* 6392 * Copy all of the entries out to ustrlist. 
6393 */ 6394 kmlistp = kstrlist.sl_modlist; 6395 umlistp = STRUCT_FGETP(ustrlist, sl_modlist); 6396 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) { 6397 error = so_copyout(kmlistp++, umlistp++, 6398 sizeof (struct str_mlist), mode & FKIOCTL); 6399 if (error != 0) 6400 goto done; 6401 } 6402 6403 error = so_copyout(&i, (void *)arg, sizeof (int32_t), 6404 mode & FKIOCTL); 6405 if (error == 0) 6406 *rvalp = 0; 6407 done: 6408 kmem_free(kstrlist.sl_modlist, kstrlistsize); 6409 return (error); 6410 } 6411 case I_LOOK: 6412 if (sti->sti_pushcnt == 0) { 6413 return (so_copyout(sockmod_name, (void *)arg, 6414 sizeof (sockmod_name), mode & FKIOCTL)); 6415 } 6416 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6417 6418 case I_FIND: 6419 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6420 if (error && error != EINVAL) 6421 return (error); 6422 6423 /* if not found and string was sockmod return 1 */ 6424 if (*rvalp == 0 || error == EINVAL) { 6425 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6426 (void *)arg, mname, sizeof (mname), NULL); 6427 if (error == ENAMETOOLONG) 6428 error = EINVAL; 6429 6430 if (error == 0 && strcmp(mname, sockmod_name) == 0) 6431 *rvalp = 1; 6432 } 6433 return (error); 6434 6435 default: 6436 panic("socktpi_plumbioctl: unknown ioctl %d", cmd); 6437 break; 6438 } 6439 6440 return (0); 6441 } 6442 6443 /* 6444 * Wrapper around the streams poll routine that implements socket poll 6445 * semantics. 6446 * The sockfs never calls pollwakeup itself - the stream head take care 6447 * of all pollwakeups. Since sockfs never holds so_lock when calling the 6448 * stream head there can never be a deadlock due to holding so_lock across 6449 * pollwakeup and acquiring so_lock in this routine. 6450 * 6451 * However, since the performance of VOP_POLL is critical we avoid 6452 * acquiring so_lock here. 
This is based on two assumptions: 6453 * - The poll implementation holds locks to serialize the VOP_POLL call 6454 * and a pollwakeup for the same pollhead. This ensures that should 6455 * e.g. so_state change during a socktpi_poll call the pollwakeup 6456 * (which strsock_* and strrput conspire to issue) is issued after 6457 * the state change. Thus the pollwakeup will block until VOP_POLL has 6458 * returned and then wake up poll and have it call VOP_POLL again. 6459 * - The reading of so_state without holding so_lock does not result in 6460 * stale data that is older than the latest state change that has dropped 6461 * so_lock. This is ensured by the mutex_exit issuing the appropriate 6462 * memory barrier to force the data into the coherency domain. 6463 */ 6464 static int 6465 sotpi_poll( 6466 struct sonode *so, 6467 short events, 6468 int anyyet, 6469 short *reventsp, 6470 struct pollhead **phpp) 6471 { 6472 short origevents = events; 6473 struct vnode *vp = SOTOV(so); 6474 int error; 6475 int so_state = so->so_state; /* snapshot */ 6476 sotpi_info_t *sti = SOTOTPI(so); 6477 6478 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n", 6479 (void *)vp, pr_state(so_state, so->so_mode), so->so_error)); 6480 6481 ASSERT(vp->v_type == VSOCK); 6482 ASSERT(vp->v_stream != NULL); 6483 6484 if (so->so_version == SOV_STREAM) { 6485 /* The imaginary "sockmod" has been popped - act as a stream */ 6486 return (strpoll(vp->v_stream, events, anyyet, 6487 reventsp, phpp)); 6488 } 6489 6490 if (!(so_state & SS_ISCONNECTED) && 6491 (so->so_mode & SM_CONNREQUIRED)) { 6492 /* Not connected yet - turn off write side events */ 6493 events &= ~(POLLOUT|POLLWRBAND); 6494 } 6495 /* 6496 * Check for errors without calling strpoll if the caller wants them. 6497 * In sockets the errors are represented as input/output events 6498 * and there is no need to ask the stream head for this information. 
6499 */ 6500 if (so->so_error != 0 && 6501 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) { 6502 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; 6503 return (0); 6504 } 6505 /* 6506 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages. 6507 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA 6508 * will not trigger a POLLIN event with POLLRDDATA set. 6509 * The handling of urgent data (causing POLLRDBAND) is done by 6510 * inspecting SS_OOBPEND below. 6511 */ 6512 events |= POLLRDDATA; 6513 6514 /* 6515 * After shutdown(output) a stream head write error is set. 6516 * However, we should not return output events. 6517 */ 6518 events |= POLLNOERR; 6519 error = strpoll(vp->v_stream, events, anyyet, 6520 reventsp, phpp); 6521 if (error) 6522 return (error); 6523 6524 ASSERT(!(*reventsp & POLLERR)); 6525 6526 /* 6527 * Notes on T_CONN_IND handling for sockets. 6528 * 6529 * If strpoll() returned without events, SR_POLLIN is guaranteed 6530 * to be set, ensuring any subsequent strrput() runs pollwakeup(). 6531 * 6532 * Since the so_lock is not held, soqueueconnind() may have run 6533 * and a T_CONN_IND may be waiting. We now check for any queued 6534 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events 6535 * to ensure poll returns. 6536 * 6537 * However: 6538 * If the T_CONN_IND hasn't arrived by the time strpoll() returns, 6539 * when strrput() does run for an arriving M_PROTO with T_CONN_IND 6540 * the following actions will occur; taken together they ensure the 6541 * syscall will return. 6542 * 6543 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if 6544 * the accept() was run on a non-blocking socket sowaitconnind() 6545 * may have already returned EWOULDBLOCK, so not be waiting to 6546 * process the message. Additionally socktpi_poll() has probably 6547 * proceeded past the sti_conn_ind_head check below. 6548 * 2. 
strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake 6549 * this thread, however that could occur before poll_common() 6550 * has entered cv_wait. 6551 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock. 6552 * 6553 * Before proceeding to cv_wait() in poll_common() for an event, 6554 * poll_common() atomically checks for T_POLLWAKE under the pc_lock, 6555 * and if set, re-calls strpoll() to ensure the late arriving 6556 * T_CONN_IND is recognized, and pollsys() returns. 6557 */ 6558 6559 if (sti->sti_conn_ind_head != NULL) 6560 *reventsp |= (POLLIN|POLLRDNORM) & events; 6561 6562 if (so->so_state & SS_OOBPEND) 6563 *reventsp |= POLLRDBAND & events; 6564 6565 if (sti->sti_nl7c_rcv_mp != NULL) { 6566 *reventsp |= (POLLIN|POLLRDNORM) & events; 6567 } 6568 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 6569 ((POLLIN|POLLRDNORM) & *reventsp)) { 6570 sti->sti_nl7c_flags |= NL7C_POLLIN; 6571 } 6572 6573 return (0); 6574 } 6575 6576 /*ARGSUSED*/ 6577 static int 6578 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6579 { 6580 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6581 int error = 0; 6582 6583 error = sonode_constructor(buf, cdrarg, kmflags); 6584 if (error != 0) 6585 return (error); 6586 6587 error = i_sotpi_info_constructor(&st->st_info); 6588 if (error != 0) 6589 sonode_destructor(buf, cdrarg); 6590 6591 st->st_sonode.so_priv = &st->st_info; 6592 6593 return (error); 6594 } 6595 6596 /*ARGSUSED1*/ 6597 static void 6598 socktpi_destructor(void *buf, void *cdrarg) 6599 { 6600 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6601 6602 ASSERT(st->st_sonode.so_priv == &st->st_info); 6603 st->st_sonode.so_priv = NULL; 6604 6605 i_sotpi_info_destructor(&st->st_info); 6606 sonode_destructor(buf, cdrarg); 6607 } 6608 6609 static int 6610 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 6611 { 6612 int retval; 6613 6614 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6615 struct sonode *so = (struct sonode *)buf; 6616 
sotpi_info_t *sti = SOTOTPI(so); 6617 6618 mutex_enter(&socklist.sl_lock); 6619 6620 sti->sti_next_so = socklist.sl_list; 6621 sti->sti_prev_so = NULL; 6622 if (sti->sti_next_so != NULL) 6623 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6624 socklist.sl_list = so; 6625 6626 mutex_exit(&socklist.sl_lock); 6627 6628 } 6629 return (retval); 6630 } 6631 6632 static void 6633 socktpi_unix_destructor(void *buf, void *cdrarg) 6634 { 6635 struct sonode *so = (struct sonode *)buf; 6636 sotpi_info_t *sti = SOTOTPI(so); 6637 6638 mutex_enter(&socklist.sl_lock); 6639 6640 if (sti->sti_next_so != NULL) 6641 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6642 if (sti->sti_prev_so != NULL) 6643 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6644 else 6645 socklist.sl_list = sti->sti_next_so; 6646 6647 mutex_exit(&socklist.sl_lock); 6648 6649 socktpi_destructor(buf, cdrarg); 6650 } 6651 6652 int 6653 socktpi_init(void) 6654 { 6655 /* 6656 * Create sonode caches. We create a special one for AF_UNIX so 6657 * that we can track them for netstat(1m). 6658 */ 6659 socktpi_cache = kmem_cache_create("socktpi_cache", 6660 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6661 socktpi_destructor, NULL, NULL, NULL, 0); 6662 6663 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6664 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6665 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6666 6667 return (0); 6668 } 6669 6670 /* 6671 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6672 * 6673 * Caller must still update state and mode using sotpi_update_state(). 
6674 */ 6675 int 6676 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 6677 boolean_t *direct, queue_t **qp, struct cred *cr) 6678 { 6679 sotpi_info_t *sti; 6680 struct sockparams *origsp = so->so_sockparams; 6681 sock_lower_handle_t handle = so->so_proto_handle; 6682 struct stdata *stp; 6683 struct vnode *vp; 6684 queue_t *q; 6685 int error = 0; 6686 6687 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6688 SS_FALLBACK_PENDING); 6689 ASSERT(SOCK_IS_NONSTR(so)); 6690 6691 *qp = NULL; 6692 *direct = B_FALSE; 6693 so->so_sockparams = newsp; 6694 /* 6695 * Allocate and initalize fields required by TPI. 6696 */ 6697 (void) sotpi_info_create(so, KM_SLEEP); 6698 sotpi_info_init(so); 6699 6700 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) { 6701 sotpi_info_fini(so); 6702 sotpi_info_destroy(so); 6703 return (error); 6704 } 6705 ASSERT(handle == so->so_proto_handle); 6706 sti = SOTOTPI(so); 6707 if (sti->sti_direct != 0) 6708 *direct = B_TRUE; 6709 6710 /* 6711 * When it comes to urgent data we have two cases to deal with; 6712 * (1) The oob byte has already arrived, or (2) the protocol has 6713 * notified that oob data is pending, but it has not yet arrived. 6714 * 6715 * For (1) all we need to do is send a T_EXDATA_IND to indicate were 6716 * in the byte stream the oob byte is. For (2) we have to send a 6717 * SIGURG (M_PCSIG), followed by a zero-length mblk indicating whether 6718 * the oob byte will be the next byte from the protocol. 6719 * 6720 * So in the worst case we need two mblks, one for the signal, another 6721 * for mark indication. In that case we use the exdata_mp for the sig. 6722 */ 6723 sti->sti_exdata_mp = allocb_wait(sizeof (struct T_exdata_ind), BPRI_MED, 6724 STR_NOSIG, NULL); 6725 sti->sti_urgmark_mp = allocb_wait(0, BPRI_MED, STR_NOSIG, NULL); 6726 6727 /* 6728 * Keep the original sp around so we can properly dispose of the 6729 * sonode when the socket is being closed. 
6730 */ 6731 sti->sti_orig_sp = origsp; 6732 6733 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 6734 so_alloc_addr(so, so->so_max_addr_len); 6735 6736 /* 6737 * If the application has done a SIOCSPGRP, make sure the 6738 * STREAM head is aware. This needs to take place before 6739 * the protocol start sending up messages. Otherwise we 6740 * might miss to generate SIGPOLL. 6741 * 6742 * It is possible that the application will receive duplicate 6743 * signals if some were already generated for either data or 6744 * connection indications. 6745 */ 6746 if (so->so_pgrp != 0) { 6747 if (so_set_events(so, so->so_vnode, cr) != 0) 6748 so->so_pgrp = 0; 6749 } 6750 6751 /* 6752 * Determine which queue to use. 6753 */ 6754 vp = SOTOV(so); 6755 stp = vp->v_stream; 6756 ASSERT(stp != NULL); 6757 q = stp->sd_wrq->q_next; 6758 6759 /* 6760 * Skip any modules that may have been auto pushed when the device 6761 * was opened 6762 */ 6763 while (q->q_next != NULL) 6764 q = q->q_next; 6765 *qp = _RD(q); 6766 6767 /* This is now a STREAMS sockets */ 6768 so->so_not_str = B_FALSE; 6769 6770 return (error); 6771 } 6772 6773 /* 6774 * Revert a TPI sonode. It is only allowed to revert the sonode during 6775 * the fallback process. 6776 */ 6777 void 6778 sotpi_revert_sonode(struct sonode *so, struct cred *cr) 6779 { 6780 vnode_t *vp = SOTOV(so); 6781 6782 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6783 SS_FALLBACK_PENDING); 6784 ASSERT(!SOCK_IS_NONSTR(so)); 6785 ASSERT(vp->v_stream != NULL); 6786 6787 if (SOTOTPI(so)->sti_exdata_mp != NULL) { 6788 freeb(SOTOTPI(so)->sti_exdata_mp); 6789 SOTOTPI(so)->sti_exdata_mp = NULL; 6790 } 6791 6792 if (SOTOTPI(so)->sti_urgmark_mp != NULL) { 6793 freeb(SOTOTPI(so)->sti_urgmark_mp); 6794 SOTOTPI(so)->sti_urgmark_mp = NULL; 6795 } 6796 6797 strclean(vp); 6798 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr); 6799 6800 /* 6801 * Restore the original sockparams. 
The caller is responsible for 6802 * dropping the ref to the new sp. 6803 */ 6804 so->so_sockparams = SOTOTPI(so)->sti_orig_sp; 6805 6806 sotpi_info_fini(so); 6807 sotpi_info_destroy(so); 6808 6809 /* This is no longer a STREAMS sockets */ 6810 so->so_not_str = B_TRUE; 6811 } 6812 6813 void 6814 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 6815 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 6816 socklen_t faddrlen, short opts) 6817 { 6818 sotpi_info_t *sti = SOTOTPI(so); 6819 6820 so_proc_tcapability_ack(so, tcap); 6821 6822 so->so_options |= opts; 6823 6824 /* 6825 * Determine whether the foreign and local address are valid 6826 */ 6827 if (laddrlen != 0) { 6828 ASSERT(laddrlen <= sti->sti_laddr_maxlen); 6829 sti->sti_laddr_len = laddrlen; 6830 bcopy(laddr, sti->sti_laddr_sa, laddrlen); 6831 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 6832 } 6833 6834 if (faddrlen != 0) { 6835 ASSERT(faddrlen <= sti->sti_faddr_maxlen); 6836 sti->sti_faddr_len = faddrlen; 6837 bcopy(faddr, sti->sti_faddr_sa, faddrlen); 6838 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 6839 } 6840 6841 } 6842 6843 /* 6844 * Allocate enough space to cache the local and foreign addresses. 6845 */ 6846 void 6847 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6848 { 6849 sotpi_info_t *sti = SOTOTPI(so); 6850 6851 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6852 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6853 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6854 P2ROUNDUP(maxlen, KMEM_ALIGN); 6855 so->so_max_addr_len = sti->sti_laddr_maxlen; 6856 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6857 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6858 + sti->sti_laddr_maxlen); 6859 6860 if (so->so_family == AF_UNIX) { 6861 /* 6862 * Initialize AF_UNIX related fields. 
6863 */ 6864 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6865 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6866 } 6867 } 6868 6869 6870 sotpi_info_t * 6871 sotpi_sototpi(struct sonode *so) 6872 { 6873 sotpi_info_t *sti; 6874 6875 ASSERT(so != NULL); 6876 6877 sti = (sotpi_info_t *)so->so_priv; 6878 6879 ASSERT(sti != NULL); 6880 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6881 6882 return (sti); 6883 } 6884 6885 static int 6886 i_sotpi_info_constructor(sotpi_info_t *sti) 6887 { 6888 sti->sti_magic = SOTPI_INFO_MAGIC; 6889 sti->sti_ack_mp = NULL; 6890 sti->sti_discon_ind_mp = NULL; 6891 sti->sti_ux_bound_vp = NULL; 6892 sti->sti_unbind_mp = NULL; 6893 6894 sti->sti_conn_ind_head = NULL; 6895 sti->sti_conn_ind_tail = NULL; 6896 6897 sti->sti_laddr_sa = NULL; 6898 sti->sti_faddr_sa = NULL; 6899 6900 sti->sti_nl7c_flags = 0; 6901 sti->sti_nl7c_uri = NULL; 6902 sti->sti_nl7c_rcv_mp = NULL; 6903 6904 sti->sti_exdata_mp = NULL; 6905 sti->sti_urgmark_mp = NULL; 6906 6907 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6908 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6909 6910 return (0); 6911 } 6912 6913 static void 6914 i_sotpi_info_destructor(sotpi_info_t *sti) 6915 { 6916 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6917 ASSERT(sti->sti_ack_mp == NULL); 6918 ASSERT(sti->sti_discon_ind_mp == NULL); 6919 ASSERT(sti->sti_ux_bound_vp == NULL); 6920 ASSERT(sti->sti_unbind_mp == NULL); 6921 6922 ASSERT(sti->sti_conn_ind_head == NULL); 6923 ASSERT(sti->sti_conn_ind_tail == NULL); 6924 6925 ASSERT(sti->sti_laddr_sa == NULL); 6926 ASSERT(sti->sti_faddr_sa == NULL); 6927 6928 ASSERT(sti->sti_nl7c_flags == 0); 6929 ASSERT(sti->sti_nl7c_uri == NULL); 6930 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6931 6932 ASSERT(sti->sti_exdata_mp == NULL); 6933 ASSERT(sti->sti_urgmark_mp == NULL); 6934 6935 mutex_destroy(&sti->sti_plumb_lock); 6936 cv_destroy(&sti->sti_ack_cv); 6937 } 6938 6939 /* 6940 * Creates and attaches TPI information to the given sonode 
6941 */ 6942 static boolean_t 6943 sotpi_info_create(struct sonode *so, int kmflags) 6944 { 6945 sotpi_info_t *sti; 6946 6947 ASSERT(so->so_priv == NULL); 6948 6949 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6950 return (B_FALSE); 6951 6952 if (i_sotpi_info_constructor(sti) != 0) { 6953 kmem_free(sti, sizeof (*sti)); 6954 return (B_FALSE); 6955 } 6956 6957 so->so_priv = (void *)sti; 6958 return (B_TRUE); 6959 } 6960 6961 /* 6962 * Initializes the TPI information. 6963 */ 6964 static void 6965 sotpi_info_init(struct sonode *so) 6966 { 6967 struct vnode *vp = SOTOV(so); 6968 sotpi_info_t *sti = SOTOTPI(so); 6969 time_t now; 6970 6971 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6972 vp->v_rdev = sti->sti_dev; 6973 6974 sti->sti_orig_sp = NULL; 6975 6976 sti->sti_pushcnt = 0; 6977 6978 now = gethrestime_sec(); 6979 sti->sti_atime = now; 6980 sti->sti_mtime = now; 6981 sti->sti_ctime = now; 6982 6983 sti->sti_eaddr_mp = NULL; 6984 sti->sti_delayed_error = 0; 6985 6986 sti->sti_provinfo = NULL; 6987 6988 sti->sti_oobcnt = 0; 6989 sti->sti_oobsigcnt = 0; 6990 6991 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6992 6993 sti->sti_laddr_sa = 0; 6994 sti->sti_faddr_sa = 0; 6995 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6996 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6997 6998 sti->sti_laddr_valid = 0; 6999 sti->sti_faddr_valid = 0; 7000 sti->sti_faddr_noxlate = 0; 7001 7002 sti->sti_direct = 0; 7003 7004 ASSERT(sti->sti_ack_mp == NULL); 7005 ASSERT(sti->sti_ux_bound_vp == NULL); 7006 ASSERT(sti->sti_unbind_mp == NULL); 7007 7008 ASSERT(sti->sti_conn_ind_head == NULL); 7009 ASSERT(sti->sti_conn_ind_tail == NULL); 7010 7011 /* Initialize the kernel SSL proxy fields */ 7012 sti->sti_kssl_type = KSSL_NO_PROXY; 7013 sti->sti_kssl_ent = NULL; 7014 sti->sti_kssl_ctx = NULL; 7015 } 7016 7017 /* 7018 * Given a sonode, grab the TPI info and free any data. 
7019 */ 7020 static void 7021 sotpi_info_fini(struct sonode *so) 7022 { 7023 sotpi_info_t *sti = SOTOTPI(so); 7024 mblk_t *mp; 7025 7026 ASSERT(sti->sti_discon_ind_mp == NULL); 7027 7028 if ((mp = sti->sti_conn_ind_head) != NULL) { 7029 mblk_t *mp1; 7030 7031 while (mp) { 7032 mp1 = mp->b_next; 7033 mp->b_next = NULL; 7034 freemsg(mp); 7035 mp = mp1; 7036 } 7037 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 7038 } 7039 7040 /* 7041 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 7042 * indirect them. It also uses so_count as a validity test. 7043 */ 7044 mutex_enter(&so->so_lock); 7045 7046 if (sti->sti_laddr_sa) { 7047 ASSERT((caddr_t)sti->sti_faddr_sa == 7048 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 7049 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 7050 sti->sti_laddr_valid = 0; 7051 sti->sti_faddr_valid = 0; 7052 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 7053 sti->sti_laddr_sa = NULL; 7054 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 7055 sti->sti_faddr_sa = NULL; 7056 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 7057 } 7058 7059 mutex_exit(&so->so_lock); 7060 7061 if ((mp = sti->sti_eaddr_mp) != NULL) { 7062 freemsg(mp); 7063 sti->sti_eaddr_mp = NULL; 7064 sti->sti_delayed_error = 0; 7065 } 7066 7067 if ((mp = sti->sti_ack_mp) != NULL) { 7068 freemsg(mp); 7069 sti->sti_ack_mp = NULL; 7070 } 7071 7072 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 7073 sti->sti_nl7c_rcv_mp = NULL; 7074 freemsg(mp); 7075 } 7076 sti->sti_nl7c_rcv_rval = 0; 7077 if (sti->sti_nl7c_uri != NULL) { 7078 nl7c_urifree(so); 7079 /* urifree() cleared nl7c_uri */ 7080 } 7081 if (sti->sti_nl7c_flags) { 7082 sti->sti_nl7c_flags = 0; 7083 } 7084 7085 ASSERT(sti->sti_ux_bound_vp == NULL); 7086 if ((mp = sti->sti_unbind_mp) != NULL) { 7087 freemsg(mp); 7088 sti->sti_unbind_mp = NULL; 7089 } 7090 } 7091 7092 /* 7093 * Destroys the TPI information attached to a sonode. 
7094 */ 7095 static void 7096 sotpi_info_destroy(struct sonode *so) 7097 { 7098 sotpi_info_t *sti = SOTOTPI(so); 7099 7100 i_sotpi_info_destructor(sti); 7101 kmem_free(sti, sizeof (*sti)); 7102 7103 so->so_priv = NULL; 7104 } 7105 7106 /* 7107 * Create the global sotpi socket module entry. It will never be freed. 7108 */ 7109 smod_info_t * 7110 sotpi_smod_create(void) 7111 { 7112 smod_info_t *smodp; 7113 7114 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 7115 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 7116 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 7117 /* 7118 * Initialize the smod_refcnt to 1 so it will never be freed. 7119 */ 7120 smodp->smod_refcnt = 1; 7121 smodp->smod_uc_version = SOCK_UC_VERSION; 7122 smodp->smod_dc_version = SOCK_DC_VERSION; 7123 smodp->smod_sock_create_func = &sotpi_create; 7124 smodp->smod_sock_destroy_func = &sotpi_destroy; 7125 return (smodp); 7126 } 7127