1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/t_lock.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/buf.h> 32 #include <sys/conf.h> 33 #include <sys/cred.h> 34 #include <sys/kmem.h> 35 #include <sys/kmem_impl.h> 36 #include <sys/sysmacros.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/debug.h> 40 #include <sys/errno.h> 41 #include <sys/time.h> 42 #include <sys/file.h> 43 #include <sys/open.h> 44 #include <sys/user.h> 45 #include <sys/termios.h> 46 #include <sys/stream.h> 47 #include <sys/strsubr.h> 48 #include <sys/strsun.h> 49 #include <sys/suntpi.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/sockio.h> 61 #include <sys/sodirect.h> 62 #include <netinet/in.h> 63 #include <sys/un.h> 64 #include <sys/strsun.h> 65 66 #include <sys/tiuser.h> 67 #define 
_SUN_TPI_VERSION 2 68 #include <sys/tihdr.h> 69 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 70 71 #include <c2/audit.h> 72 73 #include <inet/common.h> 74 #include <inet/ip.h> 75 #include <inet/ip6.h> 76 #include <inet/tcp.h> 77 #include <inet/udp_impl.h> 78 79 #include <sys/zone.h> 80 81 #include <fs/sockfs/nl7c.h> 82 #include <fs/sockfs/nl7curi.h> 83 84 #include <inet/kssl/ksslapi.h> 85 86 #include <fs/sockfs/sockcommon.h> 87 #include <fs/sockfs/socktpi.h> 88 #include <fs/sockfs/socktpi_impl.h> 89 90 /* 91 * Possible failures when memory can't be allocated. The documented behavior: 92 * 93 * 5.5: 4.X: XNET: 94 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 95 * EINTR 96 * (4.X does not document EINTR but returns it) 97 * bind: ENOSR - ENOBUFS/ENOSR 98 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 99 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 100 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 101 * (4.X getpeername and getsockname do not fail in practice) 102 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 103 * listen: - - ENOBUFS 104 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 105 * EINTR 106 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 107 * EINTR 108 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 109 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 110 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 111 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 112 * 113 * Resolution. When allocation fails: 114 * recv: return EINTR 115 * send: return EINTR 116 * connect, accept: EINTR 117 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 118 * socket, socketpair: ENOBUFS 119 * getpeername, getsockname: sleep 120 * getsockopt, setsockopt: sleep 121 */ 122 123 #ifdef SOCK_TEST 124 /* 125 * Variables that make sockfs do something other than the standard TPI 126 * for the AF_INET transports. 
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected-to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket send them as a single message for AF_INET{,6}.
 *
 * With SOCK_TEST defined these are real, patchable variables; otherwise
 * they are compile-time constants carrying the same default values.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */

#ifdef SOCK_TEST
/* Test-only hooks defined in another file. */
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 *
 * xnet_check_print: when non-zero, sotpi_bindlisten() printf()s a message
 * each time its X/Open state check makes bind fail with EINVAL.
 * xnet_truncate_print: not referenced in this part of the file; presumably
 * the equivalent diagnostic switch for truncation -- TODO confirm.
 */
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;

/* Allocation and teardown of TPI sonodes. */
static void sotpi_destroy(struct sonode *);
static struct sonode *sotpi_create(struct sockparams *, int, int, int, int,
    int, int *, cred_t *cr);

/* Life cycle of the sotpi_info_t attached to a sonode. */
static boolean_t sotpi_info_create(struct sonode *, int);
static void sotpi_info_init(struct sonode *);
static void sotpi_info_fini(struct sonode *);
static void sotpi_info_destroy(struct sonode *);

/*
 * Do direct function call to the transport layer below; this would
 * also allow the transport to utilize read-side synchronous stream
 * interface if necessary.  This is a /etc/system tunable that must
 * not be modified on a running system.  By default this is enabled
 * for performance reasons and may be disabled for debugging purposes.
 *
 * sotpi_init() consults this flag: when it is B_FALSE the socket's
 * sti_direct setting is cleared after the stream has been opened.
 */
boolean_t socktpi_direct = B_TRUE;

/*
 * kmem caches sotpi_create() allocates sonodes from: socktpi_unix_cache
 * for AF_UNIX sockets, socktpi_cache for every other address family.
 */
static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;

extern	void sigintr(k_sigset_t *, int);
extern	void sigunintr(k_sigset_t *);

/* Sockets acting as an in-kernel SSL proxy */
extern mblk_t	*strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);
extern mblk_t	*strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);

static int	sotpi_unbind(struct sonode *, int);

extern int	sodput(sodirect_t *, mblk_t *);
extern void	sodwakeup(sodirect_t *);

/* TPI sockfs sonode operations */
int 		sotpi_init(struct sonode *, struct sonode *, struct cred *,
		    int);
static int	sotpi_accept(struct sonode *, int, struct cred *,
		    struct sonode **);
static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
		    int, struct cred *);
static int	sotpi_listen(struct sonode *, int, struct cred *);
static int	sotpi_connect(struct sonode *, const struct sockaddr *,
		    socklen_t, int, int, struct cred *);
extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 224 struct uio *, struct cred *); 225 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 226 struct uio *, struct cred *); 227 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 228 struct cred *, mblk_t **); 229 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 230 struct uio *, void *, t_uscalar_t, int); 231 static int sodgram_direct(struct sonode *, struct sockaddr *, 232 socklen_t, struct uio *, int); 233 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 234 socklen_t *, boolean_t, struct cred *); 235 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 236 socklen_t *, struct cred *); 237 static int sotpi_shutdown(struct sonode *, int, struct cred *); 238 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 239 socklen_t *, int, struct cred *); 240 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 241 socklen_t, struct cred *); 242 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 243 int32_t *); 244 static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int, 245 struct cred *, int32_t *); 246 static int sotpi_poll(struct sonode *, short, int, short *, 247 struct pollhead **); 248 static int sotpi_close(struct sonode *, int, struct cred *); 249 250 static int i_sotpi_info_constructor(sotpi_info_t *); 251 static void i_sotpi_info_destructor(sotpi_info_t *); 252 253 sonodeops_t sotpi_sonodeops = { 254 sotpi_init, /* sop_init */ 255 sotpi_accept, /* sop_accept */ 256 sotpi_bind, /* sop_bind */ 257 sotpi_listen, /* sop_listen */ 258 sotpi_connect, /* sop_connect */ 259 sotpi_recvmsg, /* sop_recvmsg */ 260 sotpi_sendmsg, /* sop_sendmsg */ 261 sotpi_sendmblk, /* sop_sendmblk */ 262 sotpi_getpeername, /* sop_getpeername */ 263 sotpi_getsockname, /* sop_getsockname */ 264 sotpi_shutdown, /* sop_shutdown */ 265 sotpi_getsockopt, /* sop_getsockopt */ 266 sotpi_setsockopt, /* 
sop_setsockopt */ 267 sotpi_ioctl, /* sop_ioctl */ 268 sotpi_poll, /* sop_poll */ 269 sotpi_close, /* sop_close */ 270 }; 271 272 /* 273 * Return a TPI socket vnode. 274 * 275 * Note that sockets assume that the driver will clone (either itself 276 * or by using the clone driver) i.e. a socket() call will always 277 * result in a new vnode being created. 278 */ 279 280 /* 281 * Common create code for socket and accept. If tso is set the values 282 * from that node is used instead of issuing a T_INFO_REQ. 283 */ 284 285 /* ARGSUSED */ 286 static struct sonode * 287 sotpi_create(struct sockparams *sp, int family, int type, int protocol, 288 int version, int sflags, int *errorp, cred_t *cr) 289 { 290 struct sonode *so; 291 kmem_cache_t *cp; 292 int sfamily = family; 293 294 ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 295 296 if (family == AF_NCA) { 297 /* 298 * The request is for an NCA socket so for NL7C use the 299 * INET domain instead and mark NL7C_AF_NCA below. 300 */ 301 family = AF_INET; 302 /* 303 * NL7C is not supported in the non-global zone, 304 * we enforce this restriction here. 305 */ 306 if (getzoneid() != GLOBAL_ZONEID) { 307 *errorp = ENOTSUP; 308 return (NULL); 309 } 310 } 311 312 /* 313 * to be compatible with old tpi socket implementation ignore 314 * sleep flag (sflags) passed in 315 */ 316 cp = (family == AF_UNIX) ? 
socktpi_unix_cache : socktpi_cache; 317 so = kmem_cache_alloc(cp, KM_SLEEP); 318 if (so == NULL) { 319 *errorp = ENOMEM; 320 return (NULL); 321 } 322 323 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 324 sotpi_info_init(so); 325 326 if (sfamily == AF_NCA) { 327 SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA; 328 } 329 330 if (version == SOV_DEFAULT) 331 version = so_default_version; 332 333 so->so_version = (short)version; 334 *errorp = 0; 335 336 return (so); 337 } 338 339 static void 340 sotpi_destroy(struct sonode *so) 341 { 342 kmem_cache_t *cp; 343 struct sockparams *origsp; 344 345 /* 346 * If there is a new dealloc function (ie. smod_destroy_func), 347 * then it should check the correctness of the ops. 348 */ 349 350 ASSERT(so->so_ops == &sotpi_sonodeops); 351 352 origsp = SOTOTPI(so)->sti_orig_sp; 353 354 sotpi_info_fini(so); 355 356 if (so->so_state & SS_FALLBACK_COMP) { 357 /* 358 * A fallback happend, which means that a sotpi_info_t struct 359 * was allocated (as opposed to being allocated from the TPI 360 * sonode cache. Therefore we explicitly free the struct 361 * here. 362 */ 363 sotpi_info_destroy(so); 364 ASSERT(origsp != NULL); 365 366 origsp->sp_smod_info->smod_sock_destroy_func(so); 367 SOCKPARAMS_DEC_REF(origsp); 368 } else { 369 sonode_fini(so); 370 cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache : 371 socktpi_cache; 372 kmem_cache_free(cp, so); 373 } 374 } 375 376 /* ARGSUSED1 */ 377 int 378 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 379 { 380 major_t maj; 381 dev_t newdev; 382 struct vnode *vp; 383 int error = 0; 384 struct stdata *stp; 385 386 sotpi_info_t *sti = SOTOTPI(so); 387 388 dprint(1, ("sotpi_init()\n")); 389 390 /* 391 * over write the sleep flag passed in but that is ok 392 * as tpi socket does not honor sleep flag. 393 */ 394 flags |= FREAD|FWRITE; 395 396 /* 397 * Record in so_flag that it is a clone. 
398 */ 399 if (getmajor(sti->sti_dev) == clone_major) 400 so->so_flag |= SOCLONE; 401 402 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 403 (so->so_family == AF_INET || so->so_family == AF_INET6) && 404 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 405 so->so_protocol == IPPROTO_IP)) { 406 /* Tell tcp or udp that it's talking to sockets */ 407 flags |= SO_SOCKSTR; 408 409 /* 410 * Here we indicate to socktpi_open() our attempt to 411 * make direct calls between sockfs and transport. 412 * The final decision is left to socktpi_open(). 413 */ 414 sti->sti_direct = 1; 415 416 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 417 if (so->so_type == SOCK_STREAM && tso != NULL) { 418 if (SOTOTPI(tso)->sti_direct) { 419 /* 420 * Inherit sti_direct from listener and pass 421 * SO_ACCEPTOR open flag to tcp, indicating 422 * that this is an accept fast-path instance. 423 */ 424 flags |= SO_ACCEPTOR; 425 } else { 426 /* 427 * sti_direct is not set on listener, meaning 428 * that the listener has been converted from 429 * a socket to a stream. Ensure that the 430 * acceptor inherits these settings. 431 */ 432 sti->sti_direct = 0; 433 flags &= ~SO_SOCKSTR; 434 } 435 } 436 } 437 438 /* 439 * Tell local transport that it is talking to sockets. 440 */ 441 if (so->so_family == AF_UNIX) { 442 flags |= SO_SOCKSTR; 443 } 444 445 vp = SOTOV(so); 446 newdev = vp->v_rdev; 447 maj = getmajor(newdev); 448 ASSERT(STREAMSTAB(maj)); 449 450 error = stropen(vp, &newdev, flags, cr); 451 452 stp = vp->v_stream; 453 if (error == 0) { 454 if (so->so_flag & SOCLONE) 455 ASSERT(newdev != vp->v_rdev); 456 mutex_enter(&so->so_lock); 457 sti->sti_dev = newdev; 458 vp->v_rdev = newdev; 459 mutex_exit(&so->so_lock); 460 461 if (stp->sd_flag & STRISTTY) { 462 /* 463 * this is a post SVR4 tty driver - a socket can not 464 * be a controlling terminal. Fail the open. 
465 */ 466 (void) sotpi_close(so, flags, cr); 467 return (ENOTTY); /* XXX */ 468 } 469 470 ASSERT(stp->sd_wrq != NULL); 471 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 472 473 /* 474 * If caller is interested in doing direct function call 475 * interface to/from transport module, probe the module 476 * directly beneath the streamhead to see if it qualifies. 477 * 478 * We turn off the direct interface when qualifications fail. 479 * In the acceptor case, we simply turn off the sti_direct 480 * flag on the socket. We do the fallback after the accept 481 * has completed, before the new socket is returned to the 482 * application. 483 */ 484 if (sti->sti_direct) { 485 queue_t *tq = stp->sd_wrq->q_next; 486 487 /* 488 * sti_direct is currently supported and tested 489 * only for tcp/udp; this is the main reason to 490 * have the following assertions. 491 */ 492 ASSERT(so->so_family == AF_INET || 493 so->so_family == AF_INET6); 494 ASSERT(so->so_protocol == IPPROTO_UDP || 495 so->so_protocol == IPPROTO_TCP || 496 so->so_protocol == IPPROTO_IP); 497 ASSERT(so->so_type == SOCK_DGRAM || 498 so->so_type == SOCK_STREAM); 499 500 /* 501 * Abort direct call interface if the module directly 502 * underneath the stream head is not defined with the 503 * _D_DIRECT flag. This could happen in the tcp or 504 * udp case, when some other module is autopushed 505 * above it, or for some reasons the expected module 506 * isn't purely D_MP (which is the main requirement). 507 * 508 * Else, SS_DIRECT is valid. If the read-side Q has 509 * _QSODIRECT set then and uioasync is enabled then 510 * set SS_SODIRECT to enable sodirect. 511 */ 512 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 513 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 514 int rval; 515 516 /* Continue on without direct calls */ 517 sti->sti_direct = 0; 518 519 /* 520 * Cannot issue ioctl on fallback socket since 521 * there is no conn associated with the queue. 
522 * The fallback downcall will notify the proto 523 * of the change. 524 */ 525 if (!(flags & SO_ACCEPTOR) && 526 !(flags & SO_FALLBACK)) { 527 if ((error = strioctl(vp, 528 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 529 cr, &rval)) != 0) { 530 (void) sotpi_close(so, flags, 531 cr); 532 return (error); 533 } 534 } 535 } else if ((_OTHERQ(tq)->q_flag & _QSODIRECT) && 536 uioasync.enabled) { 537 /* Enable sodirect */ 538 so->so_state |= SS_SODIRECT; 539 } 540 } 541 542 if (flags & SO_FALLBACK) { 543 /* 544 * The stream created does not have a conn. 545 * do stream set up after conn has been assigned 546 */ 547 return (error); 548 } 549 if (error = so_strinit(so, tso)) { 550 (void) sotpi_close(so, flags, cr); 551 return (error); 552 } 553 554 /* Wildcard */ 555 if (so->so_protocol != so->so_sockparams->sp_protocol) { 556 int protocol = so->so_protocol; 557 /* 558 * Issue SO_PROTOTYPE setsockopt. 559 */ 560 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 561 &protocol, (t_uscalar_t)sizeof (protocol), cr); 562 if (error != 0) { 563 (void) sotpi_close(so, flags, cr); 564 /* 565 * Setsockopt often fails with ENOPROTOOPT but 566 * socket() should fail with 567 * EPROTONOSUPPORT/EPROTOTYPE. 568 */ 569 return (EPROTONOSUPPORT); 570 } 571 } 572 573 } else { 574 /* 575 * While the same socket can not be reopened (unlike specfs) 576 * the stream head sets STREOPENFAIL when the autopush fails. 577 */ 578 if ((stp != NULL) && 579 (stp->sd_flag & STREOPENFAIL)) { 580 /* 581 * Open failed part way through. 582 */ 583 mutex_enter(&stp->sd_lock); 584 stp->sd_flag &= ~STREOPENFAIL; 585 mutex_exit(&stp->sd_lock); 586 (void) sotpi_close(so, flags, cr); 587 return (error); 588 /*NOTREACHED*/ 589 } 590 ASSERT(stp == NULL); 591 } 592 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 593 "sockfs open:maj %d vp %p so %p error %d", 594 maj, vp, so, error); 595 return (error); 596 } 597 598 /* 599 * Bind the socket to an unspecified address in sockfs only. 
600 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 601 * required in all cases. 602 */ 603 static void 604 so_automatic_bind(struct sonode *so) 605 { 606 sotpi_info_t *sti = SOTOTPI(so); 607 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 608 609 ASSERT(MUTEX_HELD(&so->so_lock)); 610 ASSERT(!(so->so_state & SS_ISBOUND)); 611 ASSERT(sti->sti_unbind_mp); 612 613 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 614 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 615 sti->sti_laddr_sa->sa_family = so->so_family; 616 so->so_state |= SS_ISBOUND; 617 } 618 619 620 /* 621 * bind the socket. 622 * 623 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 624 * are passed in we allow rebinding. Note that for backwards compatibility 625 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 626 * Thus the rebinding code is currently not executed. 627 * 628 * The constraints for rebinding are: 629 * - it is a SOCK_DGRAM, or 630 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 631 * and no listen() has been done. 632 * This rebinding code was added based on some language in the XNET book 633 * about not returning EINVAL it the protocol allows rebinding. However, 634 * this language is not present in the Posix socket draft. Thus maybe the 635 * rebinding logic should be deleted from the source. 636 * 637 * A null "name" can be used to unbind the socket if: 638 * - it is a SOCK_DGRAM, or 639 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 640 * and no listen() has been done. 
641 */ 642 /* ARGSUSED */ 643 static int 644 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 645 socklen_t namelen, int backlog, int flags, struct cred *cr) 646 { 647 struct T_bind_req bind_req; 648 struct T_bind_ack *bind_ack; 649 int error = 0; 650 mblk_t *mp; 651 void *addr; 652 t_uscalar_t addrlen; 653 int unbind_on_err = 1; 654 boolean_t clear_acceptconn_on_err = B_FALSE; 655 boolean_t restore_backlog_on_err = B_FALSE; 656 int save_so_backlog; 657 t_scalar_t PRIM_type = O_T_BIND_REQ; 658 boolean_t tcp_udp_xport; 659 void *nl7c = NULL; 660 sotpi_info_t *sti = SOTOTPI(so); 661 662 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 663 (void *)so, (void *)name, namelen, backlog, flags, 664 pr_state(so->so_state, so->so_mode))); 665 666 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 667 668 if (!(flags & _SOBIND_LOCK_HELD)) { 669 mutex_enter(&so->so_lock); 670 so_lock_single(so); /* Set SOLOCKED */ 671 } else { 672 ASSERT(MUTEX_HELD(&so->so_lock)); 673 ASSERT(so->so_flag & SOLOCKED); 674 } 675 676 /* 677 * Make sure that there is a preallocated unbind_req message 678 * before binding. This message allocated when the socket is 679 * created but it might be have been consumed. 680 */ 681 if (sti->sti_unbind_mp == NULL) { 682 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 683 /* NOTE: holding so_lock while sleeping */ 684 sti->sti_unbind_mp = 685 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 686 cr); 687 } 688 689 if (flags & _SOBIND_REBIND) { 690 /* 691 * Called from solisten after doing an sotpi_unbind() or 692 * potentially without the unbind (latter for AF_INET{,6}). 
693 */ 694 ASSERT(name == NULL && namelen == 0); 695 696 if (so->so_family == AF_UNIX) { 697 ASSERT(sti->sti_ux_bound_vp); 698 addr = &sti->sti_ux_laddr; 699 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 700 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 701 "addr 0x%p, vp %p\n", 702 addrlen, 703 (void *)((struct so_ux_addr *)addr)->soua_vp, 704 (void *)sti->sti_ux_bound_vp)); 705 } else { 706 addr = sti->sti_laddr_sa; 707 addrlen = (t_uscalar_t)sti->sti_laddr_len; 708 } 709 } else if (flags & _SOBIND_UNSPEC) { 710 ASSERT(name == NULL && namelen == 0); 711 712 /* 713 * The caller checked SS_ISBOUND but not necessarily 714 * under so_lock 715 */ 716 if (so->so_state & SS_ISBOUND) { 717 /* No error */ 718 goto done; 719 } 720 721 /* Set an initial local address */ 722 switch (so->so_family) { 723 case AF_UNIX: 724 /* 725 * Use an address with same size as struct sockaddr 726 * just like BSD. 727 */ 728 sti->sti_laddr_len = 729 (socklen_t)sizeof (struct sockaddr); 730 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 731 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 732 sti->sti_laddr_sa->sa_family = so->so_family; 733 734 /* 735 * Pass down an address with the implicit bind 736 * magic number and the rest all zeros. 737 * The transport will return a unique address. 738 */ 739 sti->sti_ux_laddr.soua_vp = NULL; 740 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 741 addr = &sti->sti_ux_laddr; 742 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 743 break; 744 745 case AF_INET: 746 case AF_INET6: 747 /* 748 * An unspecified bind in TPI has a NULL address. 749 * Set the address in sockfs to have the sa_family. 750 */ 751 sti->sti_laddr_len = (so->so_family == AF_INET) ? 
752 (socklen_t)sizeof (sin_t) : 753 (socklen_t)sizeof (sin6_t); 754 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 755 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 756 sti->sti_laddr_sa->sa_family = so->so_family; 757 addr = NULL; 758 addrlen = 0; 759 break; 760 761 default: 762 /* 763 * An unspecified bind in TPI has a NULL address. 764 * Set the address in sockfs to be zero length. 765 * 766 * Can not assume there is a sa_family for all 767 * protocol families. For example, AF_X25 does not 768 * have a family field. 769 */ 770 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 771 sti->sti_laddr_len = 0; /* XXX correct? */ 772 addr = NULL; 773 addrlen = 0; 774 break; 775 } 776 777 } else { 778 if (so->so_state & SS_ISBOUND) { 779 /* 780 * If it is ok to rebind the socket, first unbind 781 * with the transport. A rebind to the NULL address 782 * is interpreted as an unbind. 783 * Note that a bind to NULL in BSD does unbind the 784 * socket but it fails with EINVAL. 785 * Note that regular sockets set SOV_SOCKBSD i.e. 786 * _SOBIND_SOCKBSD gets set here hence no type of 787 * socket does currently allow rebinding. 788 * 789 * If the name is NULL just do an unbind. 
790 */ 791 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 792 name != NULL) { 793 error = EINVAL; 794 unbind_on_err = 0; 795 eprintsoline(so, error); 796 goto done; 797 } 798 if ((so->so_mode & SM_CONNREQUIRED) && 799 (so->so_state & SS_CANTREBIND)) { 800 error = EINVAL; 801 unbind_on_err = 0; 802 eprintsoline(so, error); 803 goto done; 804 } 805 error = sotpi_unbind(so, 0); 806 if (error) { 807 eprintsoline(so, error); 808 goto done; 809 } 810 ASSERT(!(so->so_state & SS_ISBOUND)); 811 if (name == NULL) { 812 so->so_state &= 813 ~(SS_ISCONNECTED|SS_ISCONNECTING); 814 goto done; 815 } 816 } 817 818 /* X/Open requires this check */ 819 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 820 if (xnet_check_print) { 821 printf("sockfs: X/Open bind state check " 822 "caused EINVAL\n"); 823 } 824 error = EINVAL; 825 goto done; 826 } 827 828 switch (so->so_family) { 829 case AF_UNIX: 830 /* 831 * All AF_UNIX addresses are nul terminated 832 * when copied (copyin_name) in so the minimum 833 * length is 3 bytes. 834 */ 835 if (name == NULL || 836 (ssize_t)namelen <= sizeof (short) + 1) { 837 error = EISDIR; 838 eprintsoline(so, error); 839 goto done; 840 } 841 /* 842 * Verify so_family matches the bound family. 843 * BSD does not check this for AF_UNIX resulting 844 * in funny mknods. 845 */ 846 if (name->sa_family != so->so_family) { 847 error = EAFNOSUPPORT; 848 goto done; 849 } 850 break; 851 case AF_INET: 852 if (name == NULL) { 853 error = EINVAL; 854 eprintsoline(so, error); 855 goto done; 856 } 857 if ((size_t)namelen != sizeof (sin_t)) { 858 error = name->sa_family != so->so_family ? 859 EAFNOSUPPORT : EINVAL; 860 eprintsoline(so, error); 861 goto done; 862 } 863 if ((flags & _SOBIND_XPG4_2) && 864 (name->sa_family != so->so_family)) { 865 /* 866 * This check has to be made for X/Open 867 * sockets however application failures have 868 * been observed when it is applied to 869 * all sockets. 
870 */ 871 error = EAFNOSUPPORT; 872 eprintsoline(so, error); 873 goto done; 874 } 875 /* 876 * Force a zero sa_family to match so_family. 877 * 878 * Some programs like inetd(1M) don't set the 879 * family field. Other programs leave 880 * sin_family set to garbage - SunOS 4.X does 881 * not check the family field on a bind. 882 * We use the family field that 883 * was passed in to the socket() call. 884 */ 885 name->sa_family = so->so_family; 886 break; 887 888 case AF_INET6: { 889 #ifdef DEBUG 890 sin6_t *sin6 = (sin6_t *)name; 891 #endif /* DEBUG */ 892 893 if (name == NULL) { 894 error = EINVAL; 895 eprintsoline(so, error); 896 goto done; 897 } 898 if ((size_t)namelen != sizeof (sin6_t)) { 899 error = name->sa_family != so->so_family ? 900 EAFNOSUPPORT : EINVAL; 901 eprintsoline(so, error); 902 goto done; 903 } 904 if (name->sa_family != so->so_family) { 905 /* 906 * With IPv6 we require the family to match 907 * unlike in IPv4. 908 */ 909 error = EAFNOSUPPORT; 910 eprintsoline(so, error); 911 goto done; 912 } 913 #ifdef DEBUG 914 /* 915 * Verify that apps don't forget to clear 916 * sin6_scope_id etc 917 */ 918 if (sin6->sin6_scope_id != 0 && 919 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 920 zcmn_err(getzoneid(), CE_WARN, 921 "bind with uninitialized sin6_scope_id " 922 "(%d) on socket. Pid = %d\n", 923 (int)sin6->sin6_scope_id, 924 (int)curproc->p_pid); 925 } 926 if (sin6->__sin6_src_id != 0) { 927 zcmn_err(getzoneid(), CE_WARN, 928 "bind with uninitialized __sin6_src_id " 929 "(%d) on socket. Pid = %d\n", 930 (int)sin6->__sin6_src_id, 931 (int)curproc->p_pid); 932 } 933 #endif /* DEBUG */ 934 break; 935 } 936 default: 937 /* 938 * Don't do any length or sa_family check to allow 939 * non-sockaddr style addresses. 
940 */ 941 if (name == NULL) { 942 error = EINVAL; 943 eprintsoline(so, error); 944 goto done; 945 } 946 break; 947 } 948 949 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 950 error = ENAMETOOLONG; 951 eprintsoline(so, error); 952 goto done; 953 } 954 /* 955 * Save local address. 956 */ 957 sti->sti_laddr_len = (socklen_t)namelen; 958 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 959 bcopy(name, sti->sti_laddr_sa, namelen); 960 961 addr = sti->sti_laddr_sa; 962 addrlen = (t_uscalar_t)sti->sti_laddr_len; 963 switch (so->so_family) { 964 case AF_INET6: 965 case AF_INET: 966 break; 967 case AF_UNIX: { 968 struct sockaddr_un *soun = 969 (struct sockaddr_un *)sti->sti_laddr_sa; 970 struct vnode *vp, *rvp; 971 struct vattr vattr; 972 973 ASSERT(sti->sti_ux_bound_vp == NULL); 974 /* 975 * Create vnode for the specified path name. 976 * Keep vnode held with a reference in sti_ux_bound_vp. 977 * Use the vnode pointer as the address used in the 978 * bind with the transport. 979 * 980 * Use the same mode as in BSD. In particular this does 981 * not observe the umask. 982 */ 983 /* MAXPATHLEN + soun_family + nul termination */ 984 if (sti->sti_laddr_len > 985 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 986 error = ENAMETOOLONG; 987 eprintsoline(so, error); 988 goto done; 989 } 990 vattr.va_type = VSOCK; 991 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 992 vattr.va_mask = AT_TYPE|AT_MODE; 993 /* NOTE: holding so_lock */ 994 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 995 EXCL, 0, &vp, CRMKNOD, 0, 0); 996 if (error) { 997 if (error == EEXIST) 998 error = EADDRINUSE; 999 eprintsoline(so, error); 1000 goto done; 1001 } 1002 /* 1003 * Establish pointer from the underlying filesystem 1004 * vnode to the socket node. 1005 * sti_ux_bound_vp and v_stream->sd_vnode form the 1006 * cross-linkage between the underlying filesystem 1007 * node and the socket node. 
1008 */ 1009 1010 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 1011 VN_HOLD(rvp); 1012 VN_RELE(vp); 1013 vp = rvp; 1014 } 1015 1016 ASSERT(SOTOV(so)->v_stream); 1017 mutex_enter(&vp->v_lock); 1018 vp->v_stream = SOTOV(so)->v_stream; 1019 sti->sti_ux_bound_vp = vp; 1020 mutex_exit(&vp->v_lock); 1021 1022 /* 1023 * Use the vnode pointer value as a unique address 1024 * (together with the magic number to avoid conflicts 1025 * with implicit binds) in the transport provider. 1026 */ 1027 sti->sti_ux_laddr.soua_vp = 1028 (void *)sti->sti_ux_bound_vp; 1029 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1030 addr = &sti->sti_ux_laddr; 1031 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1032 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1033 addrlen, 1034 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1035 break; 1036 } 1037 } /* end switch (so->so_family) */ 1038 } 1039 1040 /* 1041 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1042 * the transport can start passing up T_CONN_IND messages 1043 * as soon as it receives the bind req and strsock_proto() 1044 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1045 */ 1046 if (flags & _SOBIND_LISTEN) { 1047 if ((so->so_state & SS_ACCEPTCONN) == 0) 1048 clear_acceptconn_on_err = B_TRUE; 1049 save_so_backlog = so->so_backlog; 1050 restore_backlog_on_err = B_TRUE; 1051 so->so_state |= SS_ACCEPTCONN; 1052 so->so_backlog = backlog; 1053 } 1054 1055 /* 1056 * If NL7C addr(s) have been configured check for addr/port match, 1057 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 1058 * 1059 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 1060 * family sockets only. If match mark as such. 
1061 */ 1062 if (nl7c_enabled && ((addr != NULL && 1063 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1064 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 1065 sti->sti_nl7c_flags == NL7C_AF_NCA)) { 1066 /* 1067 * NL7C is not supported in non-global zones, 1068 * we enforce this restriction here. 1069 */ 1070 if (so->so_zoneid == GLOBAL_ZONEID) { 1071 /* An NL7C socket, mark it */ 1072 sti->sti_nl7c_flags |= NL7C_ENABLED; 1073 if (nl7c == NULL) { 1074 /* 1075 * Was an AF_NCA bind() so add it to the 1076 * addr list for reporting purposes. 1077 */ 1078 nl7c = nl7c_add_addr(addr, addrlen); 1079 } 1080 } else 1081 nl7c = NULL; 1082 } 1083 1084 /* 1085 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1086 * for other transports we will send in a O_T_BIND_REQ. 1087 */ 1088 if (tcp_udp_xport && 1089 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1090 PRIM_type = T_BIND_REQ; 1091 1092 bind_req.PRIM_type = PRIM_type; 1093 bind_req.ADDR_length = addrlen; 1094 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1095 bind_req.CONIND_number = backlog; 1096 /* NOTE: holding so_lock while sleeping */ 1097 mp = soallocproto2(&bind_req, sizeof (bind_req), 1098 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1099 sti->sti_laddr_valid = 0; 1100 1101 /* Done using sti_laddr_sa - can drop the lock */ 1102 mutex_exit(&so->so_lock); 1103 1104 /* 1105 * Intercept the bind_req message here to check if this <address/port> 1106 * was configured as an SSL proxy server, or if another endpoint was 1107 * already configured to act as a proxy for us. 1108 * 1109 * Note, only if NL7C not enabled for this socket. 
1110 */ 1111 if (nl7c == NULL && 1112 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1113 so->so_type == SOCK_STREAM) { 1114 1115 if (sti->sti_kssl_ent != NULL) { 1116 kssl_release_ent(sti->sti_kssl_ent, so, 1117 sti->sti_kssl_type); 1118 sti->sti_kssl_ent = NULL; 1119 } 1120 1121 sti->sti_kssl_type = kssl_check_proxy(mp, so, 1122 &sti->sti_kssl_ent); 1123 switch (sti->sti_kssl_type) { 1124 case KSSL_NO_PROXY: 1125 break; 1126 1127 case KSSL_HAS_PROXY: 1128 mutex_enter(&so->so_lock); 1129 goto skip_transport; 1130 1131 case KSSL_IS_PROXY: 1132 break; 1133 } 1134 } 1135 1136 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1137 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1138 if (error) { 1139 eprintsoline(so, error); 1140 mutex_enter(&so->so_lock); 1141 goto done; 1142 } 1143 1144 mutex_enter(&so->so_lock); 1145 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1146 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1147 if (error) { 1148 eprintsoline(so, error); 1149 goto done; 1150 } 1151 skip_transport: 1152 ASSERT(mp); 1153 /* 1154 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1155 * strsock_proto while the lock was dropped above, the bind 1156 * is allowed to complete. 1157 */ 1158 1159 /* Mark as bound. This will be undone if we detect errors below. */ 1160 if (flags & _SOBIND_NOXLATE) { 1161 ASSERT(so->so_family == AF_UNIX); 1162 sti->sti_faddr_noxlate = 1; 1163 } 1164 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1165 so->so_state |= SS_ISBOUND; 1166 ASSERT(sti->sti_unbind_mp); 1167 1168 /* note that we've already set SS_ACCEPTCONN above */ 1169 1170 /* 1171 * Recompute addrlen - an unspecied bind sent down an 1172 * address of length zero but we expect the appropriate length 1173 * in return. 1174 */ 1175 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 
1176 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1177 1178 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1179 /* 1180 * The alignment restriction is really too strict but 1181 * we want enough alignment to inspect the fields of 1182 * a sockaddr_in. 1183 */ 1184 addr = sogetoff(mp, bind_ack->ADDR_offset, 1185 bind_ack->ADDR_length, 1186 __TPI_ALIGN_SIZE); 1187 if (addr == NULL) { 1188 freemsg(mp); 1189 error = EPROTO; 1190 eprintsoline(so, error); 1191 goto done; 1192 } 1193 if (!(flags & _SOBIND_UNSPEC)) { 1194 /* 1195 * Verify that the transport didn't return something we 1196 * did not want e.g. an address other than what we asked for. 1197 * 1198 * NOTE: These checks would go away if/when we switch to 1199 * using the new TPI (in which the transport would fail 1200 * the request instead of assigning a different address). 1201 * 1202 * NOTE2: For protocols that we don't know (i.e. any 1203 * other than AF_INET6, AF_INET and AF_UNIX), we 1204 * cannot know if the transport should be expected to 1205 * return the same address as that requested. 1206 * 1207 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1208 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 1209 * 1210 * For example, in the case of netatalk it may be 1211 * inappropriate for the transport to return the 1212 * requested address (as it may have allocated a local 1213 * port number in behaviour similar to that of an 1214 * AF_INET bind request with a port number of zero). 1215 * 1216 * Given the definition of O_T_BIND_REQ, where the 1217 * transport may bind to an address other than the 1218 * requested address, it's not possible to determine 1219 * whether a returned address that differs from the 1220 * requested address is a reason to fail (because the 1221 * requested address was not available) or succeed 1222 * (because the transport allocated an appropriate 1223 * address and/or port). 
1224 * 1225 * sockfs currently requires that the transport return 1226 * the requested address in the T_BIND_ACK, unless 1227 * there is code here to allow for any discrepancy. 1228 * Such code exists for AF_INET and AF_INET6. 1229 * 1230 * Netatalk chooses to return the requested address 1231 * rather than the (correct) allocated address. This 1232 * means that netatalk violates the TPI specification 1233 * (and would not function correctly if used from a 1234 * TLI application), but it does mean that it works 1235 * with sockfs. 1236 * 1237 * As noted above, using the newer XTI bind primitive 1238 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1239 * allow sockfs to be more sure about whether or not 1240 * the bind request had succeeded (as transports are 1241 * not permitted to bind to a different address than 1242 * that requested - they must return failure). 1243 * Unfortunately, support for T_BIND_REQ may not be 1244 * present in all transport implementations (netatalk, 1245 * for example, doesn't have it), making the 1246 * transition difficult. 1247 */ 1248 if (bind_ack->ADDR_length != addrlen) { 1249 /* Assumes that the requested address was in use */ 1250 freemsg(mp); 1251 error = EADDRINUSE; 1252 eprintsoline(so, error); 1253 goto done; 1254 } 1255 1256 switch (so->so_family) { 1257 case AF_INET6: 1258 case AF_INET: { 1259 sin_t *rname, *aname; 1260 1261 rname = (sin_t *)addr; 1262 aname = (sin_t *)sti->sti_laddr_sa; 1263 1264 /* 1265 * Take advantage of the alignment 1266 * of sin_port and sin6_port which fall 1267 * in the same place in their data structures. 1268 * Just use sin_port for either address family. 1269 * 1270 * This may become a problem if (heaven forbid) 1271 * there's a separate ipv6port_reserved... :-P 1272 * 1273 * Binding to port 0 has the semantics of letting 1274 * the transport bind to any port. 
1275 * 1276 * If the transport is TCP or UDP since we had sent 1277 * a T_BIND_REQ we would not get a port other than 1278 * what we asked for. 1279 */ 1280 if (tcp_udp_xport) { 1281 /* 1282 * Pick up the new port number if we bound to 1283 * port 0. 1284 */ 1285 if (aname->sin_port == 0) 1286 aname->sin_port = rname->sin_port; 1287 sti->sti_laddr_valid = 1; 1288 break; 1289 } 1290 if (aname->sin_port != 0 && 1291 aname->sin_port != rname->sin_port) { 1292 freemsg(mp); 1293 error = EADDRINUSE; 1294 eprintsoline(so, error); 1295 goto done; 1296 } 1297 /* 1298 * Pick up the new port number if we bound to port 0. 1299 */ 1300 aname->sin_port = rname->sin_port; 1301 1302 /* 1303 * Unfortunately, addresses aren't _quite_ the same. 1304 */ 1305 if (so->so_family == AF_INET) { 1306 if (aname->sin_addr.s_addr != 1307 rname->sin_addr.s_addr) { 1308 freemsg(mp); 1309 error = EADDRNOTAVAIL; 1310 eprintsoline(so, error); 1311 goto done; 1312 } 1313 } else { 1314 sin6_t *rname6 = (sin6_t *)rname; 1315 sin6_t *aname6 = (sin6_t *)aname; 1316 1317 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1318 &rname6->sin6_addr)) { 1319 freemsg(mp); 1320 error = EADDRNOTAVAIL; 1321 eprintsoline(so, error); 1322 goto done; 1323 } 1324 } 1325 break; 1326 } 1327 case AF_UNIX: 1328 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1329 freemsg(mp); 1330 error = EADDRINUSE; 1331 eprintsoline(so, error); 1332 eprintso(so, 1333 ("addrlen %d, addr 0x%x, vp %p\n", 1334 addrlen, *((int *)addr), 1335 (void *)sti->sti_ux_bound_vp)); 1336 goto done; 1337 } 1338 sti->sti_laddr_valid = 1; 1339 break; 1340 default: 1341 /* 1342 * NOTE: This assumes that addresses can be 1343 * byte-compared for equivalence. 
1344 */ 1345 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1346 freemsg(mp); 1347 error = EADDRINUSE; 1348 eprintsoline(so, error); 1349 goto done; 1350 } 1351 /* 1352 * Don't mark sti_laddr_valid, as we cannot be 1353 * sure that the returned address is the real 1354 * bound address when talking to an unknown 1355 * transport. 1356 */ 1357 break; 1358 } 1359 } else { 1360 /* 1361 * Save for returned address for getsockname. 1362 * Needed for unspecific bind unless transport supports 1363 * the TI_GETMYNAME ioctl. 1364 * Do this for AF_INET{,6} even though they do, as 1365 * caching info here is much better performance than 1366 * a TPI/STREAMS trip to the transport for getsockname. 1367 * Any which can't for some reason _must_ _not_ set 1368 * sti_laddr_valid here for the caching version of 1369 * getsockname to not break; 1370 */ 1371 switch (so->so_family) { 1372 case AF_UNIX: 1373 /* 1374 * Record the address bound with the transport 1375 * for use by socketpair. 1376 */ 1377 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1378 sti->sti_laddr_valid = 1; 1379 break; 1380 case AF_INET: 1381 case AF_INET6: 1382 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1383 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1384 sti->sti_laddr_valid = 1; 1385 break; 1386 default: 1387 /* 1388 * Don't mark sti_laddr_valid, as we cannot be 1389 * sure that the returned address is the real 1390 * bound address when talking to an unknown 1391 * transport. 
1392 */ 1393 break; 1394 } 1395 } 1396 1397 if (nl7c != NULL) { 1398 /* Register listen()er sonode pointer with NL7C */ 1399 nl7c_listener_addr(nl7c, so); 1400 } 1401 1402 freemsg(mp); 1403 1404 done: 1405 if (error) { 1406 /* reset state & backlog to values held on entry */ 1407 if (clear_acceptconn_on_err == B_TRUE) 1408 so->so_state &= ~SS_ACCEPTCONN; 1409 if (restore_backlog_on_err == B_TRUE) 1410 so->so_backlog = save_so_backlog; 1411 1412 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1413 int err; 1414 1415 err = sotpi_unbind(so, 0); 1416 /* LINTED - statement has no consequent: if */ 1417 if (err) { 1418 eprintsoline(so, error); 1419 } else { 1420 ASSERT(!(so->so_state & SS_ISBOUND)); 1421 } 1422 } 1423 } 1424 if (!(flags & _SOBIND_LOCK_HELD)) { 1425 so_unlock_single(so, SOLOCKED); 1426 mutex_exit(&so->so_lock); 1427 } else { 1428 ASSERT(MUTEX_HELD(&so->so_lock)); 1429 ASSERT(so->so_flag & SOLOCKED); 1430 } 1431 return (error); 1432 } 1433 1434 /* bind the socket */ 1435 static int 1436 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1437 int flags, struct cred *cr) 1438 { 1439 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1440 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1441 1442 flags &= ~_SOBIND_SOCKETPAIR; 1443 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1444 } 1445 1446 /* 1447 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1448 * address, or when listen needs to unbind and bind. 1449 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1450 * so that a sobind can pick them up. 
1451 */ 1452 static int 1453 sotpi_unbind(struct sonode *so, int flags) 1454 { 1455 struct T_unbind_req unbind_req; 1456 int error = 0; 1457 mblk_t *mp; 1458 sotpi_info_t *sti = SOTOTPI(so); 1459 1460 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1461 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1462 1463 ASSERT(MUTEX_HELD(&so->so_lock)); 1464 ASSERT(so->so_flag & SOLOCKED); 1465 1466 if (!(so->so_state & SS_ISBOUND)) { 1467 error = EINVAL; 1468 eprintsoline(so, error); 1469 goto done; 1470 } 1471 1472 mutex_exit(&so->so_lock); 1473 1474 /* 1475 * Flush the read and write side (except stream head read queue) 1476 * and send down T_UNBIND_REQ. 1477 */ 1478 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1479 1480 unbind_req.PRIM_type = T_UNBIND_REQ; 1481 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1482 0, _ALLOC_SLEEP, CRED()); 1483 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1484 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1485 mutex_enter(&so->so_lock); 1486 if (error) { 1487 eprintsoline(so, error); 1488 goto done; 1489 } 1490 1491 error = sowaitokack(so, T_UNBIND_REQ); 1492 if (error) { 1493 eprintsoline(so, error); 1494 goto done; 1495 } 1496 1497 /* 1498 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1499 * strsock_proto while the lock was dropped above, the unbind 1500 * is allowed to complete. 1501 */ 1502 if (!(flags & _SOUNBIND_REBIND)) { 1503 /* 1504 * Clear out bound address. 
1505 */ 1506 vnode_t *vp; 1507 1508 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1509 1510 /* Undo any SSL proxy setup */ 1511 if ((so->so_family == AF_INET || 1512 so->so_family == AF_INET6) && 1513 (so->so_type == SOCK_STREAM) && 1514 (sti->sti_kssl_ent != NULL)) { 1515 kssl_release_ent(sti->sti_kssl_ent, so, 1516 sti->sti_kssl_type); 1517 sti->sti_kssl_ent = NULL; 1518 sti->sti_kssl_type = KSSL_NO_PROXY; 1519 } 1520 sti->sti_ux_bound_vp = NULL; 1521 vn_rele_stream(vp); 1522 } 1523 /* Clear out address */ 1524 sti->sti_laddr_len = 0; 1525 } 1526 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1527 sti->sti_laddr_valid = 0; 1528 1529 done: 1530 1531 /* If the caller held the lock don't release it here */ 1532 ASSERT(MUTEX_HELD(&so->so_lock)); 1533 ASSERT(so->so_flag & SOLOCKED); 1534 1535 return (error); 1536 } 1537 1538 /* 1539 * listen on the socket. 1540 * For TPI conforming transports this has to first unbind with the transport 1541 * and then bind again using the new backlog. 1542 */ 1543 /* ARGSUSED */ 1544 int 1545 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1546 { 1547 int error = 0; 1548 sotpi_info_t *sti = SOTOTPI(so); 1549 1550 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1551 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1552 1553 if (sti->sti_serv_type == T_CLTS) 1554 return (EOPNOTSUPP); 1555 1556 /* 1557 * If the socket is ready to accept connections already, then 1558 * return without doing anything. This avoids a problem where 1559 * a second listen() call fails if a connection is pending and 1560 * leaves the socket unbound. Only when we are not unbinding 1561 * with the transport can we safely increase the backlog. 
1562 */ 1563 if (so->so_state & SS_ACCEPTCONN && 1564 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1565 /*CONSTCOND*/ 1566 !solisten_tpi_tcp)) 1567 return (0); 1568 1569 if (so->so_state & SS_ISCONNECTED) 1570 return (EINVAL); 1571 1572 mutex_enter(&so->so_lock); 1573 so_lock_single(so); /* Set SOLOCKED */ 1574 1575 /* 1576 * If the listen doesn't change the backlog we do nothing. 1577 * This avoids an EPROTO error from the transport. 1578 */ 1579 if ((so->so_state & SS_ACCEPTCONN) && 1580 so->so_backlog == backlog) 1581 goto done; 1582 1583 if (!(so->so_state & SS_ISBOUND)) { 1584 /* 1585 * Must have been explicitly bound in the UNIX domain. 1586 */ 1587 if (so->so_family == AF_UNIX) { 1588 error = EINVAL; 1589 goto done; 1590 } 1591 error = sotpi_bindlisten(so, NULL, 0, backlog, 1592 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1593 } else if (backlog > 0) { 1594 /* 1595 * AF_INET{,6} hack to avoid losing the port. 1596 * Assumes that all AF_INET{,6} transports can handle a 1597 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1598 * has already bound thus it is possible to avoid the unbind. 1599 */ 1600 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1601 /*CONSTCOND*/ 1602 !solisten_tpi_tcp)) { 1603 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1604 if (error) 1605 goto done; 1606 } 1607 error = sotpi_bindlisten(so, NULL, 0, backlog, 1608 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1609 } else { 1610 so->so_state |= SS_ACCEPTCONN; 1611 so->so_backlog = backlog; 1612 } 1613 if (error) 1614 goto done; 1615 ASSERT(so->so_state & SS_ACCEPTCONN); 1616 done: 1617 so_unlock_single(so, SOLOCKED); 1618 mutex_exit(&so->so_lock); 1619 return (error); 1620 } 1621 1622 /* 1623 * Disconnect either a specified seqno or all (-1). 1624 * The former is used on listening sockets only. 1625 * 1626 * When seqno == -1 sodisconnect could call sotpi_unbind. 
However, 1627 * the current use of sodisconnect(seqno == -1) is only for shutdown 1628 * so there is no point (and potentially incorrect) to unbind. 1629 */ 1630 static int 1631 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1632 { 1633 struct T_discon_req discon_req; 1634 int error = 0; 1635 mblk_t *mp; 1636 1637 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1638 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1639 1640 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1641 mutex_enter(&so->so_lock); 1642 so_lock_single(so); /* Set SOLOCKED */ 1643 } else { 1644 ASSERT(MUTEX_HELD(&so->so_lock)); 1645 ASSERT(so->so_flag & SOLOCKED); 1646 } 1647 1648 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1649 error = EINVAL; 1650 eprintsoline(so, error); 1651 goto done; 1652 } 1653 1654 mutex_exit(&so->so_lock); 1655 /* 1656 * Flush the write side (unless this is a listener) 1657 * and then send down a T_DISCON_REQ. 1658 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1659 * and other messages.) 1660 */ 1661 if (!(so->so_state & SS_ACCEPTCONN)) 1662 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1663 1664 discon_req.PRIM_type = T_DISCON_REQ; 1665 discon_req.SEQ_number = seqno; 1666 mp = soallocproto1(&discon_req, sizeof (discon_req), 1667 0, _ALLOC_SLEEP, CRED()); 1668 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1669 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1670 mutex_enter(&so->so_lock); 1671 if (error) { 1672 eprintsoline(so, error); 1673 goto done; 1674 } 1675 1676 error = sowaitokack(so, T_DISCON_REQ); 1677 if (error) { 1678 eprintsoline(so, error); 1679 goto done; 1680 } 1681 /* 1682 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1683 * strsock_proto while the lock was dropped above, the disconnect 1684 * is allowed to complete. However, it is not possible to 1685 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 
1686 */ 1687 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1688 SOTOTPI(so)->sti_laddr_valid = 0; 1689 SOTOTPI(so)->sti_faddr_valid = 0; 1690 done: 1691 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1692 so_unlock_single(so, SOLOCKED); 1693 mutex_exit(&so->so_lock); 1694 } else { 1695 /* If the caller held the lock don't release it here */ 1696 ASSERT(MUTEX_HELD(&so->so_lock)); 1697 ASSERT(so->so_flag & SOLOCKED); 1698 } 1699 return (error); 1700 } 1701 1702 /* ARGSUSED */ 1703 int 1704 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1705 struct sonode **nsop) 1706 { 1707 struct T_conn_ind *conn_ind; 1708 struct T_conn_res *conn_res; 1709 int error = 0; 1710 mblk_t *mp, *ctxmp, *ack_mp; 1711 struct sonode *nso; 1712 vnode_t *nvp; 1713 void *src; 1714 t_uscalar_t srclen; 1715 void *opt; 1716 t_uscalar_t optlen; 1717 t_scalar_t PRIM_type; 1718 t_scalar_t SEQ_number; 1719 size_t sinlen; 1720 sotpi_info_t *sti = SOTOTPI(so); 1721 sotpi_info_t *nsti; 1722 1723 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1724 (void *)so, fflag, (void *)nsop, 1725 pr_state(so->so_state, so->so_mode))); 1726 1727 /* 1728 * Defer single-threading the accepting socket until 1729 * the T_CONN_IND has been received and parsed and the 1730 * new sonode has been opened. 1731 */ 1732 1733 /* Check that we are not already connected */ 1734 if ((so->so_state & SS_ACCEPTCONN) == 0) 1735 goto conn_bad; 1736 again: 1737 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1738 goto e_bad; 1739 1740 ASSERT(mp != NULL); 1741 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1742 ctxmp = mp->b_cont; 1743 1744 /* 1745 * Save SEQ_number for error paths. 
1746 */ 1747 SEQ_number = conn_ind->SEQ_number; 1748 1749 srclen = conn_ind->SRC_length; 1750 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1751 if (src == NULL) { 1752 error = EPROTO; 1753 freemsg(mp); 1754 eprintsoline(so, error); 1755 goto disconnect_unlocked; 1756 } 1757 optlen = conn_ind->OPT_length; 1758 switch (so->so_family) { 1759 case AF_INET: 1760 case AF_INET6: 1761 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1762 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1763 &opt, conn_ind->OPT_length); 1764 } else { 1765 /* 1766 * The transport (in this case TCP) hasn't sent up 1767 * a pointer to an instance for the accept fast-path. 1768 * Disable fast-path completely because the call to 1769 * sotpi_create() below would otherwise create an 1770 * incomplete TCP instance, which would lead to 1771 * problems when sockfs sends a normal T_CONN_RES 1772 * message down the new stream. 1773 */ 1774 if (sti->sti_direct) { 1775 int rval; 1776 /* 1777 * For consistency we inform tcp to disable 1778 * direct interface on the listener, though 1779 * we can certainly live without doing this 1780 * because no data will ever travel upstream 1781 * on the listening socket. 1782 */ 1783 sti->sti_direct = 0; 1784 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1785 0, 0, K_TO_K, cr, &rval); 1786 } 1787 opt = NULL; 1788 optlen = 0; 1789 } 1790 break; 1791 case AF_UNIX: 1792 default: 1793 if (optlen != 0) { 1794 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1795 __TPI_ALIGN_SIZE); 1796 if (opt == NULL) { 1797 error = EPROTO; 1798 freemsg(mp); 1799 eprintsoline(so, error); 1800 goto disconnect_unlocked; 1801 } 1802 } 1803 if (so->so_family == AF_UNIX) { 1804 if (!sti->sti_faddr_noxlate) { 1805 src = NULL; 1806 srclen = 0; 1807 } 1808 /* Extract src address from options */ 1809 if (optlen != 0) 1810 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1811 } 1812 break; 1813 } 1814 1815 /* 1816 * Create the new socket. 
1817 */ 1818 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1819 if (nso == NULL) { 1820 ASSERT(error != 0); 1821 /* 1822 * Accept can not fail with ENOBUFS. sotpi_create 1823 * sleeps waiting for memory until a signal is caught 1824 * so return EINTR. 1825 */ 1826 freemsg(mp); 1827 if (error == ENOBUFS) 1828 error = EINTR; 1829 goto e_disc_unl; 1830 } 1831 nvp = SOTOV(nso); 1832 nsti = SOTOTPI(nso); 1833 1834 /* 1835 * If the transport sent up an SSL connection context, then attach 1836 * it the new socket, and set the (sd_wputdatafunc)() and 1837 * (sd_rputdatafunc)() stream head hooks to intercept and process 1838 * SSL records. 1839 */ 1840 if (ctxmp != NULL) { 1841 /* 1842 * This kssl_ctx_t is already held for us by the transport. 1843 * So, we don't need to do a kssl_hold_ctx() here. 1844 */ 1845 nsti->sti_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1846 freemsg(ctxmp); 1847 mp->b_cont = NULL; 1848 strsetrwputdatahooks(nvp, strsock_kssl_input, 1849 strsock_kssl_output); 1850 1851 /* Disable sodirect if any */ 1852 if (nso->so_direct != NULL) { 1853 mutex_enter(nso->so_direct->sod_lockp); 1854 SOD_DISABLE(nso->so_direct); 1855 mutex_exit(nso->so_direct->sod_lockp); 1856 } 1857 } 1858 #ifdef DEBUG 1859 /* 1860 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1861 * it's inherited early to allow debugging of the accept code itself. 1862 */ 1863 nso->so_options |= so->so_options & SO_DEBUG; 1864 #endif /* DEBUG */ 1865 1866 /* 1867 * Save the SRC address from the T_CONN_IND 1868 * for getpeername to work on AF_UNIX and on transports that do not 1869 * support TI_GETPEERNAME. 1870 * 1871 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1872 * copyin_name(). 
1873 */ 1874 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1875 error = EINVAL; 1876 freemsg(mp); 1877 eprintsoline(so, error); 1878 goto disconnect_vp_unlocked; 1879 } 1880 nsti->sti_faddr_len = (socklen_t)srclen; 1881 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1882 bcopy(src, nsti->sti_faddr_sa, srclen); 1883 nsti->sti_faddr_valid = 1; 1884 1885 /* 1886 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1887 * Send down a T_CONN_RES without a cred. 1888 */ 1889 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1890 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1891 cred_t *cr; 1892 pid_t cpid; 1893 1894 cr = msg_getcred(mp, &cpid); 1895 if (cr != NULL) { 1896 crhold(cr); 1897 nso->so_peercred = cr; 1898 nso->so_cpid = cpid; 1899 } 1900 freemsg(mp); 1901 1902 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1903 sizeof (intptr_t), 0, _ALLOC_INTR, NULL); 1904 if (mp == NULL) { 1905 /* 1906 * Accept can not fail with ENOBUFS. 1907 * A signal was caught so return EINTR. 1908 */ 1909 error = EINTR; 1910 eprintsoline(so, error); 1911 goto disconnect_vp_unlocked; 1912 } 1913 conn_res = (struct T_conn_res *)mp->b_rptr; 1914 } else { 1915 /* 1916 * For efficency reasons we use msg_extractcred; no crhold 1917 * needed since db_credp is cleared (i.e., we move the cred 1918 * from the message to so_peercred. 1919 */ 1920 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1921 1922 mp->b_rptr = DB_BASE(mp); 1923 conn_res = (struct T_conn_res *)mp->b_rptr; 1924 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1925 } 1926 1927 /* 1928 * New socket must be bound at least in sockfs and, except for AF_INET, 1929 * (or AF_INET6) it also has to be bound in the transport provider. 1930 * We set the local address in the sonode from the T_OK_ACK of the 1931 * T_CONN_RES. For this reason the address we bind to here isn't 1932 * important. 
1933 */ 1934 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1935 /*CONSTCOND*/ 1936 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1937 /* 1938 * Optimization for AF_INET{,6} transports 1939 * that can handle a T_CONN_RES without being bound. 1940 */ 1941 mutex_enter(&nso->so_lock); 1942 so_automatic_bind(nso); 1943 mutex_exit(&nso->so_lock); 1944 } else { 1945 /* Perform NULL bind with the transport provider. */ 1946 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1947 cr)) != 0) { 1948 ASSERT(error != ENOBUFS); 1949 freemsg(mp); 1950 eprintsoline(nso, error); 1951 goto disconnect_vp_unlocked; 1952 } 1953 } 1954 1955 /* 1956 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1957 * so that any data arriving on the new socket will cause the 1958 * appropriate signals to be delivered for the new socket. 1959 * 1960 * No other thread (except strsock_proto and strsock_misc) 1961 * can access the new socket thus we relax the locking. 1962 */ 1963 nso->so_pgrp = so->so_pgrp; 1964 nso->so_state |= so->so_state & SS_ASYNC; 1965 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1966 1967 if (nso->so_pgrp != 0) { 1968 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1969 eprintsoline(nso, error); 1970 error = 0; 1971 nso->so_pgrp = 0; 1972 } 1973 } 1974 1975 /* 1976 * Make note of the socket level options. TCP and IP level options 1977 * are already inherited. We could do all this after accept is 1978 * successful but doing it here simplifies code and no harm done 1979 * for error case. 
1980 */ 1981 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1982 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1983 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1984 nso->so_sndbuf = so->so_sndbuf; 1985 nso->so_rcvbuf = so->so_rcvbuf; 1986 if (nso->so_options & SO_LINGER) 1987 nso->so_linger = so->so_linger; 1988 1989 /* 1990 * Note that the following sti_direct code path should be 1991 * removed once we are confident that the direct sockets 1992 * do not result in any degradation. 1993 */ 1994 if (sti->sti_direct) { 1995 1996 ASSERT(opt != NULL); 1997 1998 conn_res->OPT_length = optlen; 1999 conn_res->OPT_offset = MBLKL(mp); 2000 bcopy(&opt, mp->b_wptr, optlen); 2001 mp->b_wptr += optlen; 2002 conn_res->PRIM_type = T_CONN_RES; 2003 conn_res->ACCEPTOR_id = 0; 2004 PRIM_type = T_CONN_RES; 2005 2006 /* Send down the T_CONN_RES on acceptor STREAM */ 2007 error = kstrputmsg(SOTOV(nso), mp, NULL, 2008 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2009 if (error) { 2010 mutex_enter(&so->so_lock); 2011 so_lock_single(so); 2012 eprintsoline(so, error); 2013 goto disconnect_vp; 2014 } 2015 mutex_enter(&nso->so_lock); 2016 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 2017 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2018 if (error) { 2019 mutex_exit(&nso->so_lock); 2020 mutex_enter(&so->so_lock); 2021 so_lock_single(so); 2022 eprintsoline(so, error); 2023 goto disconnect_vp; 2024 } 2025 if (nso->so_family == AF_INET) { 2026 sin_t *sin; 2027 2028 sin = (sin_t *)(ack_mp->b_rptr + 2029 sizeof (struct T_ok_ack)); 2030 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 2031 nsti->sti_laddr_len = sizeof (sin_t); 2032 } else { 2033 sin6_t *sin6; 2034 2035 sin6 = (sin6_t *)(ack_mp->b_rptr + 2036 sizeof (struct T_ok_ack)); 2037 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 2038 nsti->sti_laddr_len = sizeof (sin6_t); 2039 } 2040 freemsg(ack_mp); 2041 2042 nso->so_state |= SS_ISCONNECTED; 2043 nso->so_proto_handle = (sock_lower_handle_t)opt; 2044 
nsti->sti_laddr_valid = 1; 2045 2046 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 2047 /* 2048 * A NL7C marked listen()er so the new socket 2049 * inherits the listen()er's NL7C state, except 2050 * for NL7C_POLLIN. 2051 * 2052 * Only call NL7C to process the new socket if 2053 * the listen socket allows blocking i/o. 2054 */ 2055 nsti->sti_nl7c_flags = 2056 sti->sti_nl7c_flags & (~NL7C_POLLIN); 2057 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 2058 /* 2059 * Nonblocking accept() just make it 2060 * persist to defer processing to the 2061 * read-side syscall (e.g. read). 2062 */ 2063 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 2064 } else if (nl7c_process(nso, B_FALSE)) { 2065 /* 2066 * NL7C has completed processing on the 2067 * socket, close the socket and back to 2068 * the top to await the next T_CONN_IND. 2069 */ 2070 mutex_exit(&nso->so_lock); 2071 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 2072 cr, NULL); 2073 VN_RELE(nvp); 2074 goto again; 2075 } 2076 /* Pass the new socket out */ 2077 } 2078 2079 mutex_exit(&nso->so_lock); 2080 2081 /* 2082 * It's possible, through the use of autopush for example, 2083 * that the acceptor stream may not support sti_direct 2084 * semantics. If the new socket does not support sti_direct 2085 * we issue a _SIOCSOCKFALLBACK to inform the transport 2086 * as we would in the I_PUSH case. 2087 */ 2088 if (nsti->sti_direct == 0) { 2089 int rval; 2090 2091 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2092 0, 0, K_TO_K, cr, &rval)) != 0) { 2093 mutex_enter(&so->so_lock); 2094 so_lock_single(so); 2095 eprintsoline(so, error); 2096 goto disconnect_vp; 2097 } 2098 } 2099 2100 /* 2101 * Pass out new socket. 2102 */ 2103 if (nsop != NULL) 2104 *nsop = nso; 2105 2106 return (0); 2107 } 2108 2109 /* 2110 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2111 * which don't support the FireEngine accept fast-path. It is also 2112 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2113 * again. 
Neither sockfs nor TCP attempt to find out if some other 2114 * random module has been inserted in between (in which case we 2115 * should follow TLI accept behaviour). We blindly assume the worst 2116 * case and revert back to old behaviour i.e. TCP will not send us 2117 * any option (eager) and the accept should happen on the listener 2118 * queue. Any queued T_conn_ind have already got their options removed 2119 * by so_sock2_stream() when "sockmod" was I_POP'd. 2120 */ 2121 /* 2122 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2123 */ 2124 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2125 #ifdef _ILP32 2126 queue_t *q; 2127 2128 /* 2129 * Find read queue in driver 2130 * Can safely do this since we "own" nso/nvp. 2131 */ 2132 q = strvp2wq(nvp)->q_next; 2133 while (SAMESTR(q)) 2134 q = q->q_next; 2135 q = RD(q); 2136 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2137 #else 2138 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2139 #endif /* _ILP32 */ 2140 conn_res->PRIM_type = O_T_CONN_RES; 2141 PRIM_type = O_T_CONN_RES; 2142 } else { 2143 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2144 conn_res->PRIM_type = T_CONN_RES; 2145 PRIM_type = T_CONN_RES; 2146 } 2147 conn_res->SEQ_number = SEQ_number; 2148 conn_res->OPT_length = 0; 2149 conn_res->OPT_offset = 0; 2150 2151 mutex_enter(&so->so_lock); 2152 so_lock_single(so); /* Set SOLOCKED */ 2153 mutex_exit(&so->so_lock); 2154 2155 error = kstrputmsg(SOTOV(so), mp, NULL, 2156 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2157 mutex_enter(&so->so_lock); 2158 if (error) { 2159 eprintsoline(so, error); 2160 goto disconnect_vp; 2161 } 2162 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2163 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2164 if (error) { 2165 eprintsoline(so, error); 2166 goto disconnect_vp; 2167 } 2168 /* 2169 * If there is a sin/sin6 appended onto the T_OK_ACK use 2170 * that to set the local address. 
If this is not present 2171 * then we zero out the address and don't set the 2172 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2173 * the pathname from the listening socket. 2174 */ 2175 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 2176 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 2177 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2178 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2179 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2180 nsti->sti_laddr_len = sinlen; 2181 nsti->sti_laddr_valid = 1; 2182 } else if (nso->so_family == AF_UNIX) { 2183 ASSERT(so->so_family == AF_UNIX); 2184 nsti->sti_laddr_len = sti->sti_laddr_len; 2185 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2186 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2187 nsti->sti_laddr_len); 2188 nsti->sti_laddr_valid = 1; 2189 } else { 2190 nsti->sti_laddr_len = sti->sti_laddr_len; 2191 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2192 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2193 nsti->sti_laddr_sa->sa_family = nso->so_family; 2194 } 2195 freemsg(ack_mp); 2196 2197 so_unlock_single(so, SOLOCKED); 2198 mutex_exit(&so->so_lock); 2199 2200 nso->so_state |= SS_ISCONNECTED; 2201 2202 /* 2203 * Pass out new socket. 
2204 */ 2205 if (nsop != NULL) 2206 *nsop = nso; 2207 2208 return (0); 2209 2210 2211 eproto_disc_unl: 2212 error = EPROTO; 2213 e_disc_unl: 2214 eprintsoline(so, error); 2215 goto disconnect_unlocked; 2216 2217 pr_disc_vp_unl: 2218 eprintsoline(so, error); 2219 disconnect_vp_unlocked: 2220 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2221 VN_RELE(nvp); 2222 disconnect_unlocked: 2223 (void) sodisconnect(so, SEQ_number, 0); 2224 return (error); 2225 2226 pr_disc_vp: 2227 eprintsoline(so, error); 2228 disconnect_vp: 2229 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2230 so_unlock_single(so, SOLOCKED); 2231 mutex_exit(&so->so_lock); 2232 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2233 VN_RELE(nvp); 2234 return (error); 2235 2236 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2237 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2238 ? EOPNOTSUPP : EINVAL; 2239 e_bad: 2240 eprintsoline(so, error); 2241 return (error); 2242 } 2243 2244 /* 2245 * connect a socket. 2246 * 2247 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2248 * unconnect (by specifying a null address). 2249 */ 2250 int 2251 sotpi_connect(struct sonode *so, 2252 const struct sockaddr *name, 2253 socklen_t namelen, 2254 int fflag, 2255 int flags, 2256 struct cred *cr) 2257 { 2258 struct T_conn_req conn_req; 2259 int error = 0; 2260 mblk_t *mp; 2261 void *src; 2262 socklen_t srclen; 2263 void *addr; 2264 socklen_t addrlen; 2265 boolean_t need_unlock; 2266 sotpi_info_t *sti = SOTOTPI(so); 2267 2268 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2269 (void *)so, (void *)name, namelen, fflag, flags, 2270 pr_state(so->so_state, so->so_mode))); 2271 2272 /* 2273 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2274 * avoid sleeping for memory with SOLOCKED held. 2275 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2276 * + sizeof (struct T_opthdr). 
2277 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2278 * exceed sti_faddr_maxlen). 2279 */ 2280 mp = soallocproto(sizeof (struct T_conn_req) + 2281 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 2282 cr); 2283 if (mp == NULL) { 2284 /* 2285 * Connect can not fail with ENOBUFS. A signal was 2286 * caught so return EINTR. 2287 */ 2288 error = EINTR; 2289 eprintsoline(so, error); 2290 return (error); 2291 } 2292 2293 mutex_enter(&so->so_lock); 2294 /* 2295 * Make sure there is a preallocated T_unbind_req message 2296 * before any binding. This message is allocated when the 2297 * socket is created. Since another thread can consume 2298 * so_unbind_mp by the time we return from so_lock_single(), 2299 * we should check the availability of so_unbind_mp after 2300 * we return from so_lock_single(). 2301 */ 2302 2303 so_lock_single(so); /* Set SOLOCKED */ 2304 need_unlock = B_TRUE; 2305 2306 if (sti->sti_unbind_mp == NULL) { 2307 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2308 /* NOTE: holding so_lock while sleeping */ 2309 sti->sti_unbind_mp = 2310 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2311 if (sti->sti_unbind_mp == NULL) { 2312 error = EINTR; 2313 goto done; 2314 } 2315 } 2316 2317 /* 2318 * Can't have done a listen before connecting. 2319 */ 2320 if (so->so_state & SS_ACCEPTCONN) { 2321 error = EOPNOTSUPP; 2322 goto done; 2323 } 2324 2325 /* 2326 * Must be bound with the transport 2327 */ 2328 if (!(so->so_state & SS_ISBOUND)) { 2329 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2330 /*CONSTCOND*/ 2331 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2332 /* 2333 * Optimization for AF_INET{,6} transports 2334 * that can handle a T_CONN_REQ without being bound. 
2335 */ 2336 so_automatic_bind(so); 2337 } else { 2338 error = sotpi_bind(so, NULL, 0, 2339 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2340 if (error) 2341 goto done; 2342 } 2343 ASSERT(so->so_state & SS_ISBOUND); 2344 flags |= _SOCONNECT_DID_BIND; 2345 } 2346 2347 /* 2348 * Handle a connect to a name parameter of type AF_UNSPEC like a 2349 * connect to a null address. This is the portable method to 2350 * unconnect a socket. 2351 */ 2352 if ((namelen >= sizeof (sa_family_t)) && 2353 (name->sa_family == AF_UNSPEC)) { 2354 name = NULL; 2355 namelen = 0; 2356 } 2357 2358 /* 2359 * Check that we are not already connected. 2360 * A connection-oriented socket cannot be reconnected. 2361 * A connected connection-less socket can be 2362 * - connected to a different address by a subsequent connect 2363 * - "unconnected" by a connect to the NULL address 2364 */ 2365 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2366 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2367 if (so->so_mode & SM_CONNREQUIRED) { 2368 /* Connection-oriented socket */ 2369 error = so->so_state & SS_ISCONNECTED ? 2370 EISCONN : EALREADY; 2371 goto done; 2372 } 2373 /* Connection-less socket */ 2374 if (name == NULL) { 2375 /* 2376 * Remove the connected state and clear SO_DGRAM_ERRIND 2377 * since it was set when the socket was connected. 2378 * If this is UDP also send down a T_DISCON_REQ. 2379 */ 2380 int val; 2381 2382 if ((so->so_family == AF_INET || 2383 so->so_family == AF_INET6) && 2384 (so->so_type == SOCK_DGRAM || 2385 so->so_type == SOCK_RAW) && 2386 /*CONSTCOND*/ 2387 !soconnect_tpi_udp) { 2388 /* XXX What about implicitly unbinding here? 
*/ 2389 error = sodisconnect(so, -1, 2390 _SODISCONNECT_LOCK_HELD); 2391 } else { 2392 so->so_state &= 2393 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2394 sti->sti_faddr_valid = 0; 2395 sti->sti_faddr_len = 0; 2396 } 2397 2398 /* Remove SOLOCKED since setsockopt will grab it */ 2399 so_unlock_single(so, SOLOCKED); 2400 mutex_exit(&so->so_lock); 2401 2402 val = 0; 2403 (void) sotpi_setsockopt(so, SOL_SOCKET, 2404 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2405 cr); 2406 2407 mutex_enter(&so->so_lock); 2408 so_lock_single(so); /* Set SOLOCKED */ 2409 goto done; 2410 } 2411 } 2412 ASSERT(so->so_state & SS_ISBOUND); 2413 2414 if (name == NULL || namelen == 0) { 2415 error = EINVAL; 2416 goto done; 2417 } 2418 /* 2419 * Mark the socket if sti_faddr_sa represents the transport level 2420 * address. 2421 */ 2422 if (flags & _SOCONNECT_NOXLATE) { 2423 struct sockaddr_ux *soaddr_ux; 2424 2425 ASSERT(so->so_family == AF_UNIX); 2426 if (namelen != sizeof (struct sockaddr_ux)) { 2427 error = EINVAL; 2428 goto done; 2429 } 2430 soaddr_ux = (struct sockaddr_ux *)name; 2431 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2432 namelen = sizeof (soaddr_ux->sou_addr); 2433 sti->sti_faddr_noxlate = 1; 2434 } 2435 2436 /* 2437 * Length and family checks. 2438 */ 2439 error = so_addr_verify(so, name, namelen); 2440 if (error) 2441 goto bad; 2442 2443 /* 2444 * Save foreign address. Needed for AF_UNIX as well as 2445 * transport providers that do not support TI_GETPEERNAME. 2446 * Also used for cached foreign address for TCP and UDP. 2447 */ 2448 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2449 error = EINVAL; 2450 goto done; 2451 } 2452 sti->sti_faddr_len = (socklen_t)namelen; 2453 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2454 bcopy(name, sti->sti_faddr_sa, namelen); 2455 sti->sti_faddr_valid = 1; 2456 2457 if (so->so_family == AF_UNIX) { 2458 if (sti->sti_faddr_noxlate) { 2459 /* 2460 * Already have a transport internal address. 
Do not 2461 * pass any (transport internal) source address. 2462 */ 2463 addr = sti->sti_faddr_sa; 2464 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2465 src = NULL; 2466 srclen = 0; 2467 } else { 2468 /* 2469 * Pass the sockaddr_un source address as an option 2470 * and translate the remote address. 2471 * Holding so_lock thus sti_laddr_sa can not change. 2472 */ 2473 src = sti->sti_laddr_sa; 2474 srclen = (t_uscalar_t)sti->sti_laddr_len; 2475 dprintso(so, 1, 2476 ("sotpi_connect UNIX: srclen %d, src %p\n", 2477 srclen, src)); 2478 error = so_ux_addr_xlate(so, 2479 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2480 (flags & _SOCONNECT_XPG4_2), 2481 &addr, &addrlen); 2482 if (error) 2483 goto bad; 2484 } 2485 } else { 2486 addr = sti->sti_faddr_sa; 2487 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2488 src = NULL; 2489 srclen = 0; 2490 } 2491 /* 2492 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2493 * option which asks the transport provider to send T_UDERR_IND 2494 * messages. These T_UDERR_IND messages are used to return connected 2495 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2496 * 2497 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2498 * we send down a T_CONN_REQ. This is needed to let the 2499 * transport assign a local address that is consistent with 2500 * the remote address. Applications depend on a getsockname() 2501 * after a connect() to retrieve the "source" IP address for 2502 * the connected socket. Invalidate the cached local address 2503 * to force getsockname() to enquire of the transport. 2504 */ 2505 if (!(so->so_mode & SM_CONNREQUIRED)) { 2506 /* 2507 * Datagram socket. 
2508 */ 2509 int32_t val; 2510 2511 so_unlock_single(so, SOLOCKED); 2512 mutex_exit(&so->so_lock); 2513 2514 val = 1; 2515 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2516 &val, (t_uscalar_t)sizeof (val), cr); 2517 2518 mutex_enter(&so->so_lock); 2519 so_lock_single(so); /* Set SOLOCKED */ 2520 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2521 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2522 soconnect_tpi_udp) { 2523 soisconnected(so); 2524 goto done; 2525 } 2526 /* 2527 * Send down T_CONN_REQ etc. 2528 * Clear fflag to avoid returning EWOULDBLOCK. 2529 */ 2530 fflag = 0; 2531 ASSERT(so->so_family != AF_UNIX); 2532 sti->sti_laddr_valid = 0; 2533 } else if (sti->sti_laddr_len != 0) { 2534 /* 2535 * If the local address or port was "any" then it may be 2536 * changed by the transport as a result of the 2537 * connect. Invalidate the cached version if we have one. 2538 */ 2539 switch (so->so_family) { 2540 case AF_INET: 2541 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2542 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2543 INADDR_ANY || 2544 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2545 sti->sti_laddr_valid = 0; 2546 break; 2547 2548 case AF_INET6: 2549 ASSERT(sti->sti_laddr_len == 2550 (socklen_t)sizeof (sin6_t)); 2551 if (IN6_IS_ADDR_UNSPECIFIED( 2552 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2553 IN6_IS_ADDR_V4MAPPED_ANY( 2554 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2555 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2556 sti->sti_laddr_valid = 0; 2557 break; 2558 2559 default: 2560 break; 2561 } 2562 } 2563 2564 /* 2565 * Check for failure of an earlier call 2566 */ 2567 if (so->so_error != 0) 2568 goto so_bad; 2569 2570 /* 2571 * Send down T_CONN_REQ. Message was allocated above. 
2572 */ 2573 conn_req.PRIM_type = T_CONN_REQ; 2574 conn_req.DEST_length = addrlen; 2575 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2576 if (srclen == 0) { 2577 conn_req.OPT_length = 0; 2578 conn_req.OPT_offset = 0; 2579 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2580 soappendmsg(mp, addr, addrlen); 2581 } else { 2582 /* 2583 * There is a AF_UNIX sockaddr_un to include as a source 2584 * address option. 2585 */ 2586 struct T_opthdr toh; 2587 2588 toh.level = SOL_SOCKET; 2589 toh.name = SO_SRCADDR; 2590 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2591 toh.status = 0; 2592 conn_req.OPT_length = 2593 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2594 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2595 _TPI_ALIGN_TOPT(addrlen)); 2596 2597 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2598 soappendmsg(mp, addr, addrlen); 2599 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2600 soappendmsg(mp, &toh, sizeof (toh)); 2601 soappendmsg(mp, src, srclen); 2602 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2603 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2604 } 2605 /* 2606 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2607 * in order to have the right state when the T_CONN_CON shows up. 
2608 */ 2609 soisconnecting(so); 2610 mutex_exit(&so->so_lock); 2611 2612 if (audit_active) 2613 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2614 2615 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2616 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2617 mp = NULL; 2618 mutex_enter(&so->so_lock); 2619 if (error != 0) 2620 goto bad; 2621 2622 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2623 goto bad; 2624 2625 /* Allow other threads to access the socket */ 2626 so_unlock_single(so, SOLOCKED); 2627 need_unlock = B_FALSE; 2628 2629 /* 2630 * Wait until we get a T_CONN_CON or an error 2631 */ 2632 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2633 so_lock_single(so); /* Set SOLOCKED */ 2634 need_unlock = B_TRUE; 2635 } 2636 2637 done: 2638 freemsg(mp); 2639 switch (error) { 2640 case EINPROGRESS: 2641 case EALREADY: 2642 case EISCONN: 2643 case EINTR: 2644 /* Non-fatal errors */ 2645 sti->sti_laddr_valid = 0; 2646 /* FALLTHRU */ 2647 case 0: 2648 break; 2649 default: 2650 ASSERT(need_unlock); 2651 /* 2652 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2653 * and invalidate local-address cache 2654 */ 2655 so->so_state &= ~SS_ISCONNECTING; 2656 sti->sti_laddr_valid = 0; 2657 /* A discon_ind might have already unbound us */ 2658 if ((flags & _SOCONNECT_DID_BIND) && 2659 (so->so_state & SS_ISBOUND)) { 2660 int err; 2661 2662 err = sotpi_unbind(so, 0); 2663 /* LINTED - statement has no conseq */ 2664 if (err) { 2665 eprintsoline(so, err); 2666 } 2667 } 2668 break; 2669 } 2670 if (need_unlock) 2671 so_unlock_single(so, SOLOCKED); 2672 mutex_exit(&so->so_lock); 2673 return (error); 2674 2675 so_bad: error = sogeterr(so, B_TRUE); 2676 bad: eprintsoline(so, error); 2677 goto done; 2678 } 2679 2680 /* ARGSUSED */ 2681 int 2682 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2683 { 2684 struct T_ordrel_req ordrel_req; 2685 mblk_t *mp; 2686 uint_t old_state, state_change; 2687 int error = 0; 2688 sotpi_info_t *sti = SOTOTPI(so); 2689 2690 
dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2691 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2692 2693 mutex_enter(&so->so_lock); 2694 so_lock_single(so); /* Set SOLOCKED */ 2695 2696 /* 2697 * SunOS 4.X has no check for datagram sockets. 2698 * 5.X checks that it is connected (ENOTCONN) 2699 * X/Open requires that we check the connected state. 2700 */ 2701 if (!(so->so_state & SS_ISCONNECTED)) { 2702 if (!xnet_skip_checks) { 2703 error = ENOTCONN; 2704 if (xnet_check_print) { 2705 printf("sockfs: X/Open shutdown check " 2706 "caused ENOTCONN\n"); 2707 } 2708 } 2709 goto done; 2710 } 2711 /* 2712 * Record the current state and then perform any state changes. 2713 * Then use the difference between the old and new states to 2714 * determine which messages need to be sent. 2715 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2716 * duplicate calls to shutdown(). 2717 */ 2718 old_state = so->so_state; 2719 2720 switch (how) { 2721 case 0: 2722 socantrcvmore(so); 2723 break; 2724 case 1: 2725 socantsendmore(so); 2726 break; 2727 case 2: 2728 socantsendmore(so); 2729 socantrcvmore(so); 2730 break; 2731 default: 2732 error = EINVAL; 2733 goto done; 2734 } 2735 2736 /* 2737 * Assumes that the SS_CANT* flags are never cleared in the above code. 2738 */ 2739 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2740 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2741 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2742 2743 switch (state_change) { 2744 case 0: 2745 dprintso(so, 1, 2746 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2747 so->so_state)); 2748 goto done; 2749 2750 case SS_CANTRCVMORE: 2751 mutex_exit(&so->so_lock); 2752 strseteof(SOTOV(so), 1); 2753 /* 2754 * strseteof takes care of read side wakeups, 2755 * pollwakeups, and signals. 2756 */ 2757 /* 2758 * Get the read lock before flushing data to avoid problems 2759 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2760 */ 2761 mutex_enter(&so->so_lock); 2762 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2763 mutex_exit(&so->so_lock); 2764 2765 /* Flush read side queue */ 2766 strflushrq(SOTOV(so), FLUSHALL); 2767 2768 mutex_enter(&so->so_lock); 2769 so_unlock_read(so); /* Clear SOREADLOCKED */ 2770 break; 2771 2772 case SS_CANTSENDMORE: 2773 mutex_exit(&so->so_lock); 2774 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2775 mutex_enter(&so->so_lock); 2776 break; 2777 2778 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2779 mutex_exit(&so->so_lock); 2780 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2781 strseteof(SOTOV(so), 1); 2782 /* 2783 * strseteof takes care of read side wakeups, 2784 * pollwakeups, and signals. 2785 */ 2786 /* 2787 * Get the read lock before flushing data to avoid problems 2788 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2789 */ 2790 mutex_enter(&so->so_lock); 2791 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2792 mutex_exit(&so->so_lock); 2793 2794 /* Flush read side queue */ 2795 strflushrq(SOTOV(so), FLUSHALL); 2796 2797 mutex_enter(&so->so_lock); 2798 so_unlock_read(so); /* Clear SOREADLOCKED */ 2799 break; 2800 } 2801 2802 ASSERT(MUTEX_HELD(&so->so_lock)); 2803 2804 /* 2805 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2806 * was set due to this call and the new state has both of them set: 2807 * Send the AF_UNIX close indication 2808 * For T_COTS send a discon_ind 2809 * 2810 * If cantsend was set due to this call: 2811 * For T_COTSORD send an ordrel_ind 2812 * 2813 * Note that for T_CLTS there is no message sent here. 2814 */ 2815 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2816 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2817 /* 2818 * For SunOS 4.X compatibility we tell the other end 2819 * that we are unable to receive at this point. 
2820 */ 2821 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2822 so_unix_close(so); 2823 2824 if (sti->sti_serv_type == T_COTS) 2825 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2826 } 2827 if ((state_change & SS_CANTSENDMORE) && 2828 (sti->sti_serv_type == T_COTS_ORD)) { 2829 /* Send an orderly release */ 2830 ordrel_req.PRIM_type = T_ORDREL_REQ; 2831 2832 mutex_exit(&so->so_lock); 2833 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2834 0, _ALLOC_SLEEP, cr); 2835 /* 2836 * Send down the T_ORDREL_REQ even if there is flow control. 2837 * This prevents shutdown from blocking. 2838 * Note that there is no T_OK_ACK for ordrel_req. 2839 */ 2840 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2841 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2842 mutex_enter(&so->so_lock); 2843 if (error) { 2844 eprintsoline(so, error); 2845 goto done; 2846 } 2847 } 2848 2849 done: 2850 so_unlock_single(so, SOLOCKED); 2851 mutex_exit(&so->so_lock); 2852 return (error); 2853 } 2854 2855 /* 2856 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2857 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2858 * that we have closed. 2859 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2860 * T_UNITDATA_REQ containing the same option. 2861 * 2862 * For SOCK_DGRAM half-connections (somebody connected to this end 2863 * but this end is not connect) we don't know where to send any 2864 * SO_UNIX_CLOSE. 2865 * 2866 * We have to ignore stream head errors just in case there has been 2867 * a shutdown(output). 2868 * Ignore any flow control to try to get the message more quickly to the peer. 2869 * While locally ignoring flow control solves the problem when there 2870 * is only the loopback transport on the stream it would not provide 2871 * the correct AF_UNIX socket semantics when one or more modules have 2872 * been pushed. 
2873 */ 2874 void 2875 so_unix_close(struct sonode *so) 2876 { 2877 int error; 2878 struct T_opthdr toh; 2879 mblk_t *mp; 2880 sotpi_info_t *sti = SOTOTPI(so); 2881 2882 ASSERT(MUTEX_HELD(&so->so_lock)); 2883 2884 ASSERT(so->so_family == AF_UNIX); 2885 2886 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2887 (SS_ISCONNECTED|SS_ISBOUND)) 2888 return; 2889 2890 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2891 (void *)so, pr_state(so->so_state, so->so_mode))); 2892 2893 toh.level = SOL_SOCKET; 2894 toh.name = SO_UNIX_CLOSE; 2895 2896 /* zero length + header */ 2897 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2898 toh.status = 0; 2899 2900 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2901 struct T_optdata_req tdr; 2902 2903 tdr.PRIM_type = T_OPTDATA_REQ; 2904 tdr.DATA_flag = 0; 2905 2906 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2907 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2908 2909 /* NOTE: holding so_lock while sleeping */ 2910 mp = soallocproto2(&tdr, sizeof (tdr), 2911 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2912 } else { 2913 struct T_unitdata_req tudr; 2914 void *addr; 2915 socklen_t addrlen; 2916 void *src; 2917 socklen_t srclen; 2918 struct T_opthdr toh2; 2919 t_scalar_t size; 2920 2921 /* Connecteded DGRAM socket */ 2922 2923 /* 2924 * For AF_UNIX the destination address is translated to 2925 * an internal name and the source address is passed as 2926 * an option. 2927 */ 2928 /* 2929 * Length and family checks. 2930 */ 2931 error = so_addr_verify(so, sti->sti_faddr_sa, 2932 (t_uscalar_t)sti->sti_faddr_len); 2933 if (error) { 2934 eprintsoline(so, error); 2935 return; 2936 } 2937 if (sti->sti_faddr_noxlate) { 2938 /* 2939 * Already have a transport internal address. Do not 2940 * pass any (transport internal) source address. 
2941 */ 2942 addr = sti->sti_faddr_sa; 2943 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2944 src = NULL; 2945 srclen = 0; 2946 } else { 2947 /* 2948 * Pass the sockaddr_un source address as an option 2949 * and translate the remote address. 2950 * Holding so_lock thus sti_laddr_sa can not change. 2951 */ 2952 src = sti->sti_laddr_sa; 2953 srclen = (socklen_t)sti->sti_laddr_len; 2954 dprintso(so, 1, 2955 ("so_ux_close: srclen %d, src %p\n", 2956 srclen, src)); 2957 error = so_ux_addr_xlate(so, 2958 sti->sti_faddr_sa, 2959 (socklen_t)sti->sti_faddr_len, 0, 2960 &addr, &addrlen); 2961 if (error) { 2962 eprintsoline(so, error); 2963 return; 2964 } 2965 } 2966 tudr.PRIM_type = T_UNITDATA_REQ; 2967 tudr.DEST_length = addrlen; 2968 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2969 if (srclen == 0) { 2970 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2971 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2972 _TPI_ALIGN_TOPT(addrlen)); 2973 2974 size = tudr.OPT_offset + tudr.OPT_length; 2975 /* NOTE: holding so_lock while sleeping */ 2976 mp = soallocproto2(&tudr, sizeof (tudr), 2977 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2978 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2979 soappendmsg(mp, &toh, sizeof (toh)); 2980 } else { 2981 /* 2982 * There is a AF_UNIX sockaddr_un to include as a 2983 * source address option. 
2984 */ 2985 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2986 _TPI_ALIGN_TOPT(srclen)); 2987 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2988 _TPI_ALIGN_TOPT(addrlen)); 2989 2990 toh2.level = SOL_SOCKET; 2991 toh2.name = SO_SRCADDR; 2992 toh2.len = (t_uscalar_t)(srclen + 2993 sizeof (struct T_opthdr)); 2994 toh2.status = 0; 2995 2996 size = tudr.OPT_offset + tudr.OPT_length; 2997 2998 /* NOTE: holding so_lock while sleeping */ 2999 mp = soallocproto2(&tudr, sizeof (tudr), 3000 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 3001 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3002 soappendmsg(mp, &toh, sizeof (toh)); 3003 soappendmsg(mp, &toh2, sizeof (toh2)); 3004 soappendmsg(mp, src, srclen); 3005 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3006 } 3007 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3008 } 3009 mutex_exit(&so->so_lock); 3010 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 3011 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 3012 mutex_enter(&so->so_lock); 3013 } 3014 3015 /* 3016 * Called by sotpi_recvmsg when reading a non-zero amount of data. 3017 * In addition, the caller typically verifies that there is some 3018 * potential state to clear by checking 3019 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 3020 * before calling this routine. 3021 * Note that such a check can be made without holding so_lock since 3022 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 3023 * decrements sti_oobsigcnt. 3024 * 3025 * When data is read *after* the point that all pending 3026 * oob data has been consumed the oob indication is cleared. 3027 * 3028 * This logic keeps select/poll returning POLLRDBAND and 3029 * SIOCATMARK returning true until we have read past 3030 * the mark. 
3031 */ 3032 static void 3033 sorecv_update_oobstate(struct sonode *so) 3034 { 3035 sotpi_info_t *sti = SOTOTPI(so); 3036 3037 mutex_enter(&so->so_lock); 3038 ASSERT(so_verify_oobstate(so)); 3039 dprintso(so, 1, 3040 ("sorecv_update_oobstate: counts %d/%d state %s\n", 3041 sti->sti_oobsigcnt, 3042 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 3043 if (sti->sti_oobsigcnt == 0) { 3044 /* No more pending oob indications */ 3045 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 3046 freemsg(so->so_oobmsg); 3047 so->so_oobmsg = NULL; 3048 } 3049 ASSERT(so_verify_oobstate(so)); 3050 mutex_exit(&so->so_lock); 3051 } 3052 3053 /* 3054 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 3055 */ 3056 static int 3057 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 3058 { 3059 sotpi_info_t *sti = SOTOTPI(so); 3060 int error = 0; 3061 mblk_t *tmp = NULL; 3062 mblk_t *pmp = NULL; 3063 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 3064 3065 ASSERT(nmp != NULL); 3066 3067 while (nmp != NULL && uiop->uio_resid > 0) { 3068 ssize_t n; 3069 3070 if (DB_TYPE(nmp) == M_DATA) { 3071 /* 3072 * We have some data, uiomove up to resid bytes. 3073 */ 3074 n = MIN(MBLKL(nmp), uiop->uio_resid); 3075 if (n > 0) 3076 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 3077 nmp->b_rptr += n; 3078 if (nmp->b_rptr == nmp->b_wptr) { 3079 pmp = nmp; 3080 nmp = nmp->b_cont; 3081 } 3082 if (error) 3083 break; 3084 } else { 3085 /* 3086 * We only handle data, save for caller to handle. 
3087 */ 3088 if (pmp != NULL) { 3089 pmp->b_cont = nmp->b_cont; 3090 } 3091 nmp->b_cont = NULL; 3092 if (*rmp == NULL) { 3093 *rmp = nmp; 3094 } else { 3095 tmp->b_cont = nmp; 3096 } 3097 nmp = nmp->b_cont; 3098 tmp = nmp; 3099 } 3100 } 3101 if (pmp != NULL) { 3102 /* Free any mblk_t(s) which we have consumed */ 3103 pmp->b_cont = NULL; 3104 freemsg(sti->sti_nl7c_rcv_mp); 3105 } 3106 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3107 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3108 if (error == 0) { 3109 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3110 3111 error = p->r_v.r_v2; 3112 p->r_v.r_v2 = 0; 3113 } 3114 rp->r_vals = sti->sti_nl7c_rcv_rval; 3115 sti->sti_nl7c_rcv_rval = 0; 3116 } else { 3117 /* More mblk_t(s) to process so no rval to return */ 3118 rp->r_vals = 0; 3119 } 3120 return (error); 3121 } 3122 /* 3123 * Receive the next message on the queue. 3124 * If msg_controllen is non-zero when called the caller is interested in 3125 * any received control info (options). 3126 * If msg_namelen is non-zero when called the caller is interested in 3127 * any received source address. 3128 * The routine returns with msg_control and msg_name pointing to 3129 * kmem_alloc'ed memory which the caller has to free. 
3130 */ 3131 /* ARGSUSED */ 3132 int 3133 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3134 struct cred *cr) 3135 { 3136 union T_primitives *tpr; 3137 mblk_t *mp; 3138 uchar_t pri; 3139 int pflag, opflag; 3140 void *control; 3141 t_uscalar_t controllen; 3142 t_uscalar_t namelen; 3143 int so_state = so->so_state; /* Snapshot */ 3144 ssize_t saved_resid; 3145 rval_t rval; 3146 int flags; 3147 clock_t timout; 3148 int error = 0; 3149 int reterr = 0; 3150 struct uio *suiop = NULL; 3151 sotpi_info_t *sti = SOTOTPI(so); 3152 3153 flags = msg->msg_flags; 3154 msg->msg_flags = 0; 3155 3156 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3157 (void *)so, (void *)msg, flags, 3158 pr_state(so->so_state, so->so_mode), so->so_error)); 3159 3160 if (so->so_version == SOV_STREAM) { 3161 so_update_attrs(so, SOACC); 3162 /* The imaginary "sockmod" has been popped - act as a stream */ 3163 return (strread(SOTOV(so), uiop, cr)); 3164 } 3165 3166 /* 3167 * If we are not connected because we have never been connected 3168 * we return ENOTCONN. If we have been connected (but are no longer 3169 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3170 * the EOF. 3171 * 3172 * An alternative would be to post an ENOTCONN error in stream head 3173 * (read+write) and clear it when we're connected. However, that error 3174 * would cause incorrect poll/select behavior! 3175 */ 3176 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3177 (so->so_mode & SM_CONNREQUIRED)) { 3178 return (ENOTCONN); 3179 } 3180 3181 /* 3182 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3183 * after checking that the read queue is empty) and returns zero. 3184 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3185 * is zero. 
3186 */ 3187 3188 if (flags & MSG_OOB) { 3189 /* Check that the transport supports OOB */ 3190 if (!(so->so_mode & SM_EXDATA)) 3191 return (EOPNOTSUPP); 3192 so_update_attrs(so, SOACC); 3193 return (sorecvoob(so, msg, uiop, flags, 3194 (so->so_options & SO_OOBINLINE))); 3195 } 3196 3197 so_update_attrs(so, SOACC); 3198 3199 /* 3200 * Set msg_controllen and msg_namelen to zero here to make it 3201 * simpler in the cases that no control or name is returned. 3202 */ 3203 controllen = msg->msg_controllen; 3204 namelen = msg->msg_namelen; 3205 msg->msg_controllen = 0; 3206 msg->msg_namelen = 0; 3207 3208 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3209 namelen, controllen)); 3210 3211 mutex_enter(&so->so_lock); 3212 /* 3213 * If an NL7C enabled socket and not waiting for write data. 3214 */ 3215 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3216 NL7C_ENABLED) { 3217 if (sti->sti_nl7c_uri) { 3218 /* Close uri processing for a previous request */ 3219 nl7c_close(so); 3220 } 3221 if ((so_state & SS_CANTRCVMORE) && 3222 sti->sti_nl7c_rcv_mp == NULL) { 3223 /* Nothing to process, EOF */ 3224 mutex_exit(&so->so_lock); 3225 return (0); 3226 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3227 /* Persistent NL7C socket, try to process request */ 3228 boolean_t ret; 3229 3230 ret = nl7c_process(so, 3231 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3232 rval.r_vals = sti->sti_nl7c_rcv_rval; 3233 error = rval.r_v.r_v2; 3234 if (error) { 3235 /* Error of some sort, return it */ 3236 mutex_exit(&so->so_lock); 3237 return (error); 3238 } 3239 if (sti->sti_nl7c_flags && 3240 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3241 /* 3242 * Still an NL7C socket and no data 3243 * to pass up to the caller. 3244 */ 3245 mutex_exit(&so->so_lock); 3246 if (ret) { 3247 /* EOF */ 3248 return (0); 3249 } else { 3250 /* Need more data */ 3251 return (EAGAIN); 3252 } 3253 } 3254 } else { 3255 /* 3256 * Not persistent so no further NL7C processing. 
3257 */ 3258 sti->sti_nl7c_flags = 0; 3259 } 3260 } 3261 /* 3262 * Only one reader is allowed at any given time. This is needed 3263 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3264 * 3265 * This is slightly different that BSD behavior in that it fails with 3266 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3267 * is single-threaded using sblock(), which is dropped while waiting 3268 * for data to appear. The difference shows up e.g. if one 3269 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3270 * does use nonblocking io and different threads are reading each 3271 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3272 * in this case as long as the read queue doesn't get empty. 3273 * In this implementation the thread using nonblocking io can 3274 * get an EWOULDBLOCK error due to the blocking thread executing 3275 * e.g. in the uiomove in kstrgetmsg. 3276 * This difference is not believed to be significant. 3277 */ 3278 /* Set SOREADLOCKED */ 3279 error = so_lock_read_intr(so, 3280 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3281 mutex_exit(&so->so_lock); 3282 if (error) 3283 return (error); 3284 3285 /* 3286 * Tell kstrgetmsg to not inspect the stream head errors until all 3287 * queued data has been consumed. 3288 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3289 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3290 * 3291 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3292 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3293 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 
3294 */ 3295 pflag = MSG_ANY | MSG_DELAYERROR; 3296 if (flags & MSG_PEEK) { 3297 pflag |= MSG_IPEEK; 3298 flags &= ~MSG_WAITALL; 3299 } 3300 if (so->so_mode & SM_ATOMIC) 3301 pflag |= MSG_DISCARDTAIL; 3302 3303 if (flags & MSG_DONTWAIT) 3304 timout = 0; 3305 else 3306 timout = -1; 3307 opflag = pflag; 3308 3309 suiop = sod_rcv_init(so, flags, &uiop); 3310 retry: 3311 saved_resid = uiop->uio_resid; 3312 pri = 0; 3313 mp = NULL; 3314 if (sti->sti_nl7c_rcv_mp != NULL) { 3315 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3316 error = nl7c_sorecv(so, &mp, uiop, &rval); 3317 } else { 3318 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3319 timout, &rval); 3320 } 3321 if (error != 0) { 3322 /* kstrgetmsg returns ETIME when timeout expires */ 3323 if (error == ETIME) 3324 error = EWOULDBLOCK; 3325 goto out; 3326 } 3327 /* 3328 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3329 * For non-datagrams MOREDATA is used to set MSG_EOR. 3330 */ 3331 ASSERT(!(rval.r_val1 & MORECTL)); 3332 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3333 msg->msg_flags |= MSG_TRUNC; 3334 3335 if (mp == NULL) { 3336 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3337 /* 3338 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3339 * The draft Posix socket spec states that the mark should 3340 * not be cleared when peeking. We follow the latter. 3341 */ 3342 if ((so->so_state & 3343 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3344 (uiop->uio_resid != saved_resid) && 3345 !(flags & MSG_PEEK)) { 3346 sorecv_update_oobstate(so); 3347 } 3348 3349 mutex_enter(&so->so_lock); 3350 /* Set MSG_EOR based on MOREDATA */ 3351 if (!(rval.r_val1 & MOREDATA)) { 3352 if (so->so_state & SS_SAVEDEOR) { 3353 msg->msg_flags |= MSG_EOR; 3354 so->so_state &= ~SS_SAVEDEOR; 3355 } 3356 } 3357 /* 3358 * If some data was received (i.e. not EOF) and the 3359 * read/recv* has not been satisfied wait for some more. 
3360 */ 3361 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3362 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3363 mutex_exit(&so->so_lock); 3364 pflag = opflag | MSG_NOMARK; 3365 goto retry; 3366 } 3367 goto out_locked; 3368 } 3369 3370 /* strsock_proto has already verified length and alignment */ 3371 tpr = (union T_primitives *)mp->b_rptr; 3372 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3373 3374 switch (tpr->type) { 3375 case T_DATA_IND: { 3376 if ((so->so_state & 3377 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3378 (uiop->uio_resid != saved_resid) && 3379 !(flags & MSG_PEEK)) { 3380 sorecv_update_oobstate(so); 3381 } 3382 3383 /* 3384 * Set msg_flags to MSG_EOR based on 3385 * MORE_flag and MOREDATA. 3386 */ 3387 mutex_enter(&so->so_lock); 3388 so->so_state &= ~SS_SAVEDEOR; 3389 if (!(tpr->data_ind.MORE_flag & 1)) { 3390 if (!(rval.r_val1 & MOREDATA)) 3391 msg->msg_flags |= MSG_EOR; 3392 else 3393 so->so_state |= SS_SAVEDEOR; 3394 } 3395 freemsg(mp); 3396 /* 3397 * If some data was received (i.e. not EOF) and the 3398 * read/recv* has not been satisfied wait for some more. 
3399 */ 3400 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3401 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3402 mutex_exit(&so->so_lock); 3403 pflag = opflag | MSG_NOMARK; 3404 goto retry; 3405 } 3406 goto out_locked; 3407 } 3408 case T_UNITDATA_IND: { 3409 void *addr; 3410 t_uscalar_t addrlen; 3411 void *abuf; 3412 t_uscalar_t optlen; 3413 void *opt; 3414 3415 if ((so->so_state & 3416 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3417 (uiop->uio_resid != saved_resid) && 3418 !(flags & MSG_PEEK)) { 3419 sorecv_update_oobstate(so); 3420 } 3421 3422 if (namelen != 0) { 3423 /* Caller wants source address */ 3424 addrlen = tpr->unitdata_ind.SRC_length; 3425 addr = sogetoff(mp, 3426 tpr->unitdata_ind.SRC_offset, 3427 addrlen, 1); 3428 if (addr == NULL) { 3429 freemsg(mp); 3430 error = EPROTO; 3431 eprintsoline(so, error); 3432 goto out; 3433 } 3434 if (so->so_family == AF_UNIX) { 3435 /* 3436 * Can not use the transport level address. 3437 * If there is a SO_SRCADDR option carrying 3438 * the socket level address it will be 3439 * extracted below. 3440 */ 3441 addr = NULL; 3442 addrlen = 0; 3443 } 3444 } 3445 optlen = tpr->unitdata_ind.OPT_length; 3446 if (optlen != 0) { 3447 t_uscalar_t ncontrollen; 3448 3449 /* 3450 * Extract any source address option. 3451 * Determine how large cmsg buffer is needed. 3452 */ 3453 opt = sogetoff(mp, 3454 tpr->unitdata_ind.OPT_offset, 3455 optlen, __TPI_ALIGN_SIZE); 3456 3457 if (opt == NULL) { 3458 freemsg(mp); 3459 error = EPROTO; 3460 eprintsoline(so, error); 3461 goto out; 3462 } 3463 if (so->so_family == AF_UNIX) 3464 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3465 ncontrollen = so_cmsglen(mp, opt, optlen, 3466 !(flags & MSG_XPG4_2)); 3467 if (controllen != 0) 3468 controllen = ncontrollen; 3469 else if (ncontrollen != 0) 3470 msg->msg_flags |= MSG_CTRUNC; 3471 } else { 3472 controllen = 0; 3473 } 3474 3475 if (namelen != 0) { 3476 /* 3477 * Return address to caller. 
3478 * Caller handles truncation if length 3479 * exceeds msg_namelen. 3480 * NOTE: AF_UNIX NUL termination is ensured by 3481 * the sender's copyin_name(). 3482 */ 3483 abuf = kmem_alloc(addrlen, KM_SLEEP); 3484 3485 bcopy(addr, abuf, addrlen); 3486 msg->msg_name = abuf; 3487 msg->msg_namelen = addrlen; 3488 } 3489 3490 if (controllen != 0) { 3491 /* 3492 * Return control msg to caller. 3493 * Caller handles truncation if length 3494 * exceeds msg_controllen. 3495 */ 3496 control = kmem_zalloc(controllen, KM_SLEEP); 3497 3498 error = so_opt2cmsg(mp, opt, optlen, 3499 !(flags & MSG_XPG4_2), 3500 control, controllen); 3501 if (error) { 3502 freemsg(mp); 3503 if (msg->msg_namelen != 0) 3504 kmem_free(msg->msg_name, 3505 msg->msg_namelen); 3506 kmem_free(control, controllen); 3507 eprintsoline(so, error); 3508 goto out; 3509 } 3510 msg->msg_control = control; 3511 msg->msg_controllen = controllen; 3512 } 3513 3514 freemsg(mp); 3515 goto out; 3516 } 3517 case T_OPTDATA_IND: { 3518 struct T_optdata_req *tdr; 3519 void *opt; 3520 t_uscalar_t optlen; 3521 3522 if ((so->so_state & 3523 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3524 (uiop->uio_resid != saved_resid) && 3525 !(flags & MSG_PEEK)) { 3526 sorecv_update_oobstate(so); 3527 } 3528 3529 tdr = (struct T_optdata_req *)mp->b_rptr; 3530 optlen = tdr->OPT_length; 3531 if (optlen != 0) { 3532 t_uscalar_t ncontrollen; 3533 /* 3534 * Determine how large cmsg buffer is needed. 3535 */ 3536 opt = sogetoff(mp, 3537 tpr->optdata_ind.OPT_offset, 3538 optlen, __TPI_ALIGN_SIZE); 3539 3540 if (opt == NULL) { 3541 freemsg(mp); 3542 error = EPROTO; 3543 eprintsoline(so, error); 3544 goto out; 3545 } 3546 3547 ncontrollen = so_cmsglen(mp, opt, optlen, 3548 !(flags & MSG_XPG4_2)); 3549 if (controllen != 0) 3550 controllen = ncontrollen; 3551 else if (ncontrollen != 0) 3552 msg->msg_flags |= MSG_CTRUNC; 3553 } else { 3554 controllen = 0; 3555 } 3556 3557 if (controllen != 0) { 3558 /* 3559 * Return control msg to caller. 
3560 * Caller handles truncation if length 3561 * exceeds msg_controllen. 3562 */ 3563 control = kmem_zalloc(controllen, KM_SLEEP); 3564 3565 error = so_opt2cmsg(mp, opt, optlen, 3566 !(flags & MSG_XPG4_2), 3567 control, controllen); 3568 if (error) { 3569 freemsg(mp); 3570 kmem_free(control, controllen); 3571 eprintsoline(so, error); 3572 goto out; 3573 } 3574 msg->msg_control = control; 3575 msg->msg_controllen = controllen; 3576 } 3577 3578 /* 3579 * Set msg_flags to MSG_EOR based on 3580 * DATA_flag and MOREDATA. 3581 */ 3582 mutex_enter(&so->so_lock); 3583 so->so_state &= ~SS_SAVEDEOR; 3584 if (!(tpr->data_ind.MORE_flag & 1)) { 3585 if (!(rval.r_val1 & MOREDATA)) 3586 msg->msg_flags |= MSG_EOR; 3587 else 3588 so->so_state |= SS_SAVEDEOR; 3589 } 3590 freemsg(mp); 3591 /* 3592 * If some data was received (i.e. not EOF) and the 3593 * read/recv* has not been satisfied wait for some more. 3594 * Not possible to wait if control info was received. 3595 */ 3596 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3597 controllen == 0 && 3598 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3599 mutex_exit(&so->so_lock); 3600 pflag = opflag | MSG_NOMARK; 3601 goto retry; 3602 } 3603 goto out_locked; 3604 } 3605 case T_EXDATA_IND: { 3606 dprintso(so, 1, 3607 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3608 "state %s\n", 3609 sti->sti_oobsigcnt, sti->sti_oobcnt, 3610 saved_resid - uiop->uio_resid, 3611 pr_state(so->so_state, so->so_mode))); 3612 /* 3613 * kstrgetmsg handles MSGMARK so there is nothing to 3614 * inspect in the T_EXDATA_IND. 3615 * strsock_proto makes the stream head queue the T_EXDATA_IND 3616 * as a separate message with no M_DATA component. Furthermore, 3617 * the stream head does not consolidate M_DATA messages onto 3618 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3619 * remains a message by itself. This is needed since MSGMARK 3620 * marks both the whole message as well as the last byte 3621 * of the message. 
3622 */ 3623 freemsg(mp); 3624 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3625 if (flags & MSG_PEEK) { 3626 /* 3627 * Even though we are peeking we consume the 3628 * T_EXDATA_IND thereby moving the mark information 3629 * to SS_RCVATMARK. Then the oob code below will 3630 * retry the peeking kstrgetmsg. 3631 * Note that the stream head read queue is 3632 * never flushed without holding SOREADLOCKED 3633 * thus the T_EXDATA_IND can not disappear 3634 * underneath us. 3635 */ 3636 dprintso(so, 1, 3637 ("sotpi_recvmsg: consume EXDATA_IND " 3638 "counts %d/%d state %s\n", 3639 sti->sti_oobsigcnt, 3640 sti->sti_oobcnt, 3641 pr_state(so->so_state, so->so_mode))); 3642 3643 pflag = MSG_ANY | MSG_DELAYERROR; 3644 if (so->so_mode & SM_ATOMIC) 3645 pflag |= MSG_DISCARDTAIL; 3646 3647 pri = 0; 3648 mp = NULL; 3649 3650 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3651 &pri, &pflag, (clock_t)-1, &rval); 3652 ASSERT(uiop->uio_resid == saved_resid); 3653 3654 if (error) { 3655 #ifdef SOCK_DEBUG 3656 if (error != EWOULDBLOCK && error != EINTR) { 3657 eprintsoline(so, error); 3658 } 3659 #endif /* SOCK_DEBUG */ 3660 goto out; 3661 } 3662 ASSERT(mp); 3663 tpr = (union T_primitives *)mp->b_rptr; 3664 ASSERT(tpr->type == T_EXDATA_IND); 3665 freemsg(mp); 3666 } /* end "if (flags & MSG_PEEK)" */ 3667 3668 /* 3669 * Decrement the number of queued and pending oob. 3670 * 3671 * SS_RCVATMARK is cleared when we read past a mark. 3672 * SS_HAVEOOBDATA is cleared when we've read past the 3673 * last mark. 3674 * SS_OOBPEND is cleared if we've read past the last 3675 * mark and no (new) SIGURG has been posted. 
3676 */ 3677 mutex_enter(&so->so_lock); 3678 ASSERT(so_verify_oobstate(so)); 3679 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3680 ASSERT(sti->sti_oobsigcnt > 0); 3681 sti->sti_oobsigcnt--; 3682 ASSERT(sti->sti_oobcnt > 0); 3683 sti->sti_oobcnt--; 3684 /* 3685 * Since the T_EXDATA_IND has been removed from the stream 3686 * head, but we have not read data past the mark, 3687 * sockfs needs to track that the socket is still at the mark. 3688 * 3689 * Since no data was received call kstrgetmsg again to wait 3690 * for data. 3691 */ 3692 so->so_state |= SS_RCVATMARK; 3693 mutex_exit(&so->so_lock); 3694 dprintso(so, 1, 3695 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3696 sti->sti_oobsigcnt, sti->sti_oobcnt, 3697 pr_state(so->so_state, so->so_mode))); 3698 pflag = opflag; 3699 goto retry; 3700 } 3701 default: 3702 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3703 (void *)so, tpr->type, (void *)mp); 3704 ASSERT(0); 3705 freemsg(mp); 3706 error = EPROTO; 3707 eprintsoline(so, error); 3708 goto out; 3709 } 3710 /* NOTREACHED */ 3711 out: 3712 mutex_enter(&so->so_lock); 3713 out_locked: 3714 if (so->so_direct != NULL) { 3715 mutex_enter(so->so_direct->sod_lockp); 3716 reterr = sod_rcv_done(so, suiop, uiop); 3717 mutex_exit(so->so_direct->sod_lockp); 3718 } 3719 if (reterr != 0 && error == 0) 3720 error = reterr; 3721 so_unlock_read(so); /* Clear SOREADLOCKED */ 3722 mutex_exit(&so->so_lock); 3723 return (error); 3724 } 3725 3726 /* 3727 * Sending data with options on a datagram socket. 3728 * Assumes caller has verified that SS_ISBOUND etc. are set. 
3729 */ 3730 static int 3731 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3732 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3733 { 3734 struct T_unitdata_req tudr; 3735 mblk_t *mp; 3736 int error; 3737 void *addr; 3738 socklen_t addrlen; 3739 void *src; 3740 socklen_t srclen; 3741 ssize_t len; 3742 int size; 3743 struct T_opthdr toh; 3744 struct fdbuf *fdbuf; 3745 t_uscalar_t optlen; 3746 void *fds; 3747 int fdlen; 3748 sotpi_info_t *sti = SOTOTPI(so); 3749 3750 ASSERT(name && namelen); 3751 ASSERT(control && controllen); 3752 3753 len = uiop->uio_resid; 3754 if (len > (ssize_t)sti->sti_tidu_size) { 3755 return (EMSGSIZE); 3756 } 3757 3758 /* 3759 * For AF_UNIX the destination address is translated to an internal 3760 * name and the source address is passed as an option. 3761 * Also, file descriptors are passed as file pointers in an 3762 * option. 3763 */ 3764 3765 /* 3766 * Length and family checks. 3767 */ 3768 error = so_addr_verify(so, name, namelen); 3769 if (error) { 3770 eprintsoline(so, error); 3771 return (error); 3772 } 3773 if (so->so_family == AF_UNIX) { 3774 if (sti->sti_faddr_noxlate) { 3775 /* 3776 * Already have a transport internal address. Do not 3777 * pass any (transport internal) source address. 3778 */ 3779 addr = name; 3780 addrlen = namelen; 3781 src = NULL; 3782 srclen = 0; 3783 } else { 3784 /* 3785 * Pass the sockaddr_un source address as an option 3786 * and translate the remote address. 3787 * 3788 * Note that this code does not prevent sti_laddr_sa 3789 * from changing while it is being used. Thus 3790 * if an unbind+bind occurs concurrently with this 3791 * send the peer might see a partially new and a 3792 * partially old "from" address. 
3793 */ 3794 src = sti->sti_laddr_sa; 3795 srclen = (t_uscalar_t)sti->sti_laddr_len; 3796 dprintso(so, 1, 3797 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3798 srclen, src)); 3799 error = so_ux_addr_xlate(so, name, namelen, 3800 (flags & MSG_XPG4_2), 3801 &addr, &addrlen); 3802 if (error) { 3803 eprintsoline(so, error); 3804 return (error); 3805 } 3806 } 3807 } else { 3808 addr = name; 3809 addrlen = namelen; 3810 src = NULL; 3811 srclen = 0; 3812 } 3813 optlen = so_optlen(control, controllen, 3814 !(flags & MSG_XPG4_2)); 3815 tudr.PRIM_type = T_UNITDATA_REQ; 3816 tudr.DEST_length = addrlen; 3817 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3818 if (srclen != 0) 3819 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3820 _TPI_ALIGN_TOPT(srclen)); 3821 else 3822 tudr.OPT_length = optlen; 3823 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3824 _TPI_ALIGN_TOPT(addrlen)); 3825 3826 size = tudr.OPT_offset + tudr.OPT_length; 3827 3828 /* 3829 * File descriptors only when SM_FDPASSING set. 3830 */ 3831 error = so_getfdopt(control, controllen, 3832 !(flags & MSG_XPG4_2), &fds, &fdlen); 3833 if (error) 3834 return (error); 3835 if (fdlen != -1) { 3836 if (!(so->so_mode & SM_FDPASSING)) 3837 return (EOPNOTSUPP); 3838 3839 error = fdbuf_create(fds, fdlen, &fdbuf); 3840 if (error) 3841 return (error); 3842 mp = fdbuf_allocmsg(size, fdbuf); 3843 } else { 3844 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3845 if (mp == NULL) { 3846 /* 3847 * Caught a signal waiting for memory. 3848 * Let send* return EINTR. 
3849 */ 3850 return (EINTR); 3851 } 3852 } 3853 soappendmsg(mp, &tudr, sizeof (tudr)); 3854 soappendmsg(mp, addr, addrlen); 3855 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3856 3857 if (fdlen != -1) { 3858 ASSERT(fdbuf != NULL); 3859 toh.level = SOL_SOCKET; 3860 toh.name = SO_FILEP; 3861 toh.len = fdbuf->fd_size + 3862 (t_uscalar_t)sizeof (struct T_opthdr); 3863 toh.status = 0; 3864 soappendmsg(mp, &toh, sizeof (toh)); 3865 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3866 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3867 } 3868 if (srclen != 0) { 3869 /* 3870 * There is a AF_UNIX sockaddr_un to include as a source 3871 * address option. 3872 */ 3873 toh.level = SOL_SOCKET; 3874 toh.name = SO_SRCADDR; 3875 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3876 toh.status = 0; 3877 soappendmsg(mp, &toh, sizeof (toh)); 3878 soappendmsg(mp, src, srclen); 3879 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3880 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3881 } 3882 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3883 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3884 /* At most 3 bytes left in the message */ 3885 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3886 ASSERT(MBLKL(mp) <= (ssize_t)size); 3887 3888 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3889 if (audit_active) 3890 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3891 3892 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3893 #ifdef SOCK_DEBUG 3894 if (error) { 3895 eprintsoline(so, error); 3896 } 3897 #endif /* SOCK_DEBUG */ 3898 return (error); 3899 } 3900 3901 /* 3902 * Sending data with options on a connected stream socket. 3903 * Assumes caller has verified that SS_ISCONNECTED is set. 
3904 */ 3905 static int 3906 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3907 t_uscalar_t controllen, int flags) 3908 { 3909 struct T_optdata_req tdr; 3910 mblk_t *mp; 3911 int error; 3912 ssize_t iosize; 3913 int size; 3914 struct fdbuf *fdbuf; 3915 t_uscalar_t optlen; 3916 void *fds; 3917 int fdlen; 3918 struct T_opthdr toh; 3919 sotpi_info_t *sti = SOTOTPI(so); 3920 3921 dprintso(so, 1, 3922 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3923 3924 /* 3925 * Has to be bound and connected. However, since no locks are 3926 * held the state could have changed after sotpi_sendmsg checked it 3927 * thus it is not possible to ASSERT on the state. 3928 */ 3929 3930 /* Options on connection-oriented only when SM_OPTDATA set. */ 3931 if (!(so->so_mode & SM_OPTDATA)) 3932 return (EOPNOTSUPP); 3933 3934 do { 3935 /* 3936 * Set the MORE flag if uio_resid does not fit in this 3937 * message or if the caller passed in "more". 3938 * Error for transports with zero tidu_size. 3939 */ 3940 tdr.PRIM_type = T_OPTDATA_REQ; 3941 iosize = sti->sti_tidu_size; 3942 if (iosize <= 0) 3943 return (EMSGSIZE); 3944 if (uiop->uio_resid > iosize) { 3945 tdr.DATA_flag = 1; 3946 } else { 3947 if (more) 3948 tdr.DATA_flag = 1; 3949 else 3950 tdr.DATA_flag = 0; 3951 iosize = uiop->uio_resid; 3952 } 3953 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3954 tdr.DATA_flag, iosize)); 3955 3956 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3957 tdr.OPT_length = optlen; 3958 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3959 3960 size = (int)sizeof (tdr) + optlen; 3961 /* 3962 * File descriptors only when SM_FDPASSING set. 
3963 */ 3964 error = so_getfdopt(control, controllen, 3965 !(flags & MSG_XPG4_2), &fds, &fdlen); 3966 if (error) 3967 return (error); 3968 if (fdlen != -1) { 3969 if (!(so->so_mode & SM_FDPASSING)) 3970 return (EOPNOTSUPP); 3971 3972 error = fdbuf_create(fds, fdlen, &fdbuf); 3973 if (error) 3974 return (error); 3975 mp = fdbuf_allocmsg(size, fdbuf); 3976 } else { 3977 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3978 if (mp == NULL) { 3979 /* 3980 * Caught a signal waiting for memory. 3981 * Let send* return EINTR. 3982 */ 3983 return (EINTR); 3984 } 3985 } 3986 soappendmsg(mp, &tdr, sizeof (tdr)); 3987 3988 if (fdlen != -1) { 3989 ASSERT(fdbuf != NULL); 3990 toh.level = SOL_SOCKET; 3991 toh.name = SO_FILEP; 3992 toh.len = fdbuf->fd_size + 3993 (t_uscalar_t)sizeof (struct T_opthdr); 3994 toh.status = 0; 3995 soappendmsg(mp, &toh, sizeof (toh)); 3996 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3997 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3998 } 3999 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 4000 /* At most 3 bytes left in the message */ 4001 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 4002 ASSERT(MBLKL(mp) <= (ssize_t)size); 4003 4004 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4005 4006 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4007 0, MSG_BAND, 0); 4008 if (error) { 4009 eprintsoline(so, error); 4010 return (error); 4011 } 4012 control = NULL; 4013 if (uiop->uio_resid > 0) { 4014 /* 4015 * Recheck for fatal errors. Fail write even though 4016 * some data have been written. This is consistent 4017 * with strwrite semantics and BSD sockets semantics. 
4018 */ 4019 if (so->so_state & SS_CANTSENDMORE) { 4020 eprintsoline(so, error); 4021 return (EPIPE); 4022 } 4023 if (so->so_error != 0) { 4024 mutex_enter(&so->so_lock); 4025 error = sogeterr(so, B_TRUE); 4026 mutex_exit(&so->so_lock); 4027 if (error != 0) { 4028 eprintsoline(so, error); 4029 return (error); 4030 } 4031 } 4032 } 4033 } while (uiop->uio_resid > 0); 4034 return (0); 4035 } 4036 4037 /* 4038 * Sending data on a datagram socket. 4039 * Assumes caller has verified that SS_ISBOUND etc. are set. 4040 * 4041 * For AF_UNIX the destination address is translated to an internal 4042 * name and the source address is passed as an option. 4043 */ 4044 int 4045 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 4046 struct uio *uiop, int flags) 4047 { 4048 struct T_unitdata_req tudr; 4049 mblk_t *mp; 4050 int error; 4051 void *addr; 4052 socklen_t addrlen; 4053 void *src; 4054 socklen_t srclen; 4055 ssize_t len; 4056 sotpi_info_t *sti = SOTOTPI(so); 4057 4058 ASSERT(name != NULL && namelen != 0); 4059 4060 len = uiop->uio_resid; 4061 if (len > sti->sti_tidu_size) { 4062 error = EMSGSIZE; 4063 goto done; 4064 } 4065 4066 /* Length and family checks */ 4067 error = so_addr_verify(so, name, namelen); 4068 if (error != 0) 4069 goto done; 4070 4071 if (sti->sti_direct) 4072 return (sodgram_direct(so, name, namelen, uiop, flags)); 4073 4074 if (so->so_family == AF_UNIX) { 4075 if (sti->sti_faddr_noxlate) { 4076 /* 4077 * Already have a transport internal address. Do not 4078 * pass any (transport internal) source address. 4079 */ 4080 addr = name; 4081 addrlen = namelen; 4082 src = NULL; 4083 srclen = 0; 4084 } else { 4085 /* 4086 * Pass the sockaddr_un source address as an option 4087 * and translate the remote address. 4088 * 4089 * Note that this code does not prevent sti_laddr_sa 4090 * from changing while it is being used. 
Thus 4091 * if an unbind+bind occurs concurrently with this 4092 * send the peer might see a partially new and a 4093 * partially old "from" address. 4094 */ 4095 src = sti->sti_laddr_sa; 4096 srclen = (socklen_t)sti->sti_laddr_len; 4097 dprintso(so, 1, 4098 ("sosend_dgram UNIX: srclen %d, src %p\n", 4099 srclen, src)); 4100 error = so_ux_addr_xlate(so, name, namelen, 4101 (flags & MSG_XPG4_2), 4102 &addr, &addrlen); 4103 if (error) { 4104 eprintsoline(so, error); 4105 goto done; 4106 } 4107 } 4108 } else { 4109 addr = name; 4110 addrlen = namelen; 4111 src = NULL; 4112 srclen = 0; 4113 } 4114 tudr.PRIM_type = T_UNITDATA_REQ; 4115 tudr.DEST_length = addrlen; 4116 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4117 if (srclen == 0) { 4118 tudr.OPT_length = 0; 4119 tudr.OPT_offset = 0; 4120 4121 mp = soallocproto2(&tudr, sizeof (tudr), 4122 addr, addrlen, 0, _ALLOC_INTR, CRED()); 4123 if (mp == NULL) { 4124 /* 4125 * Caught a signal waiting for memory. 4126 * Let send* return EINTR. 4127 */ 4128 error = EINTR; 4129 goto done; 4130 } 4131 } else { 4132 /* 4133 * There is a AF_UNIX sockaddr_un to include as a source 4134 * address option. 4135 */ 4136 struct T_opthdr toh; 4137 ssize_t size; 4138 4139 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4140 _TPI_ALIGN_TOPT(srclen)); 4141 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4142 _TPI_ALIGN_TOPT(addrlen)); 4143 4144 toh.level = SOL_SOCKET; 4145 toh.name = SO_SRCADDR; 4146 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4147 toh.status = 0; 4148 4149 size = tudr.OPT_offset + tudr.OPT_length; 4150 mp = soallocproto2(&tudr, sizeof (tudr), 4151 addr, addrlen, size, _ALLOC_INTR, CRED()); 4152 if (mp == NULL) { 4153 /* 4154 * Caught a signal waiting for memory. 4155 * Let send* return EINTR. 
4156 */ 4157 error = EINTR; 4158 goto done; 4159 } 4160 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4161 soappendmsg(mp, &toh, sizeof (toh)); 4162 soappendmsg(mp, src, srclen); 4163 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4164 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4165 } 4166 4167 if (audit_active) 4168 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4169 4170 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4171 done: 4172 #ifdef SOCK_DEBUG 4173 if (error) { 4174 eprintsoline(so, error); 4175 } 4176 #endif /* SOCK_DEBUG */ 4177 return (error); 4178 } 4179 4180 /* 4181 * Sending data on a connected stream socket. 4182 * Assumes caller has verified that SS_ISCONNECTED is set. 4183 */ 4184 int 4185 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4186 int sflag) 4187 { 4188 struct T_data_req tdr; 4189 mblk_t *mp; 4190 int error; 4191 ssize_t iosize; 4192 sotpi_info_t *sti = SOTOTPI(so); 4193 4194 dprintso(so, 1, 4195 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4196 (void *)so, uiop->uio_resid, prim, sflag)); 4197 4198 /* 4199 * Has to be bound and connected. However, since no locks are 4200 * held the state could have changed after sotpi_sendmsg checked it 4201 * thus it is not possible to ASSERT on the state. 4202 */ 4203 4204 do { 4205 /* 4206 * Set the MORE flag if uio_resid does not fit in this 4207 * message or if the caller passed in "more". 4208 * Error for transports with zero tidu_size. 
4209 */ 4210 tdr.PRIM_type = prim; 4211 iosize = sti->sti_tidu_size; 4212 if (iosize <= 0) 4213 return (EMSGSIZE); 4214 if (uiop->uio_resid > iosize) { 4215 tdr.MORE_flag = 1; 4216 } else { 4217 if (more) 4218 tdr.MORE_flag = 1; 4219 else 4220 tdr.MORE_flag = 0; 4221 iosize = uiop->uio_resid; 4222 } 4223 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4224 prim, tdr.MORE_flag, iosize)); 4225 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4226 if (mp == NULL) { 4227 /* 4228 * Caught a signal waiting for memory. 4229 * Let send* return EINTR. 4230 */ 4231 return (EINTR); 4232 } 4233 4234 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4235 0, sflag | MSG_BAND, 0); 4236 if (error) { 4237 eprintsoline(so, error); 4238 return (error); 4239 } 4240 if (uiop->uio_resid > 0) { 4241 /* 4242 * Recheck for fatal errors. Fail write even though 4243 * some data have been written. This is consistent 4244 * with strwrite semantics and BSD sockets semantics. 4245 */ 4246 if (so->so_state & SS_CANTSENDMORE) { 4247 eprintsoline(so, error); 4248 return (EPIPE); 4249 } 4250 if (so->so_error != 0) { 4251 mutex_enter(&so->so_lock); 4252 error = sogeterr(so, B_TRUE); 4253 mutex_exit(&so->so_lock); 4254 if (error != 0) { 4255 eprintsoline(so, error); 4256 return (error); 4257 } 4258 } 4259 } 4260 } while (uiop->uio_resid > 0); 4261 return (0); 4262 } 4263 4264 /* 4265 * Check the state for errors and call the appropriate send function. 4266 * 4267 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4268 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4269 * after sending the message. 
4270 */ 4271 static int 4272 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4273 struct cred *cr) 4274 { 4275 int so_state; 4276 int so_mode; 4277 int error; 4278 struct sockaddr *name; 4279 t_uscalar_t namelen; 4280 int dontroute; 4281 int flags; 4282 sotpi_info_t *sti = SOTOTPI(so); 4283 4284 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4285 (void *)so, (void *)msg, msg->msg_flags, 4286 pr_state(so->so_state, so->so_mode), so->so_error)); 4287 4288 if (so->so_version == SOV_STREAM) { 4289 /* The imaginary "sockmod" has been popped - act as a stream */ 4290 so_update_attrs(so, SOMOD); 4291 return (strwrite(SOTOV(so), uiop, cr)); 4292 } 4293 4294 mutex_enter(&so->so_lock); 4295 so_state = so->so_state; 4296 4297 if (so_state & SS_CANTSENDMORE) { 4298 mutex_exit(&so->so_lock); 4299 return (EPIPE); 4300 } 4301 4302 if (so->so_error != 0) { 4303 error = sogeterr(so, B_TRUE); 4304 if (error != 0) { 4305 mutex_exit(&so->so_lock); 4306 return (error); 4307 } 4308 } 4309 4310 name = (struct sockaddr *)msg->msg_name; 4311 namelen = msg->msg_namelen; 4312 4313 so_mode = so->so_mode; 4314 4315 if (name == NULL) { 4316 if (!(so_state & SS_ISCONNECTED)) { 4317 mutex_exit(&so->so_lock); 4318 if (so_mode & SM_CONNREQUIRED) 4319 return (ENOTCONN); 4320 else 4321 return (EDESTADDRREQ); 4322 } 4323 if (so_mode & SM_CONNREQUIRED) { 4324 name = NULL; 4325 namelen = 0; 4326 } else { 4327 /* 4328 * Note that this code does not prevent sti_faddr_sa 4329 * from changing while it is being used. Thus 4330 * if an "unconnect"+connect occurs concurrently with 4331 * this send the datagram might be delivered to a 4332 * garbaled address. 
4333 */ 4334 ASSERT(sti->sti_faddr_sa); 4335 name = sti->sti_faddr_sa; 4336 namelen = (t_uscalar_t)sti->sti_faddr_len; 4337 } 4338 } else { 4339 if (!(so_state & SS_ISCONNECTED) && 4340 (so_mode & SM_CONNREQUIRED)) { 4341 /* Required but not connected */ 4342 mutex_exit(&so->so_lock); 4343 return (ENOTCONN); 4344 } 4345 /* 4346 * Ignore the address on connection-oriented sockets. 4347 * Just like BSD this code does not generate an error for 4348 * TCP (a CONNREQUIRED socket) when sending to an address 4349 * passed in with sendto/sendmsg. Instead the data is 4350 * delivered on the connection as if no address had been 4351 * supplied. 4352 */ 4353 if ((so_state & SS_ISCONNECTED) && 4354 !(so_mode & SM_CONNREQUIRED)) { 4355 mutex_exit(&so->so_lock); 4356 return (EISCONN); 4357 } 4358 if (!(so_state & SS_ISBOUND)) { 4359 so_lock_single(so); /* Set SOLOCKED */ 4360 error = sotpi_bind(so, NULL, 0, 4361 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4362 so_unlock_single(so, SOLOCKED); 4363 if (error) { 4364 mutex_exit(&so->so_lock); 4365 eprintsoline(so, error); 4366 return (error); 4367 } 4368 } 4369 /* 4370 * Handle delayed datagram errors. These are only queued 4371 * when the application sets SO_DGRAM_ERRIND. 4372 * Return the error if we are sending to the address 4373 * that was returned in the last T_UDERROR_IND. 4374 * If sending to some other address discard the delayed 4375 * error indication. 
4376 */ 4377 if (sti->sti_delayed_error) { 4378 struct T_uderror_ind *tudi; 4379 void *addr; 4380 t_uscalar_t addrlen; 4381 boolean_t match = B_FALSE; 4382 4383 ASSERT(sti->sti_eaddr_mp); 4384 error = sti->sti_delayed_error; 4385 sti->sti_delayed_error = 0; 4386 tudi = 4387 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4388 addrlen = tudi->DEST_length; 4389 addr = sogetoff(sti->sti_eaddr_mp, 4390 tudi->DEST_offset, addrlen, 1); 4391 ASSERT(addr); /* Checked by strsock_proto */ 4392 switch (so->so_family) { 4393 case AF_INET: { 4394 /* Compare just IP address and port */ 4395 sin_t *sin1 = (sin_t *)name; 4396 sin_t *sin2 = (sin_t *)addr; 4397 4398 if (addrlen == sizeof (sin_t) && 4399 namelen == addrlen && 4400 sin1->sin_port == sin2->sin_port && 4401 sin1->sin_addr.s_addr == 4402 sin2->sin_addr.s_addr) 4403 match = B_TRUE; 4404 break; 4405 } 4406 case AF_INET6: { 4407 /* Compare just IP address and port. Not flow */ 4408 sin6_t *sin1 = (sin6_t *)name; 4409 sin6_t *sin2 = (sin6_t *)addr; 4410 4411 if (addrlen == sizeof (sin6_t) && 4412 namelen == addrlen && 4413 sin1->sin6_port == sin2->sin6_port && 4414 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4415 &sin2->sin6_addr)) 4416 match = B_TRUE; 4417 break; 4418 } 4419 case AF_UNIX: 4420 default: 4421 if (namelen == addrlen && 4422 bcmp(name, addr, namelen) == 0) 4423 match = B_TRUE; 4424 } 4425 if (match) { 4426 freemsg(sti->sti_eaddr_mp); 4427 sti->sti_eaddr_mp = NULL; 4428 mutex_exit(&so->so_lock); 4429 #ifdef DEBUG 4430 dprintso(so, 0, 4431 ("sockfs delayed error %d for %s\n", 4432 error, 4433 pr_addr(so->so_family, name, namelen))); 4434 #endif /* DEBUG */ 4435 return (error); 4436 } 4437 freemsg(sti->sti_eaddr_mp); 4438 sti->sti_eaddr_mp = NULL; 4439 } 4440 } 4441 mutex_exit(&so->so_lock); 4442 4443 flags = msg->msg_flags; 4444 dontroute = 0; 4445 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4446 uint32_t val; 4447 4448 val = 1; 4449 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4450 
&val, (t_uscalar_t)sizeof (val), cr); 4451 if (error) 4452 return (error); 4453 dontroute = 1; 4454 } 4455 4456 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4457 error = EOPNOTSUPP; 4458 goto done; 4459 } 4460 if (msg->msg_controllen != 0) { 4461 if (!(so_mode & SM_CONNREQUIRED)) { 4462 so_update_attrs(so, SOMOD); 4463 error = sosend_dgramcmsg(so, name, namelen, uiop, 4464 msg->msg_control, msg->msg_controllen, flags); 4465 } else { 4466 if (flags & MSG_OOB) { 4467 /* Can't generate T_EXDATA_REQ with options */ 4468 error = EOPNOTSUPP; 4469 goto done; 4470 } 4471 so_update_attrs(so, SOMOD); 4472 error = sosend_svccmsg(so, uiop, 4473 !(flags & MSG_EOR), 4474 msg->msg_control, msg->msg_controllen, 4475 flags); 4476 } 4477 goto done; 4478 } 4479 4480 so_update_attrs(so, SOMOD); 4481 if (!(so_mode & SM_CONNREQUIRED)) { 4482 /* 4483 * If there is no SO_DONTROUTE to turn off return immediately 4484 * from send_dgram. This can allow tail-call optimizations. 4485 */ 4486 if (!dontroute) { 4487 return (sosend_dgram(so, name, namelen, uiop, flags)); 4488 } 4489 error = sosend_dgram(so, name, namelen, uiop, flags); 4490 } else { 4491 t_scalar_t prim; 4492 int sflag; 4493 4494 /* Ignore msg_name in the connected state */ 4495 if (flags & MSG_OOB) { 4496 prim = T_EXDATA_REQ; 4497 /* 4498 * Send down T_EXDATA_REQ even if there is flow 4499 * control for data. 4500 */ 4501 sflag = MSG_IGNFLOW; 4502 } else { 4503 if (so_mode & SM_BYTESTREAM) { 4504 /* Byte stream transport - use write */ 4505 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4506 4507 /* Send M_DATA messages */ 4508 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4509 (error = nl7c_data(so, uiop)) >= 0) { 4510 /* NL7C consumed the data */ 4511 return (error); 4512 } 4513 /* 4514 * If there is no SO_DONTROUTE to turn off, 4515 * sti_direct is on, and there is no flow 4516 * control, we can take the fast path. 
4517 */ 4518 if (!dontroute && sti->sti_direct != 0 && 4519 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4520 return (sostream_direct(so, uiop, 4521 NULL, cr)); 4522 } 4523 error = strwrite(SOTOV(so), uiop, cr); 4524 goto done; 4525 } 4526 prim = T_DATA_REQ; 4527 sflag = 0; 4528 } 4529 /* 4530 * If there is no SO_DONTROUTE to turn off return immediately 4531 * from sosend_svc. This can allow tail-call optimizations. 4532 */ 4533 if (!dontroute) 4534 return (sosend_svc(so, uiop, prim, 4535 !(flags & MSG_EOR), sflag)); 4536 error = sosend_svc(so, uiop, prim, 4537 !(flags & MSG_EOR), sflag); 4538 } 4539 ASSERT(dontroute); 4540 done: 4541 if (dontroute) { 4542 uint32_t val; 4543 4544 val = 0; 4545 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4546 &val, (t_uscalar_t)sizeof (val), cr); 4547 } 4548 return (error); 4549 } 4550 4551 /* 4552 * kstrwritemp() has very similar semantics as that of strwrite(). 4553 * The main difference is it obtains mblks from the caller and also 4554 * does not do any copy as done in strwrite() from user buffers to 4555 * kernel buffers. 4556 * 4557 * Currently, this routine is used by sendfile to send data allocated 4558 * within the kernel without any copying. This interface does not use the 4559 * synchronous stream interface as synch. stream interface implies 4560 * copying. 4561 */ 4562 int 4563 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4564 { 4565 struct stdata *stp; 4566 struct queue *wqp; 4567 mblk_t *newmp; 4568 char waitflag; 4569 int tempmode; 4570 int error = 0; 4571 int done = 0; 4572 struct sonode *so; 4573 boolean_t direct; 4574 4575 ASSERT(vp->v_stream); 4576 stp = vp->v_stream; 4577 4578 so = VTOSO(vp); 4579 direct = _SOTOTPI(so)->sti_direct; 4580 4581 /* 4582 * This is the sockfs direct fast path. canputnext() need 4583 * not be accurate so we don't grab the sd_lock here. If 4584 * we get flow-controlled, we grab sd_lock just before the 4585 * do..while loop below to emulate what strwrite() does. 
4586 */ 4587 wqp = stp->sd_wrq; 4588 if (canputnext(wqp) && direct && 4589 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4590 return (sostream_direct(so, NULL, mp, CRED())); 4591 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4592 /* Fast check of flags before acquiring the lock */ 4593 mutex_enter(&stp->sd_lock); 4594 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4595 mutex_exit(&stp->sd_lock); 4596 if (error != 0) { 4597 if (!(stp->sd_flag & STPLEX) && 4598 (stp->sd_wput_opt & SW_SIGPIPE)) { 4599 error = EPIPE; 4600 } 4601 return (error); 4602 } 4603 } 4604 4605 waitflag = WRITEWAIT; 4606 if (stp->sd_flag & OLDNDELAY) 4607 tempmode = fmode & ~FNDELAY; 4608 else 4609 tempmode = fmode; 4610 4611 mutex_enter(&stp->sd_lock); 4612 do { 4613 if (canputnext(wqp)) { 4614 mutex_exit(&stp->sd_lock); 4615 if (stp->sd_wputdatafunc != NULL) { 4616 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4617 NULL, NULL, NULL); 4618 if (newmp == NULL) { 4619 /* The caller will free mp */ 4620 return (ECOMM); 4621 } 4622 mp = newmp; 4623 } 4624 putnext(wqp, mp); 4625 return (0); 4626 } 4627 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4628 &done); 4629 } while (error == 0 && !done); 4630 4631 mutex_exit(&stp->sd_lock); 4632 /* 4633 * EAGAIN tells the application to try again. ENOMEM 4634 * is returned only if the memory allocation size 4635 * exceeds the physical limits of the system. ENOMEM 4636 * can't be true here. 
4637 */ 4638 if (error == ENOMEM) 4639 error = EAGAIN; 4640 return (error); 4641 } 4642 4643 /* ARGSUSED */ 4644 static int 4645 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4646 struct cred *cr, mblk_t **mpp) 4647 { 4648 int error; 4649 4650 if (so->so_family != AF_INET && so->so_family != AF_INET6) 4651 return (EAFNOSUPPORT); 4652 4653 if (so->so_state & SS_CANTSENDMORE) 4654 return (EPIPE); 4655 4656 if (so->so_type != SOCK_STREAM) 4657 return (EOPNOTSUPP); 4658 4659 if ((so->so_state & SS_ISCONNECTED) == 0) 4660 return (ENOTCONN); 4661 4662 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4663 if (error == 0) 4664 *mpp = NULL; 4665 return (error); 4666 } 4667 4668 /* 4669 * Sending data on a datagram socket. 4670 * Assumes caller has verified that SS_ISBOUND etc. are set. 4671 */ 4672 /* ARGSUSED */ 4673 static int 4674 sodgram_direct(struct sonode *so, struct sockaddr *name, 4675 socklen_t namelen, struct uio *uiop, int flags) 4676 { 4677 struct T_unitdata_req tudr; 4678 mblk_t *mp = NULL; 4679 int error = 0; 4680 void *addr; 4681 socklen_t addrlen; 4682 ssize_t len; 4683 struct stdata *stp = SOTOV(so)->v_stream; 4684 int so_state; 4685 queue_t *udp_wq; 4686 boolean_t connected; 4687 mblk_t *mpdata = NULL; 4688 sotpi_info_t *sti = SOTOTPI(so); 4689 4690 ASSERT(name != NULL && namelen != 0); 4691 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4692 ASSERT(!(so->so_mode & SM_EXDATA)); 4693 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4694 ASSERT(SOTOV(so)->v_type == VSOCK); 4695 4696 /* Caller checked for proper length */ 4697 len = uiop->uio_resid; 4698 ASSERT(len <= sti->sti_tidu_size); 4699 4700 /* Length and family checks have been done by caller */ 4701 ASSERT(name->sa_family == so->so_family); 4702 ASSERT(so->so_family == AF_INET || 4703 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4704 ASSERT(so->so_family == AF_INET6 || 4705 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4706 4707 addr = name; 4708 addrlen 
= namelen; 4709 4710 if (stp->sd_sidp != NULL && 4711 (error = straccess(stp, JCWRITE)) != 0) 4712 goto done; 4713 4714 so_state = so->so_state; 4715 4716 connected = so_state & SS_ISCONNECTED; 4717 if (!connected) { 4718 tudr.PRIM_type = T_UNITDATA_REQ; 4719 tudr.DEST_length = addrlen; 4720 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4721 tudr.OPT_length = 0; 4722 tudr.OPT_offset = 0; 4723 4724 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4725 _ALLOC_INTR, CRED()); 4726 if (mp == NULL) { 4727 /* 4728 * Caught a signal waiting for memory. 4729 * Let send* return EINTR. 4730 */ 4731 error = EINTR; 4732 goto done; 4733 } 4734 } 4735 4736 /* 4737 * For UDP we don't break up the copyin into smaller pieces 4738 * as in the TCP case. That means if ENOMEM is returned by 4739 * mcopyinuio() then the uio vector has not been modified at 4740 * all and we fallback to either strwrite() or kstrputmsg() 4741 * below. Note also that we never generate priority messages 4742 * from here. 4743 */ 4744 udp_wq = stp->sd_wrq->q_next; 4745 if (canput(udp_wq) && 4746 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4747 ASSERT(DB_TYPE(mpdata) == M_DATA); 4748 ASSERT(uiop->uio_resid == 0); 4749 if (!connected) 4750 linkb(mp, mpdata); 4751 else 4752 mp = mpdata; 4753 if (audit_active) 4754 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4755 4756 udp_wput(udp_wq, mp); 4757 return (0); 4758 } 4759 4760 ASSERT(mpdata == NULL); 4761 if (error != 0 && error != ENOMEM) { 4762 freemsg(mp); 4763 return (error); 4764 } 4765 4766 /* 4767 * For connected, let strwrite() handle the blocking case. 4768 * Otherwise we fall thru and use kstrputmsg(). 
4769 */ 4770 if (connected) 4771 return (strwrite(SOTOV(so), uiop, CRED())); 4772 4773 if (audit_active) 4774 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4775 4776 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4777 done: 4778 #ifdef SOCK_DEBUG 4779 if (error != 0) { 4780 eprintsoline(so, error); 4781 } 4782 #endif /* SOCK_DEBUG */ 4783 return (error); 4784 } 4785 4786 int 4787 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4788 { 4789 struct stdata *stp = SOTOV(so)->v_stream; 4790 ssize_t iosize, rmax, maxblk; 4791 queue_t *tcp_wq = stp->sd_wrq->q_next; 4792 mblk_t *newmp; 4793 int error = 0, wflag = 0; 4794 4795 ASSERT(so->so_mode & SM_BYTESTREAM); 4796 ASSERT(SOTOV(so)->v_type == VSOCK); 4797 4798 if (stp->sd_sidp != NULL && 4799 (error = straccess(stp, JCWRITE)) != 0) 4800 return (error); 4801 4802 if (uiop == NULL) { 4803 /* 4804 * kstrwritemp() should have checked sd_flag and 4805 * flow-control before coming here. If we end up 4806 * here it means that we can simply pass down the 4807 * data to tcp. 
4808 */ 4809 ASSERT(mp != NULL); 4810 if (stp->sd_wputdatafunc != NULL) { 4811 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4812 NULL, NULL, NULL); 4813 if (newmp == NULL) { 4814 /* The caller will free mp */ 4815 return (ECOMM); 4816 } 4817 mp = newmp; 4818 } 4819 tcp_wput(tcp_wq, mp); 4820 return (0); 4821 } 4822 4823 /* Fallback to strwrite() to do proper error handling */ 4824 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4825 return (strwrite(SOTOV(so), uiop, cr)); 4826 4827 rmax = stp->sd_qn_maxpsz; 4828 ASSERT(rmax >= 0 || rmax == INFPSZ); 4829 if (rmax == 0 || uiop->uio_resid <= 0) 4830 return (0); 4831 4832 if (rmax == INFPSZ) 4833 rmax = uiop->uio_resid; 4834 4835 maxblk = stp->sd_maxblk; 4836 4837 for (;;) { 4838 iosize = MIN(uiop->uio_resid, rmax); 4839 4840 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4841 if (mp == NULL) { 4842 /* 4843 * Fallback to strwrite() for ENOMEM; if this 4844 * is our first time in this routine and the uio 4845 * vector has not been modified, we will end up 4846 * calling strwrite() without any flag set. 4847 */ 4848 if (error == ENOMEM) 4849 goto slow_send; 4850 else 4851 return (error); 4852 } 4853 ASSERT(uiop->uio_resid >= 0); 4854 /* 4855 * If mp is non-NULL and ENOMEM is set, it means that 4856 * mcopyinuio() was able to break down some of the user 4857 * data into one or more mblks. Send the partial data 4858 * to tcp and let the rest be handled in strwrite(). 
4859 */ 4860 ASSERT(error == 0 || error == ENOMEM); 4861 if (stp->sd_wputdatafunc != NULL) { 4862 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4863 NULL, NULL, NULL); 4864 if (newmp == NULL) { 4865 /* The caller will free mp */ 4866 return (ECOMM); 4867 } 4868 mp = newmp; 4869 } 4870 tcp_wput(tcp_wq, mp); 4871 4872 wflag |= NOINTR; 4873 4874 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4875 ASSERT(error == 0); 4876 break; 4877 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4878 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4879 slow_send: 4880 /* 4881 * We were able to send down partial data using 4882 * the direct call interface, but are now relying 4883 * on strwrite() to handle the non-fastpath cases. 4884 * If the socket is blocking we will sleep in 4885 * strwaitq() until write is permitted, otherwise, 4886 * we will need to return the amount of bytes 4887 * written so far back to the app. This is the 4888 * reason why we pass NOINTR flag to strwrite() 4889 * for non-blocking socket, because we don't want 4890 * to return EAGAIN when portion of the user data 4891 * has actually been sent down. 4892 */ 4893 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4894 } 4895 } 4896 return (0); 4897 } 4898 4899 /* 4900 * Update sti_faddr by asking the transport (unless AF_UNIX). 
4901 */ 4902 /* ARGSUSED */ 4903 int 4904 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4905 boolean_t accept, struct cred *cr) 4906 { 4907 struct strbuf strbuf; 4908 int error = 0, res; 4909 void *addr; 4910 t_uscalar_t addrlen; 4911 k_sigset_t smask; 4912 sotpi_info_t *sti = SOTOTPI(so); 4913 4914 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4915 (void *)so, pr_state(so->so_state, so->so_mode))); 4916 4917 ASSERT(*namelen > 0); 4918 mutex_enter(&so->so_lock); 4919 so_lock_single(so); /* Set SOLOCKED */ 4920 4921 if (accept) { 4922 bcopy(sti->sti_faddr_sa, name, 4923 MIN(*namelen, sti->sti_faddr_len)); 4924 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4925 goto done; 4926 } 4927 4928 if (!(so->so_state & SS_ISCONNECTED)) { 4929 error = ENOTCONN; 4930 goto done; 4931 } 4932 /* Added this check for X/Open */ 4933 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4934 error = EINVAL; 4935 if (xnet_check_print) { 4936 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4937 } 4938 goto done; 4939 } 4940 4941 if (sti->sti_faddr_valid) { 4942 bcopy(sti->sti_faddr_sa, name, 4943 MIN(*namelen, sti->sti_faddr_len)); 4944 *namelen = sti->sti_faddr_noxlate ? 
0: sti->sti_faddr_len; 4945 goto done; 4946 } 4947 4948 #ifdef DEBUG 4949 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4950 pr_addr(so->so_family, sti->sti_faddr_sa, 4951 (t_uscalar_t)sti->sti_faddr_len))); 4952 #endif /* DEBUG */ 4953 4954 if (so->so_family == AF_UNIX) { 4955 /* Transport has different name space - return local info */ 4956 if (sti->sti_faddr_noxlate) 4957 *namelen = 0; 4958 error = 0; 4959 goto done; 4960 } 4961 4962 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4963 4964 ASSERT(sti->sti_faddr_sa); 4965 /* Allocate local buffer to use with ioctl */ 4966 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4967 mutex_exit(&so->so_lock); 4968 addr = kmem_alloc(addrlen, KM_SLEEP); 4969 4970 /* 4971 * Issue TI_GETPEERNAME with signals masked. 4972 * Put the result in sti_faddr_sa so that getpeername works after 4973 * a shutdown(output). 4974 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4975 * back to the socket. 4976 */ 4977 strbuf.buf = addr; 4978 strbuf.maxlen = addrlen; 4979 strbuf.len = 0; 4980 4981 sigintr(&smask, 0); 4982 res = 0; 4983 ASSERT(cr); 4984 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4985 0, K_TO_K, cr, &res); 4986 sigunintr(&smask); 4987 4988 mutex_enter(&so->so_lock); 4989 /* 4990 * If there is an error record the error in so_error put don't fail 4991 * the getpeername. Instead fallback on the recorded 4992 * sti->sti_faddr_sa. 4993 */ 4994 if (error) { 4995 /* 4996 * Various stream head errors can be returned to the ioctl. 4997 * However, it is impossible to determine which ones of 4998 * these are really socket level errors that were incorrectly 4999 * consumed by the ioctl. Thus this code silently ignores the 5000 * error - to code explicitly does not reinstate the error 5001 * using soseterror(). 5002 * Experiments have shows that at least this set of 5003 * errors are reported and should not be reinstated on the 5004 * socket: 5005 * EINVAL E.g. 
if an I_LINK was in effect when 5006 * getpeername was called. 5007 * EPIPE The ioctl error semantics prefer the write 5008 * side error over the read side error. 5009 * ENOTCONN The transport just got disconnected but 5010 * sockfs had not yet seen the T_DISCON_IND 5011 * when issuing the ioctl. 5012 */ 5013 error = 0; 5014 } else if (res == 0 && strbuf.len > 0 && 5015 (so->so_state & SS_ISCONNECTED)) { 5016 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 5017 sti->sti_faddr_len = (socklen_t)strbuf.len; 5018 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 5019 sti->sti_faddr_valid = 1; 5020 5021 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 5022 *namelen = sti->sti_faddr_len; 5023 } 5024 kmem_free(addr, addrlen); 5025 #ifdef DEBUG 5026 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 5027 pr_addr(so->so_family, sti->sti_faddr_sa, 5028 (t_uscalar_t)sti->sti_faddr_len))); 5029 #endif /* DEBUG */ 5030 done: 5031 so_unlock_single(so, SOLOCKED); 5032 mutex_exit(&so->so_lock); 5033 return (error); 5034 } 5035 5036 /* 5037 * Update sti_laddr by asking the transport (unless AF_UNIX). 
5038 */ 5039 int 5040 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 5041 struct cred *cr) 5042 { 5043 struct strbuf strbuf; 5044 int error = 0, res; 5045 void *addr; 5046 t_uscalar_t addrlen; 5047 k_sigset_t smask; 5048 sotpi_info_t *sti = SOTOTPI(so); 5049 5050 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 5051 (void *)so, pr_state(so->so_state, so->so_mode))); 5052 5053 ASSERT(*namelen > 0); 5054 mutex_enter(&so->so_lock); 5055 so_lock_single(so); /* Set SOLOCKED */ 5056 5057 #ifdef DEBUG 5058 5059 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 5060 pr_addr(so->so_family, sti->sti_laddr_sa, 5061 (t_uscalar_t)sti->sti_laddr_len))); 5062 #endif /* DEBUG */ 5063 if (sti->sti_laddr_valid) { 5064 bcopy(sti->sti_laddr_sa, name, 5065 MIN(*namelen, sti->sti_laddr_len)); 5066 *namelen = sti->sti_laddr_len; 5067 goto done; 5068 } 5069 5070 if (so->so_family == AF_UNIX) { 5071 /* Transport has different name space - return local info */ 5072 error = 0; 5073 *namelen = 0; 5074 goto done; 5075 } 5076 if (!(so->so_state & SS_ISBOUND)) { 5077 /* If not bound, then nothing to return. */ 5078 error = 0; 5079 goto done; 5080 } 5081 5082 /* Allocate local buffer to use with ioctl */ 5083 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 5084 mutex_exit(&so->so_lock); 5085 addr = kmem_alloc(addrlen, KM_SLEEP); 5086 5087 /* 5088 * Issue TI_GETMYNAME with signals masked. 5089 * Put the result in sti_laddr_sa so that getsockname works after 5090 * a shutdown(output). 5091 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5092 * back to the socket. 
5093 */ 5094 strbuf.buf = addr; 5095 strbuf.maxlen = addrlen; 5096 strbuf.len = 0; 5097 5098 sigintr(&smask, 0); 5099 res = 0; 5100 ASSERT(cr); 5101 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5102 0, K_TO_K, cr, &res); 5103 sigunintr(&smask); 5104 5105 mutex_enter(&so->so_lock); 5106 /* 5107 * If there is an error record the error in so_error put don't fail 5108 * the getsockname. Instead fallback on the recorded 5109 * sti->sti_laddr_sa. 5110 */ 5111 if (error) { 5112 /* 5113 * Various stream head errors can be returned to the ioctl. 5114 * However, it is impossible to determine which ones of 5115 * these are really socket level errors that were incorrectly 5116 * consumed by the ioctl. Thus this code silently ignores the 5117 * error - to code explicitly does not reinstate the error 5118 * using soseterror(). 5119 * Experiments have shows that at least this set of 5120 * errors are reported and should not be reinstated on the 5121 * socket: 5122 * EINVAL E.g. if an I_LINK was in effect when 5123 * getsockname was called. 5124 * EPIPE The ioctl error semantics prefer the write 5125 * side error over the read side error. 5126 */ 5127 error = 0; 5128 } else if (res == 0 && strbuf.len > 0 && 5129 (so->so_state & SS_ISBOUND)) { 5130 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5131 sti->sti_laddr_len = (socklen_t)strbuf.len; 5132 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5133 sti->sti_laddr_valid = 1; 5134 5135 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5136 *namelen = sti->sti_laddr_len; 5137 } 5138 kmem_free(addr, addrlen); 5139 #ifdef DEBUG 5140 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5141 pr_addr(so->so_family, sti->sti_laddr_sa, 5142 (t_uscalar_t)sti->sti_laddr_len))); 5143 #endif /* DEBUG */ 5144 done: 5145 so_unlock_single(so, SOLOCKED); 5146 mutex_exit(&so->so_lock); 5147 return (error); 5148 } 5149 5150 /* 5151 * Get socket options. 
For SOL_SOCKET options some options are handled 5152 * by the sockfs while others use the value recorded in the sonode as a 5153 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5154 * 5155 * On the return most *optlenp bytes are copied to optval. 5156 */ 5157 /* ARGSUSED */ 5158 int 5159 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5160 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5161 { 5162 struct T_optmgmt_req optmgmt_req; 5163 struct T_optmgmt_ack *optmgmt_ack; 5164 struct opthdr oh; 5165 struct opthdr *opt_res; 5166 mblk_t *mp = NULL; 5167 int error = 0; 5168 void *option = NULL; /* Set if fallback value */ 5169 t_uscalar_t maxlen = *optlenp; 5170 t_uscalar_t len; 5171 uint32_t value; 5172 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5173 struct timeval32 tmo_val32; 5174 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5175 5176 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5177 (void *)so, level, option_name, optval, (void *)optlenp, 5178 pr_state(so->so_state, so->so_mode))); 5179 5180 mutex_enter(&so->so_lock); 5181 so_lock_single(so); /* Set SOLOCKED */ 5182 5183 /* 5184 * Check for SOL_SOCKET options. 5185 * Certain SOL_SOCKET options are returned directly whereas 5186 * others only provide a default (fallback) value should 5187 * the T_SVR4_OPTMGMT_REQ fail. 
5188 */ 5189 if (level == SOL_SOCKET) { 5190 /* Check parameters */ 5191 switch (option_name) { 5192 case SO_TYPE: 5193 case SO_ERROR: 5194 case SO_DEBUG: 5195 case SO_ACCEPTCONN: 5196 case SO_REUSEADDR: 5197 case SO_KEEPALIVE: 5198 case SO_DONTROUTE: 5199 case SO_BROADCAST: 5200 case SO_USELOOPBACK: 5201 case SO_OOBINLINE: 5202 case SO_SNDBUF: 5203 case SO_RCVBUF: 5204 #ifdef notyet 5205 case SO_SNDLOWAT: 5206 case SO_RCVLOWAT: 5207 #endif /* notyet */ 5208 case SO_DOMAIN: 5209 case SO_DGRAM_ERRIND: 5210 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5211 error = EINVAL; 5212 eprintsoline(so, error); 5213 goto done2; 5214 } 5215 break; 5216 case SO_RCVTIMEO: 5217 case SO_SNDTIMEO: 5218 if (get_udatamodel() == DATAMODEL_NONE || 5219 get_udatamodel() == DATAMODEL_NATIVE) { 5220 if (maxlen < sizeof (struct timeval)) { 5221 error = EINVAL; 5222 eprintsoline(so, error); 5223 goto done2; 5224 } 5225 } else { 5226 if (maxlen < sizeof (struct timeval32)) { 5227 error = EINVAL; 5228 eprintsoline(so, error); 5229 goto done2; 5230 } 5231 5232 } 5233 break; 5234 case SO_LINGER: 5235 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5236 error = EINVAL; 5237 eprintsoline(so, error); 5238 goto done2; 5239 } 5240 break; 5241 case SO_SND_BUFINFO: 5242 if (maxlen < (t_uscalar_t) 5243 sizeof (struct so_snd_bufinfo)) { 5244 error = EINVAL; 5245 eprintsoline(so, error); 5246 goto done2; 5247 } 5248 break; 5249 } 5250 5251 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5252 5253 switch (option_name) { 5254 case SO_TYPE: 5255 value = so->so_type; 5256 option = &value; 5257 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5258 5259 case SO_ERROR: 5260 value = sogeterr(so, B_TRUE); 5261 option = &value; 5262 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5263 5264 case SO_ACCEPTCONN: 5265 if (so->so_state & SS_ACCEPTCONN) 5266 value = SO_ACCEPTCONN; 5267 else 5268 value = 0; 5269 #ifdef DEBUG 5270 if (value) { 5271 dprintso(so, 1, 5272 ("sotpi_getsockopt: 0x%x is 
set\n", 5273 option_name)); 5274 } else { 5275 dprintso(so, 1, 5276 ("sotpi_getsockopt: 0x%x not set\n", 5277 option_name)); 5278 } 5279 #endif /* DEBUG */ 5280 option = &value; 5281 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5282 5283 case SO_DEBUG: 5284 case SO_REUSEADDR: 5285 case SO_KEEPALIVE: 5286 case SO_DONTROUTE: 5287 case SO_BROADCAST: 5288 case SO_USELOOPBACK: 5289 case SO_OOBINLINE: 5290 case SO_DGRAM_ERRIND: 5291 value = (so->so_options & option_name); 5292 #ifdef DEBUG 5293 if (value) { 5294 dprintso(so, 1, 5295 ("sotpi_getsockopt: 0x%x is set\n", 5296 option_name)); 5297 } else { 5298 dprintso(so, 1, 5299 ("sotpi_getsockopt: 0x%x not set\n", 5300 option_name)); 5301 } 5302 #endif /* DEBUG */ 5303 option = &value; 5304 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5305 5306 /* 5307 * The following options are only returned by sockfs when the 5308 * T_SVR4_OPTMGMT_REQ fails. 5309 */ 5310 case SO_LINGER: 5311 option = &so->so_linger; 5312 len = (t_uscalar_t)sizeof (struct linger); 5313 break; 5314 case SO_SNDBUF: { 5315 ssize_t lvalue; 5316 5317 /* 5318 * If the option has not been set then get a default 5319 * value from the read queue. This value is 5320 * returned if the transport fails 5321 * the T_SVR4_OPTMGMT_REQ. 5322 */ 5323 lvalue = so->so_sndbuf; 5324 if (lvalue == 0) { 5325 mutex_exit(&so->so_lock); 5326 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5327 QHIWAT, 0, &lvalue); 5328 mutex_enter(&so->so_lock); 5329 dprintso(so, 1, 5330 ("got SO_SNDBUF %ld from q\n", lvalue)); 5331 } 5332 value = (int)lvalue; 5333 option = &value; 5334 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5335 break; 5336 } 5337 case SO_RCVBUF: { 5338 ssize_t lvalue; 5339 5340 /* 5341 * If the option has not been set then get a default 5342 * value from the read queue. This value is 5343 * returned if the transport fails 5344 * the T_SVR4_OPTMGMT_REQ. 
5345 * 5346 * XXX If SO_RCVBUF has been set and this is an 5347 * XPG 4.2 application then do not ask the transport 5348 * since the transport might adjust the value and not 5349 * return exactly what was set by the application. 5350 * For non-XPG 4.2 application we return the value 5351 * that the transport is actually using. 5352 */ 5353 lvalue = so->so_rcvbuf; 5354 if (lvalue == 0) { 5355 mutex_exit(&so->so_lock); 5356 (void) strqget(RD(strvp2wq(SOTOV(so))), 5357 QHIWAT, 0, &lvalue); 5358 mutex_enter(&so->so_lock); 5359 dprintso(so, 1, 5360 ("got SO_RCVBUF %ld from q\n", lvalue)); 5361 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5362 value = (int)lvalue; 5363 option = &value; 5364 goto copyout; /* skip asking transport */ 5365 } 5366 value = (int)lvalue; 5367 option = &value; 5368 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5369 break; 5370 } 5371 case SO_DOMAIN: 5372 value = so->so_family; 5373 option = &value; 5374 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5375 5376 #ifdef notyet 5377 /* 5378 * We do not implement the semantics of these options 5379 * thus we shouldn't implement the options either. 
5380 */ 5381 case SO_SNDLOWAT: 5382 value = so->so_sndlowat; 5383 option = &value; 5384 break; 5385 case SO_RCVLOWAT: 5386 value = so->so_rcvlowat; 5387 option = &value; 5388 break; 5389 #endif /* notyet */ 5390 case SO_SNDTIMEO: 5391 case SO_RCVTIMEO: { 5392 clock_t val; 5393 5394 if (option_name == SO_RCVTIMEO) 5395 val = drv_hztousec(so->so_rcvtimeo); 5396 else 5397 val = drv_hztousec(so->so_sndtimeo); 5398 tmo_val.tv_sec = val / (1000 * 1000); 5399 tmo_val.tv_usec = val % (1000 * 1000); 5400 if (get_udatamodel() == DATAMODEL_NONE || 5401 get_udatamodel() == DATAMODEL_NATIVE) { 5402 option = &tmo_val; 5403 len = sizeof (struct timeval); 5404 } else { 5405 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5406 option = &tmo_val32; 5407 len = sizeof (struct timeval32); 5408 } 5409 break; 5410 } 5411 case SO_SND_BUFINFO: { 5412 snd_bufinfo.sbi_wroff = 5413 (so->so_proto_props).sopp_wroff; 5414 snd_bufinfo.sbi_maxblk = 5415 (so->so_proto_props).sopp_maxblk; 5416 snd_bufinfo.sbi_maxpsz = 5417 (so->so_proto_props).sopp_maxpsz; 5418 snd_bufinfo.sbi_tail = 5419 (so->so_proto_props).sopp_tail; 5420 option = &snd_bufinfo; 5421 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5422 break; 5423 } 5424 } 5425 } 5426 5427 mutex_exit(&so->so_lock); 5428 5429 /* Send request */ 5430 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5431 optmgmt_req.MGMT_flags = T_CHECK; 5432 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5433 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5434 5435 oh.level = level; 5436 oh.name = option_name; 5437 oh.len = maxlen; 5438 5439 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5440 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5441 /* Let option management work in the presence of data flow control */ 5442 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5443 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5444 mp = NULL; 5445 mutex_enter(&so->so_lock); 5446 if (error) { 5447 eprintsoline(so, error); 5448 goto done2; 
5449 } 5450 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5451 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5452 if (error) { 5453 if (option != NULL) { 5454 /* We have a fallback value */ 5455 error = 0; 5456 goto copyout; 5457 } 5458 eprintsoline(so, error); 5459 goto done2; 5460 } 5461 ASSERT(mp); 5462 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5463 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5464 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5465 if (opt_res == NULL) { 5466 if (option != NULL) { 5467 /* We have a fallback value */ 5468 error = 0; 5469 goto copyout; 5470 } 5471 error = EPROTO; 5472 eprintsoline(so, error); 5473 goto done; 5474 } 5475 option = &opt_res[1]; 5476 5477 /* check to ensure that the option is within bounds */ 5478 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5479 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5480 if (option != NULL) { 5481 /* We have a fallback value */ 5482 error = 0; 5483 goto copyout; 5484 } 5485 error = EPROTO; 5486 eprintsoline(so, error); 5487 goto done; 5488 } 5489 5490 len = opt_res->len; 5491 5492 copyout: { 5493 t_uscalar_t size = MIN(len, maxlen); 5494 bcopy(option, optval, size); 5495 bcopy(&size, optlenp, sizeof (size)); 5496 } 5497 done: 5498 freemsg(mp); 5499 done2: 5500 so_unlock_single(so, SOLOCKED); 5501 mutex_exit(&so->so_lock); 5502 5503 return (error); 5504 } 5505 5506 /* 5507 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5508 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5509 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5510 * setsockopt has to work even if the transport does not support the option. 
5511 */ 5512 /* ARGSUSED */ 5513 int 5514 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5515 const void *optval, t_uscalar_t optlen, struct cred *cr) 5516 { 5517 struct T_optmgmt_req optmgmt_req; 5518 struct opthdr oh; 5519 mblk_t *mp; 5520 int error = 0; 5521 boolean_t handled = B_FALSE; 5522 5523 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5524 (void *)so, level, option_name, optval, optlen, 5525 pr_state(so->so_state, so->so_mode))); 5526 5527 /* X/Open requires this check */ 5528 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5529 if (xnet_check_print) 5530 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5531 return (EINVAL); 5532 } 5533 5534 mutex_enter(&so->so_lock); 5535 so_lock_single(so); /* Set SOLOCKED */ 5536 mutex_exit(&so->so_lock); 5537 5538 /* 5539 * For SOCKET or TCP level options, try to set it here itself 5540 * provided socket has not been popped and we know the tcp 5541 * structure (stored in so_priv). 5542 */ 5543 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5544 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5545 (so->so_version == SOV_SOCKSTREAM) && 5546 (so->so_proto_handle != NULL)) { 5547 tcp_t *tcp = (tcp_t *)so->so_proto_handle; 5548 boolean_t onoff; 5549 5550 #define intvalue (*(int32_t *)optval) 5551 5552 switch (level) { 5553 case SOL_SOCKET: 5554 switch (option_name) { /* Check length param */ 5555 case SO_DEBUG: 5556 case SO_REUSEADDR: 5557 case SO_DONTROUTE: 5558 case SO_BROADCAST: 5559 case SO_USELOOPBACK: 5560 case SO_OOBINLINE: 5561 case SO_DGRAM_ERRIND: 5562 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5563 error = EINVAL; 5564 eprintsoline(so, error); 5565 mutex_enter(&so->so_lock); 5566 goto done2; 5567 } 5568 ASSERT(optval); 5569 onoff = intvalue != 0; 5570 handled = B_TRUE; 5571 break; 5572 case SO_SNDTIMEO: 5573 case SO_RCVTIMEO: 5574 if (get_udatamodel() == DATAMODEL_NONE || 5575 get_udatamodel() == DATAMODEL_NATIVE) { 5576 if (optlen != 
5577 sizeof (struct timeval)) { 5578 error = EINVAL; 5579 eprintsoline(so, error); 5580 mutex_enter(&so->so_lock); 5581 goto done2; 5582 } 5583 } else { 5584 if (optlen != 5585 sizeof (struct timeval32)) { 5586 error = EINVAL; 5587 eprintsoline(so, error); 5588 mutex_enter(&so->so_lock); 5589 goto done2; 5590 } 5591 } 5592 ASSERT(optval); 5593 handled = B_TRUE; 5594 break; 5595 case SO_LINGER: 5596 if (optlen != 5597 (t_uscalar_t)sizeof (struct linger)) { 5598 error = EINVAL; 5599 eprintsoline(so, error); 5600 mutex_enter(&so->so_lock); 5601 goto done2; 5602 } 5603 ASSERT(optval); 5604 handled = B_TRUE; 5605 break; 5606 } 5607 5608 switch (option_name) { /* Do actions */ 5609 case SO_LINGER: { 5610 struct linger *lgr = (struct linger *)optval; 5611 5612 if (lgr->l_onoff) { 5613 tcp->tcp_linger = 1; 5614 tcp->tcp_lingertime = lgr->l_linger; 5615 so->so_linger.l_onoff = SO_LINGER; 5616 so->so_options |= SO_LINGER; 5617 } else { 5618 tcp->tcp_linger = 0; 5619 tcp->tcp_lingertime = 0; 5620 so->so_linger.l_onoff = 0; 5621 so->so_options &= ~SO_LINGER; 5622 } 5623 so->so_linger.l_linger = lgr->l_linger; 5624 handled = B_TRUE; 5625 break; 5626 } 5627 case SO_SNDTIMEO: 5628 case SO_RCVTIMEO: { 5629 struct timeval tl; 5630 clock_t val; 5631 5632 if (get_udatamodel() == DATAMODEL_NONE || 5633 get_udatamodel() == DATAMODEL_NATIVE) 5634 bcopy(&tl, (struct timeval *)optval, 5635 sizeof (struct timeval)); 5636 else 5637 TIMEVAL32_TO_TIMEVAL(&tl, 5638 (struct timeval32 *)optval); 5639 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5640 if (option_name == SO_RCVTIMEO) 5641 so->so_rcvtimeo = drv_usectohz(val); 5642 else 5643 so->so_sndtimeo = drv_usectohz(val); 5644 break; 5645 } 5646 5647 case SO_DEBUG: 5648 tcp->tcp_debug = onoff; 5649 #ifdef SOCK_TEST 5650 if (intvalue & 2) 5651 sock_test_timelimit = 10 * hz; 5652 else 5653 sock_test_timelimit = 0; 5654 5655 if (intvalue & 4) 5656 do_useracc = 0; 5657 else 5658 do_useracc = 1; 5659 #endif /* SOCK_TEST */ 5660 break; 5661 case 
SO_DONTROUTE: 5662 /* 5663 * SO_DONTROUTE, SO_USELOOPBACK and 5664 * SO_BROADCAST are only of interest to IP. 5665 * We track them here only so 5666 * that we can report their current value. 5667 */ 5668 tcp->tcp_dontroute = onoff; 5669 if (onoff) 5670 so->so_options |= option_name; 5671 else 5672 so->so_options &= ~option_name; 5673 break; 5674 case SO_USELOOPBACK: 5675 tcp->tcp_useloopback = onoff; 5676 if (onoff) 5677 so->so_options |= option_name; 5678 else 5679 so->so_options &= ~option_name; 5680 break; 5681 case SO_BROADCAST: 5682 tcp->tcp_broadcast = onoff; 5683 if (onoff) 5684 so->so_options |= option_name; 5685 else 5686 so->so_options &= ~option_name; 5687 break; 5688 case SO_REUSEADDR: 5689 tcp->tcp_reuseaddr = onoff; 5690 if (onoff) 5691 so->so_options |= option_name; 5692 else 5693 so->so_options &= ~option_name; 5694 break; 5695 case SO_OOBINLINE: 5696 tcp->tcp_oobinline = onoff; 5697 if (onoff) 5698 so->so_options |= option_name; 5699 else 5700 so->so_options &= ~option_name; 5701 break; 5702 case SO_DGRAM_ERRIND: 5703 tcp->tcp_dgram_errind = onoff; 5704 if (onoff) 5705 so->so_options |= option_name; 5706 else 5707 so->so_options &= ~option_name; 5708 break; 5709 } 5710 break; 5711 case IPPROTO_TCP: 5712 switch (option_name) { 5713 case TCP_NODELAY: 5714 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5715 error = EINVAL; 5716 eprintsoline(so, error); 5717 mutex_enter(&so->so_lock); 5718 goto done2; 5719 } 5720 ASSERT(optval); 5721 tcp->tcp_naglim = intvalue ? 
1 : tcp->tcp_mss; 5722 handled = B_TRUE; 5723 break; 5724 } 5725 break; 5726 default: 5727 handled = B_FALSE; 5728 break; 5729 } 5730 } 5731 5732 if (handled) { 5733 mutex_enter(&so->so_lock); 5734 goto done2; 5735 } 5736 5737 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5738 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5739 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5740 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5741 5742 oh.level = level; 5743 oh.name = option_name; 5744 oh.len = optlen; 5745 5746 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5747 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5748 /* Let option management work in the presence of data flow control */ 5749 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5750 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5751 mp = NULL; 5752 mutex_enter(&so->so_lock); 5753 if (error) { 5754 eprintsoline(so, error); 5755 goto done2; 5756 } 5757 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5758 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5759 if (error) { 5760 eprintsoline(so, error); 5761 goto done; 5762 } 5763 ASSERT(mp); 5764 /* No need to verify T_optmgmt_ack */ 5765 freemsg(mp); 5766 done: 5767 /* 5768 * Check for SOL_SOCKET options and record their values. 5769 * If we know about a SOL_SOCKET parameter and the transport 5770 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5771 * EPROTO) we let the setsockopt succeed. 
5772 */ 5773 if (level == SOL_SOCKET) { 5774 /* Check parameters */ 5775 switch (option_name) { 5776 case SO_DEBUG: 5777 case SO_REUSEADDR: 5778 case SO_KEEPALIVE: 5779 case SO_DONTROUTE: 5780 case SO_BROADCAST: 5781 case SO_USELOOPBACK: 5782 case SO_OOBINLINE: 5783 case SO_SNDBUF: 5784 case SO_RCVBUF: 5785 #ifdef notyet 5786 case SO_SNDLOWAT: 5787 case SO_RCVLOWAT: 5788 #endif /* notyet */ 5789 case SO_DGRAM_ERRIND: 5790 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5791 error = EINVAL; 5792 eprintsoline(so, error); 5793 goto done2; 5794 } 5795 ASSERT(optval); 5796 handled = B_TRUE; 5797 break; 5798 case SO_SNDTIMEO: 5799 case SO_RCVTIMEO: 5800 if (get_udatamodel() == DATAMODEL_NONE || 5801 get_udatamodel() == DATAMODEL_NATIVE) { 5802 if (optlen != sizeof (struct timeval)) { 5803 error = EINVAL; 5804 eprintsoline(so, error); 5805 goto done2; 5806 } 5807 } else { 5808 if (optlen != sizeof (struct timeval32)) { 5809 error = EINVAL; 5810 eprintsoline(so, error); 5811 goto done2; 5812 } 5813 } 5814 ASSERT(optval); 5815 handled = B_TRUE; 5816 break; 5817 case SO_LINGER: 5818 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5819 error = EINVAL; 5820 eprintsoline(so, error); 5821 goto done2; 5822 } 5823 ASSERT(optval); 5824 handled = B_TRUE; 5825 break; 5826 } 5827 5828 #define intvalue (*(int32_t *)optval) 5829 5830 switch (option_name) { 5831 case SO_TYPE: 5832 case SO_ERROR: 5833 case SO_ACCEPTCONN: 5834 /* Can't be set */ 5835 error = ENOPROTOOPT; 5836 goto done2; 5837 case SO_LINGER: { 5838 struct linger *l = (struct linger *)optval; 5839 5840 so->so_linger.l_linger = l->l_linger; 5841 if (l->l_onoff) { 5842 so->so_linger.l_onoff = SO_LINGER; 5843 so->so_options |= SO_LINGER; 5844 } else { 5845 so->so_linger.l_onoff = 0; 5846 so->so_options &= ~SO_LINGER; 5847 } 5848 break; 5849 } 5850 5851 case SO_DEBUG: 5852 #ifdef SOCK_TEST 5853 if (intvalue & 2) 5854 sock_test_timelimit = 10 * hz; 5855 else 5856 sock_test_timelimit = 0; 5857 5858 if (intvalue & 4) 5859 
do_useracc = 0; 5860 else 5861 do_useracc = 1; 5862 #endif /* SOCK_TEST */ 5863 /* FALLTHRU */ 5864 case SO_REUSEADDR: 5865 case SO_KEEPALIVE: 5866 case SO_DONTROUTE: 5867 case SO_BROADCAST: 5868 case SO_USELOOPBACK: 5869 case SO_OOBINLINE: 5870 case SO_DGRAM_ERRIND: 5871 if (intvalue != 0) { 5872 dprintso(so, 1, 5873 ("socket_setsockopt: setting 0x%x\n", 5874 option_name)); 5875 so->so_options |= option_name; 5876 } else { 5877 dprintso(so, 1, 5878 ("socket_setsockopt: clearing 0x%x\n", 5879 option_name)); 5880 so->so_options &= ~option_name; 5881 } 5882 break; 5883 /* 5884 * The following options are only returned by us when the 5885 * transport layer fails. 5886 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5887 * since the transport might adjust the value and not 5888 * return exactly what was set by the application. 5889 */ 5890 case SO_SNDBUF: 5891 so->so_sndbuf = intvalue; 5892 break; 5893 case SO_RCVBUF: 5894 so->so_rcvbuf = intvalue; 5895 break; 5896 case SO_RCVPSH: 5897 so->so_rcv_timer_interval = intvalue; 5898 break; 5899 #ifdef notyet 5900 /* 5901 * We do not implement the semantics of these options 5902 * thus we shouldn't implement the options either. 
5903 */ 5904 case SO_SNDLOWAT: 5905 so->so_sndlowat = intvalue; 5906 break; 5907 case SO_RCVLOWAT: 5908 so->so_rcvlowat = intvalue; 5909 break; 5910 #endif /* notyet */ 5911 case SO_SNDTIMEO: 5912 case SO_RCVTIMEO: { 5913 struct timeval tl; 5914 clock_t val; 5915 5916 if (get_udatamodel() == DATAMODEL_NONE || 5917 get_udatamodel() == DATAMODEL_NATIVE) 5918 bcopy(&tl, (struct timeval *)optval, 5919 sizeof (struct timeval)); 5920 else 5921 TIMEVAL32_TO_TIMEVAL(&tl, 5922 (struct timeval32 *)optval); 5923 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5924 if (option_name == SO_RCVTIMEO) 5925 so->so_rcvtimeo = drv_usectohz(val); 5926 else 5927 so->so_sndtimeo = drv_usectohz(val); 5928 break; 5929 } 5930 } 5931 #undef intvalue 5932 5933 if (error) { 5934 if ((error == ENOPROTOOPT || error == EPROTO || 5935 error == EINVAL) && handled) { 5936 dprintso(so, 1, 5937 ("setsockopt: ignoring error %d for 0x%x\n", 5938 error, option_name)); 5939 error = 0; 5940 } 5941 } 5942 } 5943 done2: 5944 so_unlock_single(so, SOLOCKED); 5945 mutex_exit(&so->so_lock); 5946 return (error); 5947 } 5948 5949 /* 5950 * sotpi_close() is called when the last open reference goes away. 
5951 */ 5952 /* ARGSUSED */ 5953 int 5954 sotpi_close(struct sonode *so, int flag, struct cred *cr) 5955 { 5956 struct vnode *vp = SOTOV(so); 5957 dev_t dev; 5958 int error = 0; 5959 sotpi_info_t *sti = SOTOTPI(so); 5960 5961 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 5962 (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 5963 5964 dev = sti->sti_dev; 5965 5966 ASSERT(STREAMSTAB(getmajor(dev))); 5967 5968 mutex_enter(&so->so_lock); 5969 so_lock_single(so); /* Set SOLOCKED */ 5970 5971 ASSERT(so_verify_oobstate(so)); 5972 5973 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 5974 sti->sti_nl7c_flags = 0; 5975 nl7c_close(so); 5976 } 5977 5978 if (vp->v_stream != NULL) { 5979 vnode_t *ux_vp; 5980 5981 if (so->so_family == AF_UNIX) { 5982 /* Could avoid this when CANTSENDMORE for !dgram */ 5983 so_unix_close(so); 5984 } 5985 5986 mutex_exit(&so->so_lock); 5987 /* 5988 * Disassemble the linkage from the AF_UNIX underlying file 5989 * system vnode to this socket (by atomically clearing 5990 * v_stream in vn_rele_stream) before strclose clears sd_vnode 5991 * and frees the stream head. 5992 */ 5993 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 5994 ASSERT(ux_vp->v_stream); 5995 sti->sti_ux_bound_vp = NULL; 5996 vn_rele_stream(ux_vp); 5997 } 5998 if (so->so_family == AF_INET || so->so_family == AF_INET6) { 5999 strsetrwputdatahooks(SOTOV(so), NULL, NULL); 6000 if (sti->sti_kssl_ent != NULL) { 6001 kssl_release_ent(sti->sti_kssl_ent, so, 6002 sti->sti_kssl_type); 6003 sti->sti_kssl_ent = NULL; 6004 } 6005 if (sti->sti_kssl_ctx != NULL) { 6006 kssl_release_ctx(sti->sti_kssl_ctx); 6007 sti->sti_kssl_ctx = NULL; 6008 } 6009 sti->sti_kssl_type = KSSL_NO_PROXY; 6010 } 6011 error = strclose(vp, flag, cr); 6012 vp->v_stream = NULL; 6013 mutex_enter(&so->so_lock); 6014 } 6015 6016 /* 6017 * Flush the T_DISCON_IND on sti_discon_ind_mp. 
6018 */ 6019 so_flush_discon_ind(so); 6020 6021 so_unlock_single(so, SOLOCKED); 6022 mutex_exit(&so->so_lock); 6023 6024 /* 6025 * Needed for STREAMs. 6026 * Decrement the device driver's reference count for streams 6027 * opened via the clone dip. The driver was held in clone_open(). 6028 * The absence of clone_close() forces this asymmetry. 6029 */ 6030 if (so->so_flag & SOCLONE) 6031 ddi_rele_driver(getmajor(dev)); 6032 6033 return (error); 6034 } 6035 6036 static int 6037 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 6038 struct cred *cr, int32_t *rvalp) 6039 { 6040 struct vnode *vp = SOTOV(so); 6041 sotpi_info_t *sti = SOTOTPI(so); 6042 int error = 0; 6043 6044 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 6045 cmd, arg, pr_state(so->so_state, so->so_mode))); 6046 6047 switch (cmd) { 6048 case SIOCSQPTR: 6049 /* 6050 * SIOCSQPTR is valid only when helper stream is created 6051 * by the protocol. 6052 */ 6053 case _I_INSERT: 6054 case _I_REMOVE: 6055 /* 6056 * Since there's no compelling reason to support these ioctls 6057 * on sockets, and doing so would increase the complexity 6058 * markedly, prevent it. 6059 */ 6060 return (EOPNOTSUPP); 6061 6062 case I_FIND: 6063 case I_LIST: 6064 case I_LOOK: 6065 case I_POP: 6066 case I_PUSH: 6067 /* 6068 * To prevent races and inconsistencies between the actual 6069 * state of the stream and the state according to the sonode, 6070 * we serialize all operations which modify or operate on the 6071 * list of modules on the socket's stream. 6072 */ 6073 mutex_enter(&sti->sti_plumb_lock); 6074 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 6075 mutex_exit(&sti->sti_plumb_lock); 6076 return (error); 6077 6078 default: 6079 if (so->so_version != SOV_STREAM) 6080 break; 6081 6082 /* 6083 * The imaginary "sockmod" has been popped; act as a stream. 
6084 */ 6085 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6086 } 6087 6088 ASSERT(so->so_version != SOV_STREAM); 6089 6090 /* 6091 * Process socket-specific ioctls. 6092 */ 6093 switch (cmd) { 6094 case FIONBIO: { 6095 int32_t value; 6096 6097 if (so_copyin((void *)arg, &value, sizeof (int32_t), 6098 (mode & (int)FKIOCTL))) 6099 return (EFAULT); 6100 6101 mutex_enter(&so->so_lock); 6102 if (value) { 6103 so->so_state |= SS_NDELAY; 6104 } else { 6105 so->so_state &= ~SS_NDELAY; 6106 } 6107 mutex_exit(&so->so_lock); 6108 return (0); 6109 } 6110 6111 case FIOASYNC: { 6112 int32_t value; 6113 6114 if (so_copyin((void *)arg, &value, sizeof (int32_t), 6115 (mode & (int)FKIOCTL))) 6116 return (EFAULT); 6117 6118 mutex_enter(&so->so_lock); 6119 /* 6120 * SS_ASYNC flag not already set correctly? 6121 * (!value != !(so->so_state & SS_ASYNC)) 6122 * but some engineers find that too hard to read. 6123 */ 6124 if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 6125 value != 0 && (so->so_state & SS_ASYNC) == 0) 6126 error = so_flip_async(so, vp, mode, cr); 6127 mutex_exit(&so->so_lock); 6128 return (error); 6129 } 6130 6131 case SIOCSPGRP: 6132 case FIOSETOWN: { 6133 pid_t pgrp; 6134 6135 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 6136 (mode & (int)FKIOCTL))) 6137 return (EFAULT); 6138 6139 mutex_enter(&so->so_lock); 6140 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 6141 /* Any change? */ 6142 if (pgrp != so->so_pgrp) 6143 error = so_set_siggrp(so, vp, pgrp, mode, cr); 6144 mutex_exit(&so->so_lock); 6145 return (error); 6146 } 6147 case SIOCGPGRP: 6148 case FIOGETOWN: 6149 if (so_copyout(&so->so_pgrp, (void *)arg, 6150 sizeof (pid_t), (mode & (int)FKIOCTL))) 6151 return (EFAULT); 6152 return (0); 6153 6154 case SIOCATMARK: { 6155 int retval; 6156 uint_t so_state; 6157 6158 /* 6159 * strwaitmark has a finite timeout after which it 6160 * returns -1 if the mark state is undetermined. 
6161 * In order to avoid any race between the mark state 6162 * in sockfs and the mark state in the stream head this 6163 * routine loops until the mark state can be determined 6164 * (or the urgent data indication has been removed by some 6165 * other thread). 6166 */ 6167 do { 6168 mutex_enter(&so->so_lock); 6169 so_state = so->so_state; 6170 mutex_exit(&so->so_lock); 6171 if (so_state & SS_RCVATMARK) { 6172 retval = 1; 6173 } else if (!(so_state & SS_OOBPEND)) { 6174 /* 6175 * No SIGURG has been generated -- there is no 6176 * pending or present urgent data. Thus can't 6177 * possibly be at the mark. 6178 */ 6179 retval = 0; 6180 } else { 6181 /* 6182 * Have the stream head wait until there is 6183 * either some messages on the read queue, or 6184 * STRATMARK or STRNOTATMARK gets set. The 6185 * STRNOTATMARK flag is used so that the 6186 * transport can send up a MSGNOTMARKNEXT 6187 * M_DATA to indicate that it is not 6188 * at the mark and additional data is not about 6189 * to be send upstream. 6190 * 6191 * If the mark state is undetermined this will 6192 * return -1 and we will loop rechecking the 6193 * socket state. 6194 */ 6195 retval = strwaitmark(vp); 6196 } 6197 } while (retval == -1); 6198 6199 if (so_copyout(&retval, (void *)arg, sizeof (int), 6200 (mode & (int)FKIOCTL))) 6201 return (EFAULT); 6202 return (0); 6203 } 6204 6205 case I_FDINSERT: 6206 case I_SENDFD: 6207 case I_RECVFD: 6208 case I_ATMARK: 6209 case _SIOCSOCKFALLBACK: 6210 /* 6211 * These ioctls do not apply to sockets. I_FDINSERT can be 6212 * used to send M_PROTO messages without modifying the socket 6213 * state. I_SENDFD/RECVFD should not be used for socket file 6214 * descriptor passing since they assume a twisted stream. 6215 * SIOCATMARK must be used instead of I_ATMARK. 6216 * 6217 * _SIOCSOCKFALLBACK from an application should never be 6218 * processed. It is only generated by socktpi_open() or 6219 * in response to I_POP or I_PUSH. 
6220 */ 6221 #ifdef DEBUG 6222 zcmn_err(getzoneid(), CE_WARN, 6223 "Unsupported STREAMS ioctl 0x%x on socket. " 6224 "Pid = %d\n", cmd, curproc->p_pid); 6225 #endif /* DEBUG */ 6226 return (EOPNOTSUPP); 6227 6228 case _I_GETPEERCRED: 6229 if ((mode & FKIOCTL) == 0) 6230 return (EINVAL); 6231 6232 mutex_enter(&so->so_lock); 6233 if ((so->so_mode & SM_CONNREQUIRED) == 0) { 6234 error = ENOTSUP; 6235 } else if ((so->so_state & SS_ISCONNECTED) == 0) { 6236 error = ENOTCONN; 6237 } else if (so->so_peercred != NULL) { 6238 k_peercred_t *kp = (k_peercred_t *)arg; 6239 kp->pc_cr = so->so_peercred; 6240 kp->pc_cpid = so->so_cpid; 6241 crhold(so->so_peercred); 6242 } else { 6243 error = EINVAL; 6244 } 6245 mutex_exit(&so->so_lock); 6246 return (error); 6247 6248 default: 6249 /* 6250 * Do the higher-order bits of the ioctl cmd indicate 6251 * that it is an I_* streams ioctl? 6252 */ 6253 if ((cmd & 0xffffff00U) == STR && 6254 so->so_version == SOV_SOCKBSD) { 6255 #ifdef DEBUG 6256 zcmn_err(getzoneid(), CE_WARN, 6257 "Unsupported STREAMS ioctl 0x%x on socket. " 6258 "Pid = %d\n", cmd, curproc->p_pid); 6259 #endif /* DEBUG */ 6260 return (EOPNOTSUPP); 6261 } 6262 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6263 } 6264 } 6265 6266 /* 6267 * Handle plumbing-related ioctls. 6268 */ 6269 static int 6270 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 6271 struct cred *cr, int32_t *rvalp) 6272 { 6273 static const char sockmod_name[] = "sockmod"; 6274 struct sonode *so = VTOSO(vp); 6275 char mname[FMNAMESZ + 1]; 6276 int error; 6277 sotpi_info_t *sti = SOTOTPI(so); 6278 6279 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 6280 6281 if (so->so_version == SOV_SOCKBSD) 6282 return (EOPNOTSUPP); 6283 6284 if (so->so_version == SOV_STREAM) { 6285 /* 6286 * The imaginary "sockmod" has been popped - act as a stream. 6287 * If this is a push of sockmod then change back to a socket. 6288 */ 6289 if (cmd == I_PUSH) { 6290 error = ((mode & FKIOCTL) ? 
copystr : copyinstr)( 6291 (void *)arg, mname, sizeof (mname), NULL); 6292 6293 if (error == 0 && strcmp(mname, sockmod_name) == 0) { 6294 dprintso(so, 0, ("socktpi_ioctl: going to " 6295 "socket version\n")); 6296 so_stream2sock(so); 6297 return (0); 6298 } 6299 } 6300 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6301 } 6302 6303 switch (cmd) { 6304 case I_PUSH: 6305 if (sti->sti_direct) { 6306 mutex_enter(&so->so_lock); 6307 so_lock_single(so); 6308 mutex_exit(&so->so_lock); 6309 6310 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 6311 cr, rvalp); 6312 6313 mutex_enter(&so->so_lock); 6314 if (error == 0) 6315 sti->sti_direct = 0; 6316 so_unlock_single(so, SOLOCKED); 6317 mutex_exit(&so->so_lock); 6318 6319 if (error != 0) 6320 return (error); 6321 } 6322 6323 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6324 if (error == 0) 6325 sti->sti_pushcnt++; 6326 return (error); 6327 6328 case I_POP: 6329 if (sti->sti_pushcnt == 0) { 6330 /* Emulate sockmod being popped */ 6331 dprintso(so, 0, 6332 ("socktpi_ioctl: going to STREAMS version\n")); 6333 return (so_sock2stream(so)); 6334 } 6335 6336 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6337 if (error == 0) 6338 sti->sti_pushcnt--; 6339 return (error); 6340 6341 case I_LIST: { 6342 struct str_mlist *kmlistp, *umlistp; 6343 struct str_list kstrlist; 6344 ssize_t kstrlistsize; 6345 int i, nmods; 6346 6347 STRUCT_DECL(str_list, ustrlist); 6348 STRUCT_INIT(ustrlist, mode); 6349 6350 if (arg == NULL) { 6351 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6352 if (error == 0) 6353 (*rvalp)++; /* Add one for sockmod */ 6354 return (error); 6355 } 6356 6357 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 6358 STRUCT_SIZE(ustrlist), mode & FKIOCTL); 6359 if (error != 0) 6360 return (error); 6361 6362 nmods = STRUCT_FGET(ustrlist, sl_nmods); 6363 if (nmods <= 0) 6364 return (EINVAL); 6365 /* 6366 * Ceiling nmods at nstrpush to prevent someone from 6367 * maliciously 
consuming lots of kernel memory. 6368 */ 6369 nmods = MIN(nmods, nstrpush); 6370 6371 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 6372 kstrlist.sl_nmods = nmods; 6373 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 6374 6375 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 6376 cr, rvalp); 6377 if (error != 0) 6378 goto done; 6379 6380 /* 6381 * Considering the module list as a 0-based array of sl_nmods 6382 * modules, sockmod should conceptually exist at slot 6383 * sti_pushcnt. Insert sockmod at this location by sliding all 6384 * of the module names after so_pushcnt over by one. We know 6385 * that there will be room to do this since we allocated 6386 * sl_modlist with an additional slot. 6387 */ 6388 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--) 6389 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1]; 6390 6391 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name); 6392 kstrlist.sl_nmods++; 6393 6394 /* 6395 * Copy all of the entries out to ustrlist. 
6396 */ 6397 kmlistp = kstrlist.sl_modlist; 6398 umlistp = STRUCT_FGETP(ustrlist, sl_modlist); 6399 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) { 6400 error = so_copyout(kmlistp++, umlistp++, 6401 sizeof (struct str_mlist), mode & FKIOCTL); 6402 if (error != 0) 6403 goto done; 6404 } 6405 6406 error = so_copyout(&i, (void *)arg, sizeof (int32_t), 6407 mode & FKIOCTL); 6408 if (error == 0) 6409 *rvalp = 0; 6410 done: 6411 kmem_free(kstrlist.sl_modlist, kstrlistsize); 6412 return (error); 6413 } 6414 case I_LOOK: 6415 if (sti->sti_pushcnt == 0) { 6416 return (so_copyout(sockmod_name, (void *)arg, 6417 sizeof (sockmod_name), mode & FKIOCTL)); 6418 } 6419 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6420 6421 case I_FIND: 6422 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6423 if (error && error != EINVAL) 6424 return (error); 6425 6426 /* if not found and string was sockmod return 1 */ 6427 if (*rvalp == 0 || error == EINVAL) { 6428 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6429 (void *)arg, mname, sizeof (mname), NULL); 6430 if (error == ENAMETOOLONG) 6431 error = EINVAL; 6432 6433 if (error == 0 && strcmp(mname, sockmod_name) == 0) 6434 *rvalp = 1; 6435 } 6436 return (error); 6437 6438 default: 6439 panic("socktpi_plumbioctl: unknown ioctl %d", cmd); 6440 break; 6441 } 6442 6443 return (0); 6444 } 6445 6446 /* 6447 * Wrapper around the streams poll routine that implements socket poll 6448 * semantics. 6449 * The sockfs never calls pollwakeup itself - the stream head take care 6450 * of all pollwakeups. Since sockfs never holds so_lock when calling the 6451 * stream head there can never be a deadlock due to holding so_lock across 6452 * pollwakeup and acquiring so_lock in this routine. 6453 * 6454 * However, since the performance of VOP_POLL is critical we avoid 6455 * acquiring so_lock here. 
This is based on two assumptions: 6456 * - The poll implementation holds locks to serialize the VOP_POLL call 6457 * and a pollwakeup for the same pollhead. This ensures that should 6458 * e.g. so_state change during a socktpi_poll call the pollwakeup 6459 * (which strsock_* and strrput conspire to issue) is issued after 6460 * the state change. Thus the pollwakeup will block until VOP_POLL has 6461 * returned and then wake up poll and have it call VOP_POLL again. 6462 * - The reading of so_state without holding so_lock does not result in 6463 * stale data that is older than the latest state change that has dropped 6464 * so_lock. This is ensured by the mutex_exit issuing the appropriate 6465 * memory barrier to force the data into the coherency domain. 6466 */ 6467 static int 6468 sotpi_poll( 6469 struct sonode *so, 6470 short events, 6471 int anyyet, 6472 short *reventsp, 6473 struct pollhead **phpp) 6474 { 6475 short origevents = events; 6476 struct vnode *vp = SOTOV(so); 6477 int error; 6478 int so_state = so->so_state; /* snapshot */ 6479 sotpi_info_t *sti = SOTOTPI(so); 6480 6481 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n", 6482 (void *)vp, pr_state(so_state, so->so_mode), so->so_error)); 6483 6484 ASSERT(vp->v_type == VSOCK); 6485 ASSERT(vp->v_stream != NULL); 6486 6487 if (so->so_version == SOV_STREAM) { 6488 /* The imaginary "sockmod" has been popped - act as a stream */ 6489 return (strpoll(vp->v_stream, events, anyyet, 6490 reventsp, phpp)); 6491 } 6492 6493 if (!(so_state & SS_ISCONNECTED) && 6494 (so->so_mode & SM_CONNREQUIRED)) { 6495 /* Not connected yet - turn off write side events */ 6496 events &= ~(POLLOUT|POLLWRBAND); 6497 } 6498 /* 6499 * Check for errors without calling strpoll if the caller wants them. 6500 * In sockets the errors are represented as input/output events 6501 * and there is no need to ask the stream head for this information. 
6502 */ 6503 if (so->so_error != 0 && 6504 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) { 6505 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; 6506 return (0); 6507 } 6508 /* 6509 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages. 6510 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA 6511 * will not trigger a POLLIN event with POLLRDDATA set. 6512 * The handling of urgent data (causing POLLRDBAND) is done by 6513 * inspecting SS_OOBPEND below. 6514 */ 6515 events |= POLLRDDATA; 6516 6517 /* 6518 * After shutdown(output) a stream head write error is set. 6519 * However, we should not return output events. 6520 */ 6521 events |= POLLNOERR; 6522 error = strpoll(vp->v_stream, events, anyyet, 6523 reventsp, phpp); 6524 if (error) 6525 return (error); 6526 6527 ASSERT(!(*reventsp & POLLERR)); 6528 6529 /* 6530 * Notes on T_CONN_IND handling for sockets. 6531 * 6532 * If strpoll() returned without events, SR_POLLIN is guaranteed 6533 * to be set, ensuring any subsequent strrput() runs pollwakeup(). 6534 * 6535 * Since the so_lock is not held, soqueueconnind() may have run 6536 * and a T_CONN_IND may be waiting. We now check for any queued 6537 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events 6538 * to ensure poll returns. 6539 * 6540 * However: 6541 * If the T_CONN_IND hasn't arrived by the time strpoll() returns, 6542 * when strrput() does run for an arriving M_PROTO with T_CONN_IND 6543 * the following actions will occur; taken together they ensure the 6544 * syscall will return. 6545 * 6546 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if 6547 * the accept() was run on a non-blocking socket sowaitconnind() 6548 * may have already returned EWOULDBLOCK, so not be waiting to 6549 * process the message. Additionally socktpi_poll() has probably 6550 * proceeded past the sti_conn_ind_head check below. 6551 * 2. 
strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake 6552 * this thread, however that could occur before poll_common() 6553 * has entered cv_wait. 6554 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock. 6555 * 6556 * Before proceeding to cv_wait() in poll_common() for an event, 6557 * poll_common() atomically checks for T_POLLWAKE under the pc_lock, 6558 * and if set, re-calls strpoll() to ensure the late arriving 6559 * T_CONN_IND is recognized, and pollsys() returns. 6560 */ 6561 6562 if (sti->sti_conn_ind_head != NULL) 6563 *reventsp |= (POLLIN|POLLRDNORM) & events; 6564 6565 if (so->so_state & SS_OOBPEND) 6566 *reventsp |= POLLRDBAND & events; 6567 6568 if (sti->sti_nl7c_rcv_mp != NULL) { 6569 *reventsp |= (POLLIN|POLLRDNORM) & events; 6570 } 6571 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 6572 ((POLLIN|POLLRDNORM) & *reventsp)) { 6573 sti->sti_nl7c_flags |= NL7C_POLLIN; 6574 } 6575 6576 return (0); 6577 } 6578 6579 /*ARGSUSED*/ 6580 static int 6581 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6582 { 6583 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6584 int error = 0; 6585 6586 error = sonode_constructor(buf, cdrarg, kmflags); 6587 if (error != 0) 6588 return (error); 6589 6590 error = i_sotpi_info_constructor(&st->st_info); 6591 if (error != 0) 6592 sonode_destructor(buf, cdrarg); 6593 6594 st->st_sonode.so_priv = &st->st_info; 6595 6596 return (error); 6597 } 6598 6599 /*ARGSUSED1*/ 6600 static void 6601 socktpi_destructor(void *buf, void *cdrarg) 6602 { 6603 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6604 6605 ASSERT(st->st_sonode.so_priv == &st->st_info); 6606 st->st_sonode.so_priv = NULL; 6607 6608 i_sotpi_info_destructor(&st->st_info); 6609 sonode_destructor(buf, cdrarg); 6610 } 6611 6612 static int 6613 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 6614 { 6615 int retval; 6616 6617 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6618 struct sonode *so = (struct sonode *)buf; 6619 
sotpi_info_t *sti = SOTOTPI(so); 6620 6621 mutex_enter(&socklist.sl_lock); 6622 6623 sti->sti_next_so = socklist.sl_list; 6624 sti->sti_prev_so = NULL; 6625 if (sti->sti_next_so != NULL) 6626 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6627 socklist.sl_list = so; 6628 6629 mutex_exit(&socklist.sl_lock); 6630 6631 } 6632 return (retval); 6633 } 6634 6635 static void 6636 socktpi_unix_destructor(void *buf, void *cdrarg) 6637 { 6638 struct sonode *so = (struct sonode *)buf; 6639 sotpi_info_t *sti = SOTOTPI(so); 6640 6641 mutex_enter(&socklist.sl_lock); 6642 6643 if (sti->sti_next_so != NULL) 6644 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6645 if (sti->sti_prev_so != NULL) 6646 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6647 else 6648 socklist.sl_list = sti->sti_next_so; 6649 6650 mutex_exit(&socklist.sl_lock); 6651 6652 socktpi_destructor(buf, cdrarg); 6653 } 6654 6655 int 6656 socktpi_init(void) 6657 { 6658 /* 6659 * Create sonode caches. We create a special one for AF_UNIX so 6660 * that we can track them for netstat(1m). 6661 */ 6662 socktpi_cache = kmem_cache_create("socktpi_cache", 6663 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6664 socktpi_destructor, NULL, NULL, NULL, 0); 6665 6666 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6667 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6668 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6669 6670 return (0); 6671 } 6672 6673 /* 6674 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6675 * 6676 * Caller must still update state and mode using sotpi_update_state(). 6677 * 6678 * Returns the STREAM queue that the protocol should use. 
6679 */ 6680 queue_t * 6681 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 6682 boolean_t *direct, struct cred *cr) 6683 { 6684 sotpi_info_t *sti; 6685 struct sockparams *origsp = so->so_sockparams; 6686 sock_lower_handle_t handle = so->so_proto_handle; 6687 uint_t old_state = so->so_state; 6688 struct stdata *stp; 6689 struct vnode *vp; 6690 queue_t *q; 6691 6692 *direct = B_FALSE; 6693 so->so_sockparams = newsp; 6694 /* 6695 * Allocate and initalize fields required by TPI. 6696 */ 6697 (void) sotpi_info_create(so, KM_SLEEP); 6698 sotpi_info_init(so); 6699 6700 if (sotpi_init(so, NULL, cr, SO_FALLBACK) != 0) { 6701 sotpi_info_fini(so); 6702 sotpi_info_destroy(so); 6703 so->so_state = old_state; 6704 return (NULL); 6705 } 6706 ASSERT(handle == so->so_proto_handle); 6707 sti = SOTOTPI(so); 6708 if (sti->sti_direct != 0) 6709 *direct = B_TRUE; 6710 6711 /* 6712 * Keep the original sp around so we can properly dispose of the 6713 * sonode when the socket is being closed. 6714 */ 6715 sti->sti_orig_sp = origsp; 6716 6717 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 6718 so_alloc_addr(so, so->so_max_addr_len); 6719 6720 /* 6721 * If the application has done a SIOCSPGRP, make sure the 6722 * STREAM head is aware. This needs to take place before 6723 * the protocol start sending up messages. Otherwise we 6724 * might miss to generate SIGPOLL. 6725 * 6726 * It is possible that the application will receive duplicate 6727 * signals if some were already generated for either data or 6728 * connection indications. 6729 */ 6730 if (so->so_pgrp != 0) { 6731 mutex_enter(&so->so_lock); 6732 if (so_set_events(so, so->so_vnode, cr) != 0) 6733 so->so_pgrp = 0; 6734 mutex_exit(&so->so_lock); 6735 } 6736 6737 /* 6738 * Determine which queue to use. 
6739 */ 6740 vp = SOTOV(so); 6741 stp = vp->v_stream; 6742 ASSERT(stp != NULL); 6743 q = stp->sd_wrq->q_next; 6744 6745 /* 6746 * Skip any modules that may have been auto pushed when the device 6747 * was opened 6748 */ 6749 while (q->q_next != NULL) 6750 q = q->q_next; 6751 q = _RD(q); 6752 6753 return (q); 6754 } 6755 6756 void 6757 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 6758 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 6759 socklen_t faddrlen, short opts) 6760 { 6761 sotpi_info_t *sti = SOTOTPI(so); 6762 6763 so_proc_tcapability_ack(so, tcap); 6764 6765 so->so_options |= opts; 6766 6767 /* 6768 * Determine whether the foreign and local address are valid 6769 */ 6770 if (laddrlen != 0) { 6771 ASSERT(laddrlen <= sti->sti_laddr_maxlen); 6772 sti->sti_laddr_len = laddrlen; 6773 bcopy(laddr, sti->sti_laddr_sa, laddrlen); 6774 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 6775 } 6776 6777 if (faddrlen != 0) { 6778 ASSERT(faddrlen <= sti->sti_faddr_maxlen); 6779 sti->sti_faddr_len = faddrlen; 6780 bcopy(faddr, sti->sti_faddr_sa, faddrlen); 6781 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 6782 } 6783 6784 } 6785 6786 /* 6787 * Allocate enough space to cache the local and foreign addresses. 6788 */ 6789 void 6790 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6791 { 6792 sotpi_info_t *sti = SOTOTPI(so); 6793 6794 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6795 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6796 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6797 P2ROUNDUP(maxlen, KMEM_ALIGN); 6798 so->so_max_addr_len = sti->sti_laddr_maxlen; 6799 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6800 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6801 + sti->sti_laddr_maxlen); 6802 6803 if (so->so_family == AF_UNIX) { 6804 /* 6805 * Initialize AF_UNIX related fields. 
6806 */ 6807 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6808 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6809 } 6810 } 6811 6812 6813 sotpi_info_t * 6814 sotpi_sototpi(struct sonode *so) 6815 { 6816 sotpi_info_t *sti; 6817 6818 if (so == NULL) 6819 return (NULL); 6820 6821 sti = (sotpi_info_t *)so->so_priv; 6822 6823 ASSERT(sti != NULL); 6824 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6825 6826 return (sti); 6827 } 6828 6829 static int 6830 i_sotpi_info_constructor(sotpi_info_t *sti) 6831 { 6832 sti->sti_magic = SOTPI_INFO_MAGIC; 6833 sti->sti_ack_mp = NULL; 6834 sti->sti_discon_ind_mp = NULL; 6835 sti->sti_ux_bound_vp = NULL; 6836 sti->sti_unbind_mp = NULL; 6837 6838 sti->sti_conn_ind_head = NULL; 6839 sti->sti_conn_ind_tail = NULL; 6840 6841 sti->sti_laddr_sa = NULL; 6842 sti->sti_faddr_sa = NULL; 6843 6844 sti->sti_nl7c_flags = 0; 6845 sti->sti_nl7c_uri = NULL; 6846 sti->sti_nl7c_rcv_mp = NULL; 6847 6848 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6849 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6850 6851 return (0); 6852 } 6853 6854 static void 6855 i_sotpi_info_destructor(sotpi_info_t *sti) 6856 { 6857 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6858 ASSERT(sti->sti_ack_mp == NULL); 6859 ASSERT(sti->sti_discon_ind_mp == NULL); 6860 ASSERT(sti->sti_ux_bound_vp == NULL); 6861 ASSERT(sti->sti_unbind_mp == NULL); 6862 6863 ASSERT(sti->sti_conn_ind_head == NULL); 6864 ASSERT(sti->sti_conn_ind_tail == NULL); 6865 6866 ASSERT(sti->sti_laddr_sa == NULL); 6867 ASSERT(sti->sti_faddr_sa == NULL); 6868 6869 ASSERT(sti->sti_nl7c_flags == 0); 6870 ASSERT(sti->sti_nl7c_uri == NULL); 6871 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6872 6873 mutex_destroy(&sti->sti_plumb_lock); 6874 cv_destroy(&sti->sti_ack_cv); 6875 } 6876 6877 /* 6878 * Creates and attaches TPI information to the given sonode 6879 */ 6880 static boolean_t 6881 sotpi_info_create(struct sonode *so, int kmflags) 6882 { 6883 sotpi_info_t *sti; 6884 6885 
ASSERT(so->so_priv == NULL); 6886 6887 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6888 return (B_FALSE); 6889 6890 if (i_sotpi_info_constructor(sti) != 0) { 6891 kmem_free(sti, sizeof (*sti)); 6892 return (B_FALSE); 6893 } 6894 6895 so->so_priv = (void *)sti; 6896 return (B_TRUE); 6897 } 6898 6899 /* 6900 * Initializes the TPI information. 6901 */ 6902 static void 6903 sotpi_info_init(struct sonode *so) 6904 { 6905 struct vnode *vp = SOTOV(so); 6906 sotpi_info_t *sti = SOTOTPI(so); 6907 time_t now; 6908 6909 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6910 vp->v_rdev = sti->sti_dev; 6911 6912 sti->sti_orig_sp = NULL; 6913 6914 sti->sti_pushcnt = 0; 6915 6916 now = gethrestime_sec(); 6917 sti->sti_atime = now; 6918 sti->sti_mtime = now; 6919 sti->sti_ctime = now; 6920 6921 sti->sti_eaddr_mp = NULL; 6922 sti->sti_delayed_error = 0; 6923 6924 sti->sti_provinfo = NULL; 6925 6926 sti->sti_oobcnt = 0; 6927 sti->sti_oobsigcnt = 0; 6928 6929 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6930 6931 sti->sti_laddr_sa = 0; 6932 sti->sti_faddr_sa = 0; 6933 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6934 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6935 6936 sti->sti_laddr_valid = 0; 6937 sti->sti_faddr_valid = 0; 6938 sti->sti_faddr_noxlate = 0; 6939 6940 sti->sti_direct = 0; 6941 6942 ASSERT(sti->sti_ack_mp == NULL); 6943 ASSERT(sti->sti_ux_bound_vp == NULL); 6944 ASSERT(sti->sti_unbind_mp == NULL); 6945 6946 ASSERT(sti->sti_conn_ind_head == NULL); 6947 ASSERT(sti->sti_conn_ind_tail == NULL); 6948 6949 /* Initialize the kernel SSL proxy fields */ 6950 sti->sti_kssl_type = KSSL_NO_PROXY; 6951 sti->sti_kssl_ent = NULL; 6952 sti->sti_kssl_ctx = NULL; 6953 } 6954 6955 /* 6956 * Given a sonode, grab the TPI info and free any data. 
6957 */ 6958 static void 6959 sotpi_info_fini(struct sonode *so) 6960 { 6961 sotpi_info_t *sti = SOTOTPI(so); 6962 mblk_t *mp; 6963 6964 ASSERT(sti->sti_discon_ind_mp == NULL); 6965 6966 if ((mp = sti->sti_conn_ind_head) != NULL) { 6967 mblk_t *mp1; 6968 6969 while (mp) { 6970 mp1 = mp->b_next; 6971 mp->b_next = NULL; 6972 freemsg(mp); 6973 mp = mp1; 6974 } 6975 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6976 } 6977 6978 /* 6979 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6980 * indirect them. It also uses so_count as a validity test. 6981 */ 6982 mutex_enter(&so->so_lock); 6983 6984 if (sti->sti_laddr_sa) { 6985 ASSERT((caddr_t)sti->sti_faddr_sa == 6986 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6987 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6988 sti->sti_laddr_valid = 0; 6989 sti->sti_faddr_valid = 0; 6990 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6991 sti->sti_laddr_sa = NULL; 6992 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6993 sti->sti_faddr_sa = NULL; 6994 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6995 } 6996 6997 mutex_exit(&so->so_lock); 6998 6999 if ((mp = sti->sti_eaddr_mp) != NULL) { 7000 freemsg(mp); 7001 sti->sti_eaddr_mp = NULL; 7002 sti->sti_delayed_error = 0; 7003 } 7004 7005 if ((mp = sti->sti_ack_mp) != NULL) { 7006 freemsg(mp); 7007 sti->sti_ack_mp = NULL; 7008 } 7009 7010 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 7011 sti->sti_nl7c_rcv_mp = NULL; 7012 freemsg(mp); 7013 } 7014 sti->sti_nl7c_rcv_rval = 0; 7015 if (sti->sti_nl7c_uri != NULL) { 7016 nl7c_urifree(so); 7017 /* urifree() cleared nl7c_uri */ 7018 } 7019 if (sti->sti_nl7c_flags) { 7020 sti->sti_nl7c_flags = 0; 7021 } 7022 7023 ASSERT(sti->sti_ux_bound_vp == NULL); 7024 if ((mp = sti->sti_unbind_mp) != NULL) { 7025 freemsg(mp); 7026 sti->sti_unbind_mp = NULL; 7027 } 7028 } 7029 7030 /* 7031 * Destroys the TPI information attached to a sonode. 
7032 */ 7033 static void 7034 sotpi_info_destroy(struct sonode *so) 7035 { 7036 sotpi_info_t *sti = SOTOTPI(so); 7037 7038 i_sotpi_info_destructor(sti); 7039 kmem_free(sti, sizeof (*sti)); 7040 7041 so->so_priv = NULL; 7042 } 7043 7044 /* 7045 * Create the global sotpi socket module entry. It will never be freed. 7046 */ 7047 smod_info_t * 7048 sotpi_smod_create(void) 7049 { 7050 smod_info_t *smodp; 7051 7052 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 7053 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 7054 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 7055 /* 7056 * Initialize the smod_refcnt to 1 so it will never be freed. 7057 */ 7058 smodp->smod_refcnt = 1; 7059 smodp->smod_uc_version = SOCK_UC_VERSION; 7060 smodp->smod_dc_version = SOCK_DC_VERSION; 7061 smodp->smod_sock_create_func = &sotpi_create; 7062 smodp->smod_sock_destroy_func = &sotpi_destroy; 7063 return (smodp); 7064 } 7065