/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/kmem_impl.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/user.h>
#include <sys/termios.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/suntpi.h>
#include <sys/ddi.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/pathname.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sodirect.h>
#include <netinet/in.h>
#include <sys/un.h>
#include <sys/strsun.h>

#include <sys/tiuser.h>
#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */

#include <c2/audit.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/udp_impl.h>

#include <sys/zone.h>

#include <fs/sockfs/nl7c.h>
#include <fs/sockfs/nl7curi.h>

#include <inet/kssl/ksslapi.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/socktpi.h>
#include <fs/sockfs/socktpi_impl.h>

/*
 * Possible failures when memory can't be allocated. The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *	(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *	(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected-to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket send them as a single message for AF_INET{,6}.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */

#ifdef SOCK_TEST
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 */
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;

static void sotpi_destroy(struct sonode *);
static struct sonode *sotpi_create(struct sockparams *, int, int, int, int,
    int, int *, cred_t *cr);

static boolean_t sotpi_info_create(struct sonode *, int);
static void sotpi_info_init(struct sonode *);
static void sotpi_info_fini(struct sonode *);
static void sotpi_info_destroy(struct sonode *);
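
/*
 * Note (added for clarity; not in the original source): the tunables in
 * this file (xnet_*, the SOCK_TEST variants when compiled in, and
 * socktpi_direct below) are ordinary kernel variables.  They would
 * normally be set at boot time from /etc/system with something like
 *
 *	set sockfs:socktpi_direct = 0
 *
 * where the module name "sockfs" is assumed here for illustration.
 */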

/*
 * Do direct function call to the transport layer below; this would
 * also allow the transport to utilize read-side synchronous stream
 * interface if necessary. This is a /etc/system tunable that must
 * not be modified on a running system. By default this is enabled
 * for performance reasons and may be disabled for debugging purposes.
 */
boolean_t socktpi_direct = B_TRUE;

static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;

extern void sigintr(k_sigset_t *, int);
extern void sigunintr(k_sigset_t *);

/* Sockets acting as an in-kernel SSL proxy */
extern mblk_t *strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *,
    strsigset_t *, strsigset_t *, strpollset_t *);
extern mblk_t *strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *,
    strsigset_t *, strsigset_t *, strpollset_t *);

static int sotpi_unbind(struct sonode *, int);

extern int sodput(sodirect_t *, mblk_t *);
extern void sodwakeup(sodirect_t *);

/* TPI sockfs sonode operations */
int sotpi_init(struct sonode *, struct sonode *, struct cred *,
    int);
static int sotpi_accept(struct sonode *, int, struct cred *,
    struct sonode **);
static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
    int, struct cred *);
static int sotpi_listen(struct sonode *, int, struct cred *);
static int sotpi_connect(struct sonode *, const struct sockaddr *,
    socklen_t, int, int, struct cred *);
extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *,
    struct uio *, struct cred *);
static int sotpi_sendmsg(struct sonode *, struct nmsghdr *,
    struct uio *, struct cred *);
static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int,
    struct cred *, mblk_t **);
static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
    struct uio *, void *, t_uscalar_t, int);
static int sodgram_direct(struct sonode *, struct sockaddr *,
    socklen_t, struct uio *, int);
extern int sotpi_getpeername(struct sonode *, struct sockaddr *,
    socklen_t *, boolean_t, struct cred *);
static int sotpi_getsockname(struct sonode *, struct sockaddr *,
    socklen_t *, struct cred *);
static int sotpi_shutdown(struct sonode *, int, struct cred *);
extern int sotpi_getsockopt(struct sonode *, int, int, void *,
    socklen_t *, int, struct cred *);
extern int sotpi_setsockopt(struct sonode *, int, int, const void *,
    socklen_t, struct cred *);
static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
    int32_t *);
static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int,
    struct cred *, int32_t *);
static int sotpi_poll(struct sonode *, short, int, short *,
    struct pollhead **);
static int sotpi_close(struct sonode *, int, struct cred *);

static int i_sotpi_info_constructor(sotpi_info_t *);
static void i_sotpi_info_destructor(sotpi_info_t *);

sonodeops_t sotpi_sonodeops = {
    sotpi_init,			/* sop_init */
    sotpi_accept,		/* sop_accept */
    sotpi_bind,			/* sop_bind */
    sotpi_listen,		/* sop_listen */
    sotpi_connect,		/* sop_connect */
    sotpi_recvmsg,		/* sop_recvmsg */
    sotpi_sendmsg,		/* sop_sendmsg */
    sotpi_sendmblk,		/* sop_sendmblk */
    sotpi_getpeername,		/* sop_getpeername */
    sotpi_getsockname,		/* sop_getsockname */
    sotpi_shutdown,		/* sop_shutdown */
    sotpi_getsockopt,		/* sop_getsockopt */
    sotpi_setsockopt,		/* sop_setsockopt */
    sotpi_ioctl,		/* sop_ioctl */
    sotpi_poll,			/* sop_poll */
    sotpi_close,		/* sop_close */
};
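
/*
 * Summary (added for clarity; not in the original source): the generic
 * socket layer dispatches socket system calls through the table above,
 * so e.g. a bind(3SOCKET) on a TPI-based socket ends up in sotpi_bind()
 * below via so->so_ops->sop_bind().  The exact dispatch macros are
 * assumed to live in sockcommon.h.
 */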

/*
 * Return a TPI socket vnode.
 *
 * Note that sockets assume that the driver will clone (either itself
 * or by using the clone driver) i.e. a socket() call will always
 * result in a new vnode being created.
 */

/*
 * Common create code for socket and accept. If tso is set the values
 * from that node are used instead of issuing a T_INFO_REQ.
 */

/* ARGSUSED */
static struct sonode *
sotpi_create(struct sockparams *sp, int family, int type, int protocol,
    int version, int sflags, int *errorp, cred_t *cr)
{
    struct sonode *so;
    kmem_cache_t *cp;
    int sfamily = family;

    ASSERT(sp->sp_sdev_info.sd_vnode != NULL);

    if (family == AF_NCA) {
        /*
         * The request is for an NCA socket so for NL7C use the
         * INET domain instead and mark NL7C_AF_NCA below.
         */
        family = AF_INET;
        /*
         * NL7C is not supported in the non-global zone,
         * we enforce this restriction here.
         */
        if (getzoneid() != GLOBAL_ZONEID) {
            *errorp = ENOTSUP;
            return (NULL);
        }
    }

    /*
     * To be compatible with the old TPI socket implementation, ignore
     * the sleep flag (sflags) passed in.
     */
    cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache;
    so = kmem_cache_alloc(cp, KM_SLEEP);
    if (so == NULL) {
        *errorp = ENOMEM;
        return (NULL);
    }

    sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops);
    sotpi_info_init(so);

    if (sfamily == AF_NCA) {
        SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA;
    }

    if (version == SOV_DEFAULT)
        version = so_default_version;

    so->so_version = (short)version;
    *errorp = 0;

    return (so);
}

static void
sotpi_destroy(struct sonode *so)
{
    kmem_cache_t *cp;
    struct sockparams *origsp;

    /*
     * If there is a new dealloc function (i.e. smod_destroy_func),
     * then it should check the correctness of the ops.
     */

    ASSERT(so->so_ops == &sotpi_sonodeops);

    origsp = SOTOTPI(so)->sti_orig_sp;

    sotpi_info_fini(so);

    if (so->so_state & SS_FALLBACK_COMP) {
        /*
         * A fallback happened, which means that a sotpi_info_t struct
         * was allocated (as opposed to being allocated from the TPI
         * sonode cache). Therefore we explicitly free the struct
         * here.
         */
        sotpi_info_destroy(so);
        ASSERT(origsp != NULL);

        origsp->sp_smod_info->smod_sock_destroy_func(so);
        SOCKPARAMS_DEC_REF(origsp);
    } else {
        sonode_fini(so);
        cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache :
            socktpi_cache;
        kmem_cache_free(cp, so);
    }
}

/* ARGSUSED1 */
int
sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags)
{
    major_t maj;
    dev_t newdev;
    struct vnode *vp;
    int error = 0;
    struct stdata *stp;

    sotpi_info_t *sti = SOTOTPI(so);

    dprint(1, ("sotpi_init()\n"));

    /*
     * Overwrite the sleep flag passed in; that is OK
     * as the TPI socket does not honor the sleep flag.
     */
    flags |= FREAD|FWRITE;

    /*
     * Record in so_flag that it is a clone.
     */
    if (getmajor(sti->sti_dev) == clone_major)
        so->so_flag |= SOCLONE;

    if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) &&
        (so->so_family == AF_INET || so->so_family == AF_INET6) &&
        (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP ||
        so->so_protocol == IPPROTO_IP)) {
        /* Tell tcp or udp that it's talking to sockets */
        flags |= SO_SOCKSTR;

        /*
         * Here we indicate to socktpi_open() our attempt to
         * make direct calls between sockfs and transport.
         * The final decision is left to socktpi_open().
         */
        sti->sti_direct = 1;

        ASSERT(so->so_type != SOCK_DGRAM || tso == NULL);
        if (so->so_type == SOCK_STREAM && tso != NULL) {
            if (SOTOTPI(tso)->sti_direct) {
                /*
                 * Inherit sti_direct from listener and pass
                 * SO_ACCEPTOR open flag to tcp, indicating
                 * that this is an accept fast-path instance.
                 */
                flags |= SO_ACCEPTOR;
            } else {
                /*
                 * sti_direct is not set on listener, meaning
                 * that the listener has been converted from
                 * a socket to a stream. Ensure that the
                 * acceptor inherits these settings.
                 */
                sti->sti_direct = 0;
                flags &= ~SO_SOCKSTR;
            }
        }
    }

    /*
     * Tell local transport that it is talking to sockets.
     */
    if (so->so_family == AF_UNIX) {
        flags |= SO_SOCKSTR;
    }

    vp = SOTOV(so);
    newdev = vp->v_rdev;
    maj = getmajor(newdev);
    ASSERT(STREAMSTAB(maj));

    error = stropen(vp, &newdev, flags, cr);

    stp = vp->v_stream;
    if (error == 0) {
        if (so->so_flag & SOCLONE)
            ASSERT(newdev != vp->v_rdev);
        mutex_enter(&so->so_lock);
        sti->sti_dev = newdev;
        vp->v_rdev = newdev;
        mutex_exit(&so->so_lock);

        if (stp->sd_flag & STRISTTY) {
            /*
             * This is a post-SVR4 tty driver - a socket cannot
             * be a controlling terminal. Fail the open.
             */
            (void) sotpi_close(so, flags, cr);
            return (ENOTTY);	/* XXX */
        }

        ASSERT(stp->sd_wrq != NULL);
        sti->sti_provinfo = tpi_findprov(stp->sd_wrq);

        /*
         * If caller is interested in doing direct function call
         * interface to/from transport module, probe the module
         * directly beneath the streamhead to see if it qualifies.
         *
         * We turn off the direct interface when qualifications fail.
         * In the acceptor case, we simply turn off the sti_direct
         * flag on the socket. We do the fallback after the accept
         * has completed, before the new socket is returned to the
         * application.
         */
        if (sti->sti_direct) {
            queue_t *tq = stp->sd_wrq->q_next;

            /*
             * sti_direct is currently supported and tested
             * only for tcp/udp; this is the main reason to
             * have the following assertions.
             */
            ASSERT(so->so_family == AF_INET ||
                so->so_family == AF_INET6);
            ASSERT(so->so_protocol == IPPROTO_UDP ||
                so->so_protocol == IPPROTO_TCP ||
                so->so_protocol == IPPROTO_IP);
            ASSERT(so->so_type == SOCK_DGRAM ||
                so->so_type == SOCK_STREAM);

            /*
             * Abort direct call interface if the module directly
             * underneath the stream head is not defined with the
             * _D_DIRECT flag. This could happen in the tcp or
             * udp case, when some other module is autopushed
             * above it, or for some reason the expected module
             * isn't purely D_MP (which is the main requirement).
             *
             * Else, SS_DIRECT is valid. If the read-side Q has
             * _QSODIRECT set and uioasync is enabled, then set
             * SS_SODIRECT to enable sodirect.
             */
            if (!socktpi_direct || !(tq->q_flag & _QDIRECT) ||
                !(_OTHERQ(tq)->q_flag & _QDIRECT)) {
                int rval;

                /* Continue on without direct calls */
                sti->sti_direct = 0;

                /*
                 * Cannot issue ioctl on fallback socket since
                 * there is no conn associated with the queue.
                 * The fallback downcall will notify the proto
                 * of the change.
                 */
                if (!(flags & SO_ACCEPTOR) &&
                    !(flags & SO_FALLBACK)) {
                    if ((error = strioctl(vp,
                        _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
                        cr, &rval)) != 0) {
                        (void) sotpi_close(so, flags,
                            cr);
                        return (error);
                    }
                }
            } else if ((_OTHERQ(tq)->q_flag & _QSODIRECT) &&
                uioasync.enabled) {
                /* Enable sodirect */
                so->so_state |= SS_SODIRECT;
            }
        }

        if (flags & SO_FALLBACK) {
            /*
             * The stream created does not have a conn.
             * Do stream setup after a conn has been assigned.
             */
            return (error);
        }
        if (error = so_strinit(so, tso)) {
            (void) sotpi_close(so, flags, cr);
            return (error);
        }

        /* Wildcard */
        if (so->so_protocol != so->so_sockparams->sp_protocol) {
            int protocol = so->so_protocol;
            /*
             * Issue SO_PROTOTYPE setsockopt.
             */
            error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
                &protocol, (t_uscalar_t)sizeof (protocol), cr);
            if (error != 0) {
                (void) sotpi_close(so, flags, cr);
                /*
                 * Setsockopt often fails with ENOPROTOOPT but
                 * socket() should fail with
                 * EPROTONOSUPPORT/EPROTOTYPE.
                 */
                return (EPROTONOSUPPORT);
            }
        }

    } else {
        /*
         * While the same socket cannot be reopened (unlike specfs)
         * the stream head sets STREOPENFAIL when the autopush fails.
         */
        if ((stp != NULL) &&
            (stp->sd_flag & STREOPENFAIL)) {
            /*
             * Open failed part way through.
             */
            mutex_enter(&stp->sd_lock);
            stp->sd_flag &= ~STREOPENFAIL;
            mutex_exit(&stp->sd_lock);
            (void) sotpi_close(so, flags, cr);
            return (error);
            /*NOTREACHED*/
        }
        ASSERT(stp == NULL);
    }
    TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN,
        "sockfs open:maj %d vp %p so %p error %d",
        maj, vp, so, error);
    return (error);
}

/*
 * Bind the socket to an unspecified address in sockfs only.
 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
 * required in all cases.
 */
static void
so_automatic_bind(struct sonode *so)
{
    sotpi_info_t *sti = SOTOTPI(so);
    ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);

    ASSERT(MUTEX_HELD(&so->so_lock));
    ASSERT(!(so->so_state & SS_ISBOUND));
    ASSERT(sti->sti_unbind_mp);

    ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
    bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
    sti->sti_laddr_sa->sa_family = so->so_family;
    so->so_state |= SS_ISBOUND;
}
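
/*
 * Illustrative sketch (added for clarity; not in the original source) of
 * the TPI exchange that sotpi_bindlisten() below performs, assuming a
 * TCP/UDP transport that accepts the newer T_BIND_REQ (other transports
 * get O_T_BIND_REQ):
 *
 *	sockfs					transport
 *	T_BIND_REQ(addr, CONIND_number)	->
 *					<-	T_BIND_ACK(bound address)
 *
 * The address returned in the T_BIND_ACK is checked against what was
 * requested and cached for getsockname().
 */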

/*
 * bind the socket.
 *
 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2
 * are passed in we allow rebinding. Note that for backwards compatibility
 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind.
 * Thus the rebinding code is currently not executed.
 *
 * The constraints for rebinding are:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 * This rebinding code was added based on some language in the XNET book
 * about not returning EINVAL if the protocol allows rebinding. However,
 * this language is not present in the POSIX socket draft. Thus maybe the
 * rebinding logic should be deleted from the source.
 *
 * A null "name" can be used to unbind the socket if:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 */
/* ARGSUSED */
static int
sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int backlog, int flags, struct cred *cr)
{
    struct T_bind_req bind_req;
    struct T_bind_ack *bind_ack;
    int error = 0;
    mblk_t *mp;
    void *addr;
    t_uscalar_t addrlen;
    int unbind_on_err = 1;
    boolean_t clear_acceptconn_on_err = B_FALSE;
    boolean_t restore_backlog_on_err = B_FALSE;
    int save_so_backlog;
    t_scalar_t PRIM_type = O_T_BIND_REQ;
    boolean_t tcp_udp_xport;
    void *nl7c = NULL;
    sotpi_info_t *sti = SOTOTPI(so);

    dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
        (void *)so, (void *)name, namelen, backlog, flags,
        pr_state(so->so_state, so->so_mode)));

    tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;

    if (!(flags & _SOBIND_LOCK_HELD)) {
        mutex_enter(&so->so_lock);
        so_lock_single(so);	/* Set SOLOCKED */
    } else {
        ASSERT(MUTEX_HELD(&so->so_lock));
        ASSERT(so->so_flag & SOLOCKED);
    }

    /*
     * Make sure that there is a preallocated unbind_req message
     * before binding. This message is allocated when the socket is
     * created but it might have been consumed.
     */
    if (sti->sti_unbind_mp == NULL) {
        dprintso(so, 1, ("sobind: allocating unbind_req\n"));
        /* NOTE: holding so_lock while sleeping */
        sti->sti_unbind_mp =
            soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
    }

    if (flags & _SOBIND_REBIND) {
        /*
         * Called from solisten after doing an sotpi_unbind() or
         * potentially without the unbind (latter for AF_INET{,6}).
         */
        ASSERT(name == NULL && namelen == 0);

        if (so->so_family == AF_UNIX) {
            ASSERT(sti->sti_ux_bound_vp);
            addr = &sti->sti_ux_laddr;
            addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
            dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, "
                "addr 0x%p, vp %p\n",
                addrlen,
                (void *)((struct so_ux_addr *)addr)->soua_vp,
                (void *)sti->sti_ux_bound_vp));
        } else {
            addr = sti->sti_laddr_sa;
            addrlen = (t_uscalar_t)sti->sti_laddr_len;
        }
    } else if (flags & _SOBIND_UNSPEC) {
        ASSERT(name == NULL && namelen == 0);

        /*
         * The caller checked SS_ISBOUND but not necessarily
         * under so_lock
         */
        if (so->so_state & SS_ISBOUND) {
            /* No error */
            goto done;
        }

        /* Set an initial local address */
        switch (so->so_family) {
        case AF_UNIX:
            /*
             * Use an address with same size as struct sockaddr
             * just like BSD.
             */
            sti->sti_laddr_len =
                (socklen_t)sizeof (struct sockaddr);
            ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
            bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
            sti->sti_laddr_sa->sa_family = so->so_family;

            /*
             * Pass down an address with the implicit bind
             * magic number and the rest all zeros.
             * The transport will return a unique address.
             */
            sti->sti_ux_laddr.soua_vp = NULL;
            sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
            addr = &sti->sti_ux_laddr;
            addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
            break;

        case AF_INET:
        case AF_INET6:
            /*
             * An unspecified bind in TPI has a NULL address.
             * Set the address in sockfs to have the sa_family.
             */
            sti->sti_laddr_len = (so->so_family == AF_INET) ?
                (socklen_t)sizeof (sin_t) :
                (socklen_t)sizeof (sin6_t);
            ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
            bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
            sti->sti_laddr_sa->sa_family = so->so_family;
            addr = NULL;
            addrlen = 0;
            break;

        default:
            /*
             * An unspecified bind in TPI has a NULL address.
             * Set the address in sockfs to be zero length.
             *
             * Cannot assume there is a sa_family for all
             * protocol families. For example, AF_X25 does not
             * have a family field.
             */
            bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
            sti->sti_laddr_len = 0;	/* XXX correct? */
            addr = NULL;
            addrlen = 0;
            break;
        }

    } else {
        if (so->so_state & SS_ISBOUND) {
            /*
             * If it is ok to rebind the socket, first unbind
             * with the transport. A rebind to the NULL address
             * is interpreted as an unbind.
             * Note that a bind to NULL in BSD does unbind the
             * socket but it fails with EINVAL.
             * Note that regular sockets set SOV_SOCKBSD i.e.
             * _SOBIND_SOCKBSD gets set here hence no type of
             * socket does currently allow rebinding.
             *
             * If the name is NULL just do an unbind.
             */
            if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) &&
                name != NULL) {
                error = EINVAL;
                unbind_on_err = 0;
                eprintsoline(so, error);
                goto done;
            }
            if ((so->so_mode & SM_CONNREQUIRED) &&
                (so->so_state & SS_CANTREBIND)) {
                error = EINVAL;
                unbind_on_err = 0;
                eprintsoline(so, error);
                goto done;
            }
            error = sotpi_unbind(so, 0);
            if (error) {
                eprintsoline(so, error);
                goto done;
            }
            ASSERT(!(so->so_state & SS_ISBOUND));
            if (name == NULL) {
                so->so_state &=
                    ~(SS_ISCONNECTED|SS_ISCONNECTING);
                goto done;
            }
        }

        /* X/Open requires this check */
        if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
            if (xnet_check_print) {
                printf("sockfs: X/Open bind state check "
                    "caused EINVAL\n");
            }
            error = EINVAL;
            goto done;
        }

        switch (so->so_family) {
        case AF_UNIX:
            /*
             * All AF_UNIX addresses are nul-terminated
             * when copied in (copyin_name) so the minimum
             * length is 3 bytes.
             */
            if (name == NULL ||
                (ssize_t)namelen <= sizeof (short) + 1) {
                error = EISDIR;
                eprintsoline(so, error);
                goto done;
            }
            /*
             * Verify so_family matches the bound family.
             * BSD does not check this for AF_UNIX resulting
             * in funny mknods.
             */
            if (name->sa_family != so->so_family) {
                error = EAFNOSUPPORT;
                goto done;
            }
            break;
        case AF_INET:
            if (name == NULL) {
                error = EINVAL;
                eprintsoline(so, error);
                goto done;
            }
            if ((size_t)namelen != sizeof (sin_t)) {
                error = name->sa_family != so->so_family ?
                    EAFNOSUPPORT : EINVAL;
                eprintsoline(so, error);
                goto done;
            }
            if ((flags & _SOBIND_XPG4_2) &&
                (name->sa_family != so->so_family)) {
                /*
                 * This check has to be made for X/Open
                 * sockets; however, application failures have
                 * been observed when it is applied to
                 * all sockets.
                 */
                error = EAFNOSUPPORT;
                eprintsoline(so, error);
                goto done;
            }
            /*
             * Force a zero sa_family to match so_family.
             *
             * Some programs like inetd(1M) don't set the
             * family field. Other programs leave
             * sin_family set to garbage - SunOS 4.X does
             * not check the family field on a bind.
             * We use the family field that
             * was passed in to the socket() call.
             */
            name->sa_family = so->so_family;
            break;

        case AF_INET6: {
#ifdef DEBUG
            sin6_t *sin6 = (sin6_t *)name;
#endif /* DEBUG */

            if (name == NULL) {
                error = EINVAL;
                eprintsoline(so, error);
                goto done;
            }
            if ((size_t)namelen != sizeof (sin6_t)) {
                error = name->sa_family != so->so_family ?
                    EAFNOSUPPORT : EINVAL;
                eprintsoline(so, error);
                goto done;
            }
            if (name->sa_family != so->so_family) {
                /*
                 * With IPv6 we require the family to match
                 * unlike in IPv4.
                 */
                error = EAFNOSUPPORT;
                eprintsoline(so, error);
                goto done;
            }
#ifdef DEBUG
            /*
             * Verify that apps don't forget to clear
             * sin6_scope_id etc
             */
            if (sin6->sin6_scope_id != 0 &&
                !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
                zcmn_err(getzoneid(), CE_WARN,
                    "bind with uninitialized sin6_scope_id "
                    "(%d) on socket. Pid = %d\n",
                    (int)sin6->sin6_scope_id,
                    (int)curproc->p_pid);
            }
            if (sin6->__sin6_src_id != 0) {
                zcmn_err(getzoneid(), CE_WARN,
                    "bind with uninitialized __sin6_src_id "
                    "(%d) on socket. Pid = %d\n",
                    (int)sin6->__sin6_src_id,
                    (int)curproc->p_pid);
            }
#endif /* DEBUG */
            break;
        }
        default:
            /*
             * Don't do any length or sa_family check to allow
             * non-sockaddr style addresses.
             */
            if (name == NULL) {
                error = EINVAL;
                eprintsoline(so, error);
                goto done;
            }
            break;
        }

        if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) {
            error = ENAMETOOLONG;
            eprintsoline(so, error);
            goto done;
        }
        /*
         * Save local address.
         */
        sti->sti_laddr_len = (socklen_t)namelen;
        ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
        bcopy(name, sti->sti_laddr_sa, namelen);

        addr = sti->sti_laddr_sa;
        addrlen = (t_uscalar_t)sti->sti_laddr_len;
        switch (so->so_family) {
        case AF_INET6:
        case AF_INET:
            break;
        case AF_UNIX: {
            struct sockaddr_un *soun =
                (struct sockaddr_un *)sti->sti_laddr_sa;
            struct vnode *vp, *rvp;
            struct vattr vattr;

            ASSERT(sti->sti_ux_bound_vp == NULL);
            /*
             * Create vnode for the specified path name.
             * Keep vnode held with a reference in sti_ux_bound_vp.
             * Use the vnode pointer as the address used in the
             * bind with the transport.
             *
             * Use the same mode as in BSD. In particular this does
             * not observe the umask.
             */
            /* MAXPATHLEN + soun_family + nul termination */
            if (sti->sti_laddr_len >
                (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
                error = ENAMETOOLONG;
                eprintsoline(so, error);
                goto done;
            }
            vattr.va_type = VSOCK;
            vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask;
            vattr.va_mask = AT_TYPE|AT_MODE;
            /* NOTE: holding so_lock */
            error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr,
                EXCL, 0, &vp, CRMKNOD, 0, 0);
            if (error) {
                if (error == EEXIST)
                    error = EADDRINUSE;
                eprintsoline(so, error);
                goto done;
            }
            /*
             * Establish pointer from the underlying filesystem
             * vnode to the socket node.
             * sti_ux_bound_vp and v_stream->sd_vnode form the
             * cross-linkage between the underlying filesystem
             * node and the socket node.
             */

            if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) {
                VN_HOLD(rvp);
                VN_RELE(vp);
                vp = rvp;
            }

            ASSERT(SOTOV(so)->v_stream);
            mutex_enter(&vp->v_lock);
            vp->v_stream = SOTOV(so)->v_stream;
            sti->sti_ux_bound_vp = vp;
            mutex_exit(&vp->v_lock);

            /*
             * Use the vnode pointer value as a unique address
             * (together with the magic number to avoid conflicts
             * with implicit binds) in the transport provider.
             */
            sti->sti_ux_laddr.soua_vp =
                (void *)sti->sti_ux_bound_vp;
            sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
            addr = &sti->sti_ux_laddr;
            addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
            dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n",
                addrlen,
                (void *)((struct so_ux_addr *)addr)->soua_vp));
            break;
        }
        } /* end switch (so->so_family) */
    }

    /*
     * Set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
     * the transport can start passing up T_CONN_IND messages
     * as soon as it receives the bind req and strsock_proto()
     * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
     */
    if (flags & _SOBIND_LISTEN) {
        if ((so->so_state & SS_ACCEPTCONN) == 0)
            clear_acceptconn_on_err = B_TRUE;
        save_so_backlog = so->so_backlog;
        restore_backlog_on_err = B_TRUE;
        so->so_state |= SS_ACCEPTCONN;
        so->so_backlog = backlog;
    }

    /*
     * If NL7C addr(s) have been configured, check for an addr/port match,
     * or if this is an implicit NL7C socket via AF_NCA, mark the socket
     * as NL7C.
     *
     * NL7C supports the TCP transport only so check AF_INET and AF_INET6
     * family sockets only. If they match, mark them as such.
     */
    if (nl7c_enabled && ((addr != NULL &&
        (so->so_family == AF_INET || so->so_family == AF_INET6) &&
        (nl7c = nl7c_lookup_addr(addr, addrlen))) ||
        sti->sti_nl7c_flags == NL7C_AF_NCA)) {
        /*
         * NL7C is not supported in non-global zones,
         * we enforce this restriction here.
         */
        if (so->so_zoneid == GLOBAL_ZONEID) {
            /* An NL7C socket, mark it */
            sti->sti_nl7c_flags |= NL7C_ENABLED;
            if (nl7c == NULL) {
                /*
                 * Was an AF_NCA bind() so add it to the
                 * addr list for reporting purposes.
                 */
                nl7c = nl7c_add_addr(addr, addrlen);
            }
        } else
            nl7c = NULL;
    }

    /*
     * We send a T_BIND_REQ for TCP/UDP since we know it supports it,
     * for other transports we will send in a O_T_BIND_REQ.
     */
    if (tcp_udp_xport &&
        (so->so_family == AF_INET || so->so_family == AF_INET6))
        PRIM_type = T_BIND_REQ;

    bind_req.PRIM_type = PRIM_type;
    bind_req.ADDR_length = addrlen;
    bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
    bind_req.CONIND_number = backlog;
    /* NOTE: holding so_lock while sleeping */
    mp = soallocproto2(&bind_req, sizeof (bind_req),
        addr, addrlen, 0, _ALLOC_SLEEP);
    sti->sti_laddr_valid = 0;

    /* Done using sti_laddr_sa - can drop the lock */
    mutex_exit(&so->so_lock);

    /*
     * Intercept the bind_req message here to check if this <address/port>
     * was configured as an SSL proxy server, or if another endpoint was
     * already configured to act as a proxy for us.
     *
     * Note, only if NL7C not enabled for this socket.
     */
    if (nl7c == NULL &&
        (so->so_family == AF_INET || so->so_family == AF_INET6) &&
        so->so_type == SOCK_STREAM) {

        if (sti->sti_kssl_ent != NULL) {
            kssl_release_ent(sti->sti_kssl_ent, so,
                sti->sti_kssl_type);
            sti->sti_kssl_ent = NULL;
        }

        sti->sti_kssl_type = kssl_check_proxy(mp, so,
            &sti->sti_kssl_ent);
        switch (sti->sti_kssl_type) {
        case KSSL_NO_PROXY:
            break;

        case KSSL_HAS_PROXY:
            mutex_enter(&so->so_lock);
            goto skip_transport;

        case KSSL_IS_PROXY:
            break;
        }
    }

    error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
        MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
    if (error) {
        eprintsoline(so, error);
        mutex_enter(&so->so_lock);
        goto done;
    }

    mutex_enter(&so->so_lock);
    error = sowaitprim(so, PRIM_type, T_BIND_ACK,
        (t_uscalar_t)sizeof (*bind_ack), &mp, 0);
    if (error) {
        eprintsoline(so, error);
        goto done;
    }
skip_transport:
    ASSERT(mp);
    /*
     * Even if some TPI message (e.g. T_DISCON_IND) was received in
     * strsock_proto while the lock was dropped above, the bind
     * is allowed to complete.
     */

    /* Mark as bound. This will be undone if we detect errors below. */
    if (flags & _SOBIND_NOXLATE) {
        ASSERT(so->so_family == AF_UNIX);
        sti->sti_faddr_noxlate = 1;
    }
    ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND));
    so->so_state |= SS_ISBOUND;
    ASSERT(sti->sti_unbind_mp);

    /* note that we've already set SS_ACCEPTCONN above */

    /*
     * Recompute addrlen - an unspecified bind sent down an
     * address of length zero but we expect the appropriate length
     * in return.
     */
    addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ?
        sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len);

    bind_ack = (struct T_bind_ack *)mp->b_rptr;
    /*
     * The alignment restriction is really too strict but
     * we want enough alignment to inspect the fields of
     * a sockaddr_in.
     */
    addr = sogetoff(mp, bind_ack->ADDR_offset,
        bind_ack->ADDR_length,
        __TPI_ALIGN_SIZE);
    if (addr == NULL) {
        freemsg(mp);
        error = EPROTO;
        eprintsoline(so, error);
        goto done;
    }
    if (!(flags & _SOBIND_UNSPEC)) {
        /*
         * Verify that the transport didn't return something we
         * did not want e.g. an address other than what we asked for.
         *
         * NOTE: These checks would go away if/when we switch to
         * using the new TPI (in which the transport would fail
         * the request instead of assigning a different address).
         *
         * NOTE2: For protocols that we don't know (i.e. any
         * other than AF_INET6, AF_INET and AF_UNIX), we
         * cannot know if the transport should be expected to
         * return the same address as that requested.
         *
         * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send
         * down a T_BIND_REQ. We use O_T_BIND_REQ for others.
         *
         * For example, in the case of netatalk it may be
         * inappropriate for the transport to return the
         * requested address (as it may have allocated a local
         * port number in behaviour similar to that of an
         * AF_INET bind request with a port number of zero).
         *
         * Given the definition of O_T_BIND_REQ, where the
         * transport may bind to an address other than the
         * requested address, it's not possible to determine
         * whether a returned address that differs from the
         * requested address is a reason to fail (because the
         * requested address was not available) or succeed
         * (because the transport allocated an appropriate
         * address and/or port).
         *
         * sockfs currently requires that the transport return
         * the requested address in the T_BIND_ACK, unless
         * there is code here to allow for any discrepancy.
         * Such code exists for AF_INET and AF_INET6.
         *
         * Netatalk chooses to return the requested address
         * rather than the (correct) allocated address. This
         * means that netatalk violates the TPI specification
         * (and would not function correctly if used from a
         * TLI application), but it does mean that it works
         * with sockfs.
         *
         * As noted above, using the newer XTI bind primitive
         * (T_BIND_REQ) in preference to O_T_BIND_REQ would
         * allow sockfs to be more sure about whether or not
         * the bind request had succeeded (as transports are
         * not permitted to bind to a different address than
         * that requested - they must return failure).
         * Unfortunately, support for T_BIND_REQ may not be
         * present in all transport implementations (netatalk,
         * for example, doesn't have it), making the
         * transition difficult.
         */
        if (bind_ack->ADDR_length != addrlen) {
            /* Assumes that the requested address was in use */
            freemsg(mp);
            error = EADDRINUSE;
            eprintsoline(so, error);
            goto done;
        }

        switch (so->so_family) {
        case AF_INET6:
        case AF_INET: {
            sin_t *rname, *aname;

            rname = (sin_t *)addr;
            aname = (sin_t *)sti->sti_laddr_sa;

            /*
             * Take advantage of the alignment
             * of sin_port and sin6_port which fall
             * in the same place in their data structures.
             * Just use sin_port for either address family.
             *
             * This may become a problem if (heaven forbid)
             * there's a separate ipv6port_reserved... :-P
             *
             * Binding to port 0 has the semantics of letting
             * the transport bind to any port.
             *
             * If the transport is TCP or UDP since we had sent
             * a T_BIND_REQ we would not get a port other than
             * what we asked for.
             */
            if (tcp_udp_xport) {
                /*
                 * Pick up the new port number if we bound to
                 * port 0.
                 */
                if (aname->sin_port == 0)
                    aname->sin_port = rname->sin_port;
                sti->sti_laddr_valid = 1;
                break;
            }
            if (aname->sin_port != 0 &&
                aname->sin_port != rname->sin_port) {
                freemsg(mp);
                error = EADDRINUSE;
                eprintsoline(so, error);
                goto done;
            }
            /*
             * Pick up the new port number if we bound to port 0.
             */
            aname->sin_port = rname->sin_port;

            /*
             * Unfortunately, addresses aren't _quite_ the same.
             */
            if (so->so_family == AF_INET) {
                if (aname->sin_addr.s_addr !=
                    rname->sin_addr.s_addr) {
                    freemsg(mp);
                    error = EADDRNOTAVAIL;
                    eprintsoline(so, error);
                    goto done;
                }
            } else {
                sin6_t *rname6 = (sin6_t *)rname;
                sin6_t *aname6 = (sin6_t *)aname;

                if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr,
                    &rname6->sin6_addr)) {
                    freemsg(mp);
                    error = EADDRNOTAVAIL;
                    eprintsoline(so, error);
                    goto done;
                }
            }
            break;
        }
        case AF_UNIX:
            if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) {
                freemsg(mp);
                error = EADDRINUSE;
                eprintsoline(so, error);
                eprintso(so,
                    ("addrlen %d, addr 0x%x, vp %p\n",
                    addrlen, *((int *)addr),
                    (void *)sti->sti_ux_bound_vp));
                goto done;
            }
            sti->sti_laddr_valid = 1;
            break;
        default:
            /*
             * NOTE: This assumes that addresses can be
             * byte-compared for equivalence.
             */
            if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) {
                freemsg(mp);
                error = EADDRINUSE;
                eprintsoline(so, error);
                goto done;
            }
            /*
             * Don't mark sti_laddr_valid, as we cannot be
             * sure that the returned address is the real
             * bound address when talking to an unknown
             * transport.
             */
            break;
        }
    } else {
        /*
         * Save the returned address for getsockname.
         * Needed for unspecific bind unless transport supports
         * the TI_GETMYNAME ioctl.
         * Do this for AF_INET{,6} even though they do, as
         * caching info here is much better performance than
         * a TPI/STREAMS trip to the transport for getsockname.
         * Any which can't for some reason _must_ _not_ set
         * sti_laddr_valid here for the caching version of
         * getsockname to not break.
         */
        switch (so->so_family) {
        case AF_UNIX:
            /*
             * Record the address bound with the transport
             * for use by socketpair.
             */
            bcopy(addr, &sti->sti_ux_laddr, addrlen);
            sti->sti_laddr_valid = 1;
            break;
        case AF_INET:
        case AF_INET6:
            ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
            bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
            sti->sti_laddr_valid = 1;
            break;
        default:
            /*
             * Don't mark sti_laddr_valid, as we cannot be
             * sure that the returned address is the real
             * bound address when talking to an unknown
             * transport.
             */
            break;
        }
    }

    if (nl7c != NULL) {
        /* Register listen()er sonode pointer with NL7C */
        nl7c_listener_addr(nl7c, so);
    }

    freemsg(mp);

done:
    if (error) {
        /* reset state & backlog to values held on entry */
        if (clear_acceptconn_on_err == B_TRUE)
            so->so_state &= ~SS_ACCEPTCONN;
        if (restore_backlog_on_err == B_TRUE)
            so->so_backlog = save_so_backlog;

        if (unbind_on_err && so->so_state & SS_ISBOUND) {
            int err;

            err = sotpi_unbind(so, 0);
            /* LINTED - statement has no consequent: if */
            if (err) {
                eprintsoline(so, error);
            } else {
                ASSERT(!(so->so_state & SS_ISBOUND));
            }
        }
    }
    if (!(flags & _SOBIND_LOCK_HELD)) {
        so_unlock_single(so, SOLOCKED);
        mutex_exit(&so->so_lock);
    } else {
        ASSERT(MUTEX_HELD(&so->so_lock));
        ASSERT(so->so_flag & SOLOCKED);
    }
    return (error);
}

/* bind the socket */
static int
sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags, struct cred *cr)
{
    if ((flags & _SOBIND_SOCKETPAIR) == 0)
        return (sotpi_bindlisten(so, name, namelen, 0, flags, cr));

    flags &= ~_SOBIND_SOCKETPAIR;
    return (sotpi_bindlisten(so, name, namelen, 1, flags, cr));
}

/*
 * Unbind a socket - used when bind() fails, when bind() specifies a NULL
 * address, or when listen needs to unbind and bind.
 * If the _SOUNBIND_REBIND flag is specified the addresses are retained
 * so that a sobind can pick them up.
 */
static int
sotpi_unbind(struct sonode *so, int flags)
{
    struct T_unbind_req unbind_req;
    int error = 0;
    mblk_t *mp;
    sotpi_info_t *sti = SOTOTPI(so);

    dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n",
        (void *)so, flags, pr_state(so->so_state, so->so_mode)));

    ASSERT(MUTEX_HELD(&so->so_lock));
    ASSERT(so->so_flag & SOLOCKED);

    if (!(so->so_state & SS_ISBOUND)) {
        error = EINVAL;
        eprintsoline(so, error);
        goto done;
    }

    mutex_exit(&so->so_lock);

    /*
     * Flush the read and write side (except stream head read queue)
     * and send down T_UNBIND_REQ.
     */
    (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);

    unbind_req.PRIM_type = T_UNBIND_REQ;
    mp = soallocproto1(&unbind_req, sizeof (unbind_req),
        0, _ALLOC_SLEEP);
    error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
        MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
    mutex_enter(&so->so_lock);
    if (error) {
        eprintsoline(so, error);
        goto done;
    }

    error = sowaitokack(so, T_UNBIND_REQ);
    if (error) {
        eprintsoline(so, error);
        goto done;
    }

    /*
     * Even if some TPI message (e.g. T_DISCON_IND) was received in
     * strsock_proto while the lock was dropped above, the unbind
     * is allowed to complete.
     */
    if (!(flags & _SOUNBIND_REBIND)) {
        /*
         * Clear out bound address.
         */
        vnode_t *vp;

        if ((vp = sti->sti_ux_bound_vp) != NULL) {

            /* Undo any SSL proxy setup */
            if ((so->so_family == AF_INET ||
                so->so_family == AF_INET6) &&
                (so->so_type == SOCK_STREAM) &&
                (sti->sti_kssl_ent != NULL)) {
                kssl_release_ent(sti->sti_kssl_ent, so,
                    sti->sti_kssl_type);
                sti->sti_kssl_ent = NULL;
                sti->sti_kssl_type = KSSL_NO_PROXY;
            }
            sti->sti_ux_bound_vp = NULL;
            vn_rele_stream(vp);
        }
        /* Clear out address */
        sti->sti_laddr_len = 0;
    }
    so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
    sti->sti_laddr_valid = 0;

done:

    /* If the caller held the lock don't release it here */
    ASSERT(MUTEX_HELD(&so->so_lock));
    ASSERT(so->so_flag & SOLOCKED);

    return (error);
}

/*
 * listen on the socket.
 * For TPI conforming transports this has to first unbind with the transport
 * and then bind again using the new backlog.
 */
/* ARGSUSED */
int
sotpi_listen(struct sonode *so, int backlog, struct cred *cr)
{
    int error = 0;
    sotpi_info_t *sti = SOTOTPI(so);

    dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n",
        (void *)so, backlog, pr_state(so->so_state, so->so_mode)));

    if (sti->sti_serv_type == T_CLTS)
        return (EOPNOTSUPP);

    /*
     * If the socket is ready to accept connections already, then
     * return without doing anything. This avoids a problem where
     * a second listen() call fails if a connection is pending and
     * leaves the socket unbound. Only when we are not unbinding
     * with the transport can we safely increase the backlog.
     */
    if (so->so_state & SS_ACCEPTCONN &&
        !((so->so_family == AF_INET || so->so_family == AF_INET6) &&
        /*CONSTCOND*/
        !solisten_tpi_tcp))
        return (0);

    if (so->so_state & SS_ISCONNECTED)
        return (EINVAL);

    mutex_enter(&so->so_lock);
    so_lock_single(so);	/* Set SOLOCKED */

    /*
     * If the listen doesn't change the backlog we do nothing.
     * This avoids an EPROTO error from the transport.
     */
    if ((so->so_state & SS_ACCEPTCONN) &&
        so->so_backlog == backlog)
        goto done;

    if (!(so->so_state & SS_ISBOUND)) {
        /*
         * Must have been explicitly bound in the UNIX domain.
         */
        if (so->so_family == AF_UNIX) {
            error = EINVAL;
            goto done;
        }
        error = sotpi_bindlisten(so, NULL, 0, backlog,
            _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
    } else if (backlog > 0) {
        /*
         * AF_INET{,6} hack to avoid losing the port.
         * Assumes that all AF_INET{,6} transports can handle a
         * O_T_BIND_REQ with a non-zero CONIND_number when the TPI
         * has already bound thus it is possible to avoid the unbind.
         */
        if (!((so->so_family == AF_INET || so->so_family == AF_INET6) &&
            /*CONSTCOND*/
            !solisten_tpi_tcp)) {
            error = sotpi_unbind(so, _SOUNBIND_REBIND);
            if (error)
                goto done;
        }
        error = sotpi_bindlisten(so, NULL, 0, backlog,
            _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
    } else {
        so->so_state |= SS_ACCEPTCONN;
        so->so_backlog = backlog;
    }
    if (error)
        goto done;
    ASSERT(so->so_state & SS_ACCEPTCONN);
done:
    so_unlock_single(so, SOLOCKED);
    mutex_exit(&so->so_lock);
    return (error);
}
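
/*
 * Note (added for clarity; not in the original source): for a strictly
 * TPI-conforming transport, sotpi_listen() above results in the message
 * sequence T_UNBIND_REQ followed by {O_}T_BIND_REQ with CONIND_number set
 * to the backlog.  For AF_INET{,6} the unbind is skipped (see
 * solisten_tpi_tcp) so that the bound port number is not lost.
 */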

/*
 * Disconnect either a specified seqno or all (-1).
 * The former is used on listening sockets only.
 *
 * When seqno == -1 sodisconnect could call sotpi_unbind. However,
 * the current use of sodisconnect(seqno == -1) is only for shutdown
 * so there is no point in unbinding (and it would potentially be
 * incorrect).
 */
static int
sodisconnect(struct sonode *so, t_scalar_t seqno, int flags)
{
    struct T_discon_req discon_req;
    int error = 0;
    mblk_t *mp;

    dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n",
        (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode)));

    if (!(flags & _SODISCONNECT_LOCK_HELD)) {
        mutex_enter(&so->so_lock);
        so_lock_single(so);	/* Set SOLOCKED */
    } else {
        ASSERT(MUTEX_HELD(&so->so_lock));
        ASSERT(so->so_flag & SOLOCKED);
    }

    if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) {
        error = EINVAL;
        eprintsoline(so, error);
        goto done;
    }

    mutex_exit(&so->so_lock);
    /*
     * Flush the write side (unless this is a listener)
     * and then send down a T_DISCON_REQ.
     * (Don't flush on listener since it could flush {O_}T_CONN_RES
     * and other messages.)
     */
    if (!(so->so_state & SS_ACCEPTCONN))
        (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW);

    discon_req.PRIM_type = T_DISCON_REQ;
    discon_req.SEQ_number = seqno;
    mp = soallocproto1(&discon_req, sizeof (discon_req),
        0, _ALLOC_SLEEP);
    error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
        MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
    mutex_enter(&so->so_lock);
    if (error) {
        eprintsoline(so, error);
        goto done;
    }

    error = sowaitokack(so, T_DISCON_REQ);
    if (error) {
        eprintsoline(so, error);
        goto done;
    }
    /*
     * Even if some TPI message (e.g. T_DISCON_IND) was received in
     * strsock_proto while the lock was dropped above, the disconnect
     * is allowed to complete. However, it is not possible to
     * assert that SS_ISCONNECTED|SS_ISCONNECTING are set.
     */
    so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING);
    SOTOTPI(so)->sti_laddr_valid = 0;
    SOTOTPI(so)->sti_faddr_valid = 0;
done:
    if (!(flags & _SODISCONNECT_LOCK_HELD)) {
        so_unlock_single(so, SOLOCKED);
        mutex_exit(&so->so_lock);
    } else {
        /* If the caller held the lock don't release it here */
        ASSERT(MUTEX_HELD(&so->so_lock));
        ASSERT(so->so_flag & SOLOCKED);
    }
    return (error);
}

/* ARGSUSED */
int
sotpi_accept(struct sonode *so, int fflag, struct cred *cr,
    struct sonode **nsop)
{
    struct T_conn_ind *conn_ind;
    struct T_conn_res *conn_res;
    int error = 0;
    mblk_t *mp, *ctxmp, *ack_mp;
    struct sonode *nso;
    vnode_t *nvp;
    void *src;
    t_uscalar_t srclen;
    void *opt;
    t_uscalar_t optlen;
    t_scalar_t PRIM_type;
    t_scalar_t SEQ_number;
    size_t sinlen;
    sotpi_info_t *sti = SOTOTPI(so);
    sotpi_info_t *nsti;

    dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n",
        (void *)so, fflag, (void *)nsop,
        pr_state(so->so_state, so->so_mode)));
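
    /*
     * Overview (added for clarity; not in the original source): there
     * are two paths below.  For AF_INET{,6} TCP with sti_direct set,
     * the transport passes an eager handle in the T_CONN_IND options
     * and the T_CONN_RES is sent down the acceptor stream (fast path).
     * Otherwise the classic {O_}T_CONN_RES is sent down the listener
     * stream and acknowledged with a T_OK_ACK.
     */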

    /*
     * Defer single-threading the accepting socket until
     * the T_CONN_IND has been received and parsed and the
     * new sonode has been opened.
     */

    /* Check that we are not already connected */
    if ((so->so_state & SS_ACCEPTCONN) == 0)
        goto conn_bad;
again:
    if ((error = sowaitconnind(so, fflag, &mp)) != 0)
        goto e_bad;

    ASSERT(mp != NULL);
    conn_ind = (struct T_conn_ind *)mp->b_rptr;
    ctxmp = mp->b_cont;

    /*
     * Save SEQ_number for error paths.
     */
    SEQ_number = conn_ind->SEQ_number;

    srclen = conn_ind->SRC_length;
    src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1);
    if (src == NULL) {
        error = EPROTO;
        freemsg(mp);
        eprintsoline(so, error);
        goto disconnect_unlocked;
    }
    optlen = conn_ind->OPT_length;
    switch (so->so_family) {
    case AF_INET:
    case AF_INET6:
        if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) {
            bcopy(mp->b_rptr + conn_ind->OPT_offset,
                &opt, conn_ind->OPT_length);
        } else {
            /*
             * The transport (in this case TCP) hasn't sent up
             * a pointer to an instance for the accept fast-path.
             * Disable fast-path completely because the call to
             * sotpi_create() below would otherwise create an
             * incomplete TCP instance, which would lead to
             * problems when sockfs sends a normal T_CONN_RES
             * message down the new stream.
             */
            if (sti->sti_direct) {
                int rval;
                /*
                 * For consistency we inform tcp to disable
                 * direct interface on the listener, though
                 * we can certainly live without doing this
                 * because no data will ever travel upstream
                 * on the listening socket.
                 */
                sti->sti_direct = 0;
                (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK,
                    0, 0, K_TO_K, CRED(), &rval);
            }
            opt = NULL;
            optlen = 0;
        }
        break;
    case AF_UNIX:
    default:
        if (optlen != 0) {
            opt = sogetoff(mp, conn_ind->OPT_offset, optlen,
                __TPI_ALIGN_SIZE);
            if (opt == NULL) {
                error = EPROTO;
                freemsg(mp);
                eprintsoline(so, error);
                goto disconnect_unlocked;
            }
        }
        if (so->so_family == AF_UNIX) {
            if (!sti->sti_faddr_noxlate) {
                src = NULL;
                srclen = 0;
            }
            /* Extract src address from options */
            if (optlen != 0)
                so_getopt_srcaddr(opt, optlen, &src, &srclen);
        }
        break;
    }

    /*
     * Create the new socket.
     */
    nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error);
    if (nso == NULL) {
        ASSERT(error != 0);
        /*
         * Accept cannot fail with ENOBUFS. sotpi_create
         * sleeps waiting for memory until a signal is caught
         * so return EINTR.
         */
        freemsg(mp);
        if (error == ENOBUFS)
            error = EINTR;
        goto e_disc_unl;
    }
    nvp = SOTOV(nso);
    nsti = SOTOTPI(nso);

    /*
     * If the transport sent up an SSL connection context, then attach
     * it to the new socket, and set the (sd_wputdatafunc)() and
     * (sd_rputdatafunc)() stream head hooks to intercept and process
     * SSL records.
     */
    if (ctxmp != NULL) {
        /*
         * This kssl_ctx_t is already held for us by the transport.
         * So, we don't need to do a kssl_hold_ctx() here.
         */
        nsti->sti_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr);
        freemsg(ctxmp);
        mp->b_cont = NULL;
        strsetrwputdatahooks(nvp, strsock_kssl_input,
            strsock_kssl_output);

        /* Disable sodirect if any */
        if (nso->so_direct != NULL) {
            mutex_enter(nso->so_direct->sod_lockp);
            SOD_DISABLE(nso->so_direct);
            mutex_exit(nso->so_direct->sod_lockp);
        }
    }
#ifdef DEBUG
    /*
     * SO_DEBUG is used to trigger the dprint* and eprint* macros thus
     * it's inherited early to allow debugging of the accept code itself.
     */
    nso->so_options |= so->so_options & SO_DEBUG;
#endif /* DEBUG */

    /*
     * Save the SRC address from the T_CONN_IND
     * for getpeername to work on AF_UNIX and on transports that do not
     * support TI_GETPEERNAME.
     *
     * NOTE: AF_UNIX NUL termination is ensured by the sender's
     * copyin_name().
     */
    if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) {
        error = EINVAL;
        freemsg(mp);
        eprintsoline(so, error);
        goto disconnect_vp_unlocked;
    }
    nsti->sti_faddr_len = (socklen_t)srclen;
    ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
    bcopy(src, nsti->sti_faddr_sa, srclen);
    nsti->sti_faddr_valid = 1;

    if ((DB_REF(mp) > 1) || MBLKSIZE(mp) <
        (sizeof (struct T_conn_res) + sizeof (intptr_t))) {
        cred_t *cr;

        if ((cr = DB_CRED(mp)) != NULL) {
            crhold(cr);
            nso->so_peercred = cr;
            nso->so_cpid = DB_CPID(mp);
        }
        freemsg(mp);

        mp = soallocproto1(NULL, sizeof (struct T_conn_res) +
            sizeof (intptr_t), 0, _ALLOC_INTR);
        if (mp == NULL) {
            /*
             * Accept cannot fail with ENOBUFS.
             * A signal was caught so return EINTR.
             */
            error = EINTR;
            eprintsoline(so, error);
            goto disconnect_vp_unlocked;
        }
        conn_res = (struct T_conn_res *)mp->b_rptr;
    } else {
        nso->so_peercred = DB_CRED(mp);
        nso->so_cpid = DB_CPID(mp);
        DB_CRED(mp) = NULL;

        mp->b_rptr = DB_BASE(mp);
        conn_res = (struct T_conn_res *)mp->b_rptr;
        mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res);
    }

    /*
     * The new socket must be bound at least in sockfs and, except for
     * AF_INET (or AF_INET6), it also has to be bound in the transport
     * provider. We set the local address in the sonode from the T_OK_ACK
     * of the T_CONN_RES. For this reason the address we bind to here
     * isn't important.
     */
    if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) &&
        /*CONSTCOND*/
        nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) {
        /*
         * Optimization for AF_INET{,6} transports
         * that can handle a T_CONN_RES without being bound.
         */
        mutex_enter(&nso->so_lock);
        so_automatic_bind(nso);
        mutex_exit(&nso->so_lock);
    } else {
        /* Perform NULL bind with the transport provider. */
        if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC,
            cr)) != 0) {
            ASSERT(error != ENOBUFS);
            freemsg(mp);
            eprintsoline(nso, error);
            goto disconnect_vp_unlocked;
        }
    }

    /*
     * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES
     * so that any data arriving on the new socket will cause the
     * appropriate signals to be delivered for the new socket.
     *
     * No other thread (except strsock_proto and strsock_misc)
     * can access the new socket thus we relax the locking.
1952 */ 1953 nso->so_pgrp = so->so_pgrp; 1954 nso->so_state |= so->so_state & SS_ASYNC; 1955 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1956 1957 if (nso->so_pgrp != 0) { 1958 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1959 eprintsoline(nso, error); 1960 error = 0; 1961 nso->so_pgrp = 0; 1962 } 1963 } 1964 1965 /* 1966 * Make note of the socket level options. TCP and IP level options 1967 * are already inherited. We could do all this after accept is 1968 * successful but doing it here simplifies code and no harm done 1969 * for error case. 1970 */ 1971 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1972 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1973 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1974 nso->so_sndbuf = so->so_sndbuf; 1975 nso->so_rcvbuf = so->so_rcvbuf; 1976 if (nso->so_options & SO_LINGER) 1977 nso->so_linger = so->so_linger; 1978 1979 /* 1980 * Note that the following sti_direct code path should be 1981 * removed once we are confident that the direct sockets 1982 * do not result in any degradation. 1983 */ 1984 if (sti->sti_direct) { 1985 1986 ASSERT(opt != NULL); 1987 1988 conn_res->OPT_length = optlen; 1989 conn_res->OPT_offset = MBLKL(mp); 1990 bcopy(&opt, mp->b_wptr, optlen); 1991 mp->b_wptr += optlen; 1992 conn_res->PRIM_type = T_CONN_RES; 1993 conn_res->ACCEPTOR_id = 0; 1994 PRIM_type = T_CONN_RES; 1995 1996 /* Send down the T_CONN_RES on acceptor STREAM */ 1997 error = kstrputmsg(SOTOV(nso), mp, NULL, 1998 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1999 if (error) { 2000 mutex_enter(&so->so_lock); 2001 so_lock_single(so); 2002 eprintsoline(so, error); 2003 goto disconnect_vp; 2004 } 2005 mutex_enter(&nso->so_lock); 2006 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 2007 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2008 if (error) { 2009 mutex_exit(&nso->so_lock); 2010 mutex_enter(&so->so_lock); 2011 so_lock_single(so); 2012 eprintsoline(so, error); 2013 goto disconnect_vp; 2014 } 2015 if (nso->so_family == AF_INET) { 2016 sin_t *sin; 2017 2018 sin = (sin_t *)(ack_mp->b_rptr + 2019 sizeof (struct T_ok_ack)); 2020 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 2021 nsti->sti_laddr_len = sizeof (sin_t); 2022 } else { 2023 sin6_t *sin6; 2024 2025 sin6 = (sin6_t *)(ack_mp->b_rptr + 2026 sizeof (struct T_ok_ack)); 2027 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 2028 nsti->sti_laddr_len = sizeof (sin6_t); 2029 } 2030 freemsg(ack_mp); 2031 2032 nso->so_state |= SS_ISCONNECTED; 2033 nso->so_proto_handle = (sock_lower_handle_t)opt; 2034 nsti->sti_laddr_valid = 1; 2035 2036 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 2037 /* 2038 * A NL7C marked listen()er so the new socket 2039 * inherits the listen()er's NL7C state, except 2040 * for NL7C_POLLIN. 2041 * 2042 * Only call NL7C to process the new socket if 2043 * the listen socket allows blocking i/o. 2044 */ 2045 nsti->sti_nl7c_flags = 2046 sti->sti_nl7c_flags & (~NL7C_POLLIN); 2047 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 2048 /* 2049 * Nonblocking accept() just make it 2050 * persist to defer processing to the 2051 * read-side syscall (e.g. read). 2052 */ 2053 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 2054 } else if (nl7c_process(nso, B_FALSE)) { 2055 /* 2056 * NL7C has completed processing on the 2057 * socket, close the socket and back to 2058 * the top to await the next T_CONN_IND. 
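 * (A B_TRUE return from nl7c_process() means the request has been fully
 * handled by NL7C in the kernel, so this connection is closed rather than
 * handed to the caller.)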
2059 */ 2060 mutex_exit(&nso->so_lock); 2061 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 2062 CRED(), NULL); 2063 VN_RELE(nvp); 2064 goto again; 2065 } 2066 /* Pass the new socket out */ 2067 } 2068 2069 mutex_exit(&nso->so_lock); 2070 2071 /* 2072 * It's possible, through the use of autopush for example, 2073 * that the acceptor stream may not support sti_direct 2074 * semantics. If the new socket does not support sti_direct 2075 * we issue a _SIOCSOCKFALLBACK to inform the transport 2076 * as we would in the I_PUSH case. 2077 */ 2078 if (nsti->sti_direct == 0) { 2079 int rval; 2080 2081 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2082 0, 0, K_TO_K, CRED(), &rval)) != 0) { 2083 mutex_enter(&so->so_lock); 2084 so_lock_single(so); 2085 eprintsoline(so, error); 2086 goto disconnect_vp; 2087 } 2088 } 2089 2090 /* 2091 * Pass out new socket. 2092 */ 2093 if (nsop != NULL) 2094 *nsop = nso; 2095 2096 return (0); 2097 } 2098 2099 /* 2100 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2101 * which don't support the FireEngine accept fast-path. It is also 2102 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2103 * again. Neither sockfs nor TCP attempt to find out if some other 2104 * random module has been inserted in between (in which case we 2105 * should follow TLI accept behaviour). We blindly assume the worst 2106 * case and revert back to old behaviour i.e. TCP will not send us 2107 * any option (eager) and the accept should happen on the listener 2108 * queue. Any queued T_conn_ind have already got their options removed 2109 * by so_sock2_stream() when "sockmod" was I_POP'd. 2110 */ 2111 /* 2112 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2113 */ 2114 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2115 #ifdef _ILP32 2116 queue_t *q; 2117 2118 /* 2119 * Find read queue in driver 2120 * Can safely do this since we "own" nso/nvp. 2121 */ 2122 q = strvp2wq(nvp)->q_next; 2123 while (SAMESTR(q)) 2124 q = q->q_next; 2125 q = RD(q); 2126 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2127 #else 2128 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2129 #endif /* _ILP32 */ 2130 conn_res->PRIM_type = O_T_CONN_RES; 2131 PRIM_type = O_T_CONN_RES; 2132 } else { 2133 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2134 conn_res->PRIM_type = T_CONN_RES; 2135 PRIM_type = T_CONN_RES; 2136 } 2137 conn_res->SEQ_number = SEQ_number; 2138 conn_res->OPT_length = 0; 2139 conn_res->OPT_offset = 0; 2140 2141 mutex_enter(&so->so_lock); 2142 so_lock_single(so); /* Set SOLOCKED */ 2143 mutex_exit(&so->so_lock); 2144 2145 error = kstrputmsg(SOTOV(so), mp, NULL, 2146 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2147 mutex_enter(&so->so_lock); 2148 if (error) { 2149 eprintsoline(so, error); 2150 goto disconnect_vp; 2151 } 2152 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2153 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2154 if (error) { 2155 eprintsoline(so, error); 2156 goto disconnect_vp; 2157 } 2158 /* 2159 * If there is a sin/sin6 appended onto the T_OK_ACK use 2160 * that to set the local address. If this is not present 2161 * then we zero out the address and don't set the 2162 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2163 * the pathname from the listening socket. 2164 */ 2165 sinlen = (nso->so_family == AF_INET) ? 
sizeof (sin_t) : sizeof (sin6_t);
2166 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) &&
2167 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) {
2168 ack_mp->b_rptr += sizeof (struct T_ok_ack);
2169 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen);
2170 nsti->sti_laddr_len = sinlen;
2171 nsti->sti_laddr_valid = 1;
2172 } else if (nso->so_family == AF_UNIX) {
2173 ASSERT(so->so_family == AF_UNIX);
2174 nsti->sti_laddr_len = sti->sti_laddr_len;
2175 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
2176 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa,
2177 nsti->sti_laddr_len);
2178 nsti->sti_laddr_valid = 1;
2179 } else {
2180 nsti->sti_laddr_len = sti->sti_laddr_len;
2181 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen);
2182 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size);
2183 nsti->sti_laddr_sa->sa_family = nso->so_family;
2184 }
2185 freemsg(ack_mp);
2186
2187 so_unlock_single(so, SOLOCKED);
2188 mutex_exit(&so->so_lock);
2189
2190 nso->so_state |= SS_ISCONNECTED;
2191
2192 /*
2193 * Pass out new socket.
2194 */
2195 if (nsop != NULL)
2196 *nsop = nso;
2197
2198 return (0);
2199
2200
2201 eproto_disc_unl:
2202 error = EPROTO;
2203 e_disc_unl:
2204 eprintsoline(so, error);
2205 goto disconnect_unlocked;
2206
2207 pr_disc_vp_unl:
2208 eprintsoline(so, error);
2209 disconnect_vp_unlocked:
2210 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL);
2211 VN_RELE(nvp);
2212 disconnect_unlocked:
2213 (void) sodisconnect(so, SEQ_number, 0);
2214 return (error);
2215
2216 pr_disc_vp:
2217 eprintsoline(so, error);
2218 disconnect_vp:
2219 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD);
2220 so_unlock_single(so, SOLOCKED);
2221 mutex_exit(&so->so_lock);
2222 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL);
2223 VN_RELE(nvp);
2224 return (error);
2225
2226 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */
2227 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW)
2228 ? EOPNOTSUPP : EINVAL;
2229 e_bad:
2230 eprintsoline(so, error);
2231 return (error);
2232 }
2233
2234 /*
2235 * connect a socket.
2236 *
2237 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
2238 * unconnect (by specifying a null address).
2239 */
2240 int
2241 sotpi_connect(struct sonode *so,
2242 const struct sockaddr *name,
2243 socklen_t namelen,
2244 int fflag,
2245 int flags,
2246 struct cred *cr)
2247 {
2248 struct T_conn_req conn_req;
2249 int error = 0;
2250 mblk_t *mp;
2251 void *src;
2252 socklen_t srclen;
2253 void *addr;
2254 socklen_t addrlen;
2255 boolean_t need_unlock;
2256 sotpi_info_t *sti = SOTOTPI(so);
2257
2258 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
2259 (void *)so, (void *)name, namelen, fflag, flags,
2260 pr_state(so->so_state, so->so_mode)));
2261
2262 /*
2263 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
2264 * avoid sleeping for memory with SOLOCKED held.
2265 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen
2266 * + sizeof (struct T_opthdr).
2267 * (the AF_UNIX so_ux_addr_xlate() does not make the address
2268 * exceed sti_faddr_maxlen).
2269 */
2270 mp = soallocproto(sizeof (struct T_conn_req) +
2271 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR);
2272 if (mp == NULL) {
2273 /*
2274 * Connect can not fail with ENOBUFS. A signal was
2275 * caught so return EINTR.
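 * (_ALLOC_INTR makes soallocproto() sleep interruptibly for memory, so a
 * NULL return here means the sleep was interrupted by a signal, not that
 * memory is permanently unavailable.)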
2276 */ 2277 error = EINTR; 2278 eprintsoline(so, error); 2279 return (error); 2280 } 2281 2282 mutex_enter(&so->so_lock); 2283 /* 2284 * Make sure there is a preallocated T_unbind_req message 2285 * before any binding. This message is allocated when the 2286 * socket is created. Since another thread can consume 2287 * so_unbind_mp by the time we return from so_lock_single(), 2288 * we should check the availability of so_unbind_mp after 2289 * we return from so_lock_single(). 2290 */ 2291 2292 so_lock_single(so); /* Set SOLOCKED */ 2293 need_unlock = B_TRUE; 2294 2295 if (sti->sti_unbind_mp == NULL) { 2296 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2297 /* NOTE: holding so_lock while sleeping */ 2298 sti->sti_unbind_mp = 2299 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR); 2300 if (sti->sti_unbind_mp == NULL) { 2301 error = EINTR; 2302 goto done; 2303 } 2304 } 2305 2306 /* 2307 * Can't have done a listen before connecting. 2308 */ 2309 if (so->so_state & SS_ACCEPTCONN) { 2310 error = EOPNOTSUPP; 2311 goto done; 2312 } 2313 2314 /* 2315 * Must be bound with the transport 2316 */ 2317 if (!(so->so_state & SS_ISBOUND)) { 2318 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2319 /*CONSTCOND*/ 2320 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2321 /* 2322 * Optimization for AF_INET{,6} transports 2323 * that can handle a T_CONN_REQ without being bound. 2324 */ 2325 so_automatic_bind(so); 2326 } else { 2327 error = sotpi_bind(so, NULL, 0, 2328 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2329 if (error) 2330 goto done; 2331 } 2332 ASSERT(so->so_state & SS_ISBOUND); 2333 flags |= _SOCONNECT_DID_BIND; 2334 } 2335 2336 /* 2337 * Handle a connect to a name parameter of type AF_UNSPEC like a 2338 * connect to a null address. This is the portable method to 2339 * unconnect a socket. 2340 */ 2341 if ((namelen >= sizeof (sa_family_t)) && 2342 (name->sa_family == AF_UNSPEC)) { 2343 name = NULL; 2344 namelen = 0; 2345 } 2346 2347 /* 2348 * Check that we are not already connected. 2349 * A connection-oriented socket cannot be reconnected. 2350 * A connected connection-less socket can be 2351 * - connected to a different address by a subsequent connect 2352 * - "unconnected" by a connect to the NULL address 2353 */ 2354 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2355 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2356 if (so->so_mode & SM_CONNREQUIRED) { 2357 /* Connection-oriented socket */ 2358 error = so->so_state & SS_ISCONNECTED ? 2359 EISCONN : EALREADY; 2360 goto done; 2361 } 2362 /* Connection-less socket */ 2363 if (name == NULL) { 2364 /* 2365 * Remove the connected state and clear SO_DGRAM_ERRIND 2366 * since it was set when the socket was connected. 2367 * If this is UDP also send down a T_DISCON_REQ. 2368 */ 2369 int val; 2370 2371 if ((so->so_family == AF_INET || 2372 so->so_family == AF_INET6) && 2373 (so->so_type == SOCK_DGRAM || 2374 so->so_type == SOCK_RAW) && 2375 /*CONSTCOND*/ 2376 !soconnect_tpi_udp) { 2377 /* XXX What about implicitly unbinding here? 
*/ 2378 error = sodisconnect(so, -1, 2379 _SODISCONNECT_LOCK_HELD); 2380 } else { 2381 so->so_state &= 2382 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2383 sti->sti_faddr_valid = 0; 2384 sti->sti_faddr_len = 0; 2385 } 2386 2387 /* Remove SOLOCKED since setsockopt will grab it */ 2388 so_unlock_single(so, SOLOCKED); 2389 mutex_exit(&so->so_lock); 2390 2391 val = 0; 2392 (void) sotpi_setsockopt(so, SOL_SOCKET, 2393 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2394 cr); 2395 2396 mutex_enter(&so->so_lock); 2397 so_lock_single(so); /* Set SOLOCKED */ 2398 goto done; 2399 } 2400 } 2401 ASSERT(so->so_state & SS_ISBOUND); 2402 2403 if (name == NULL || namelen == 0) { 2404 error = EINVAL; 2405 goto done; 2406 } 2407 /* 2408 * Mark the socket if sti_faddr_sa represents the transport level 2409 * address. 2410 */ 2411 if (flags & _SOCONNECT_NOXLATE) { 2412 struct sockaddr_ux *soaddr_ux; 2413 2414 ASSERT(so->so_family == AF_UNIX); 2415 if (namelen != sizeof (struct sockaddr_ux)) { 2416 error = EINVAL; 2417 goto done; 2418 } 2419 soaddr_ux = (struct sockaddr_ux *)name; 2420 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2421 namelen = sizeof (soaddr_ux->sou_addr); 2422 sti->sti_faddr_noxlate = 1; 2423 } 2424 2425 /* 2426 * Length and family checks. 2427 */ 2428 error = so_addr_verify(so, name, namelen); 2429 if (error) 2430 goto bad; 2431 2432 /* 2433 * Save foreign address. Needed for AF_UNIX as well as 2434 * transport providers that do not support TI_GETPEERNAME. 2435 * Also used for cached foreign address for TCP and UDP. 2436 */ 2437 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2438 error = EINVAL; 2439 goto done; 2440 } 2441 sti->sti_faddr_len = (socklen_t)namelen; 2442 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2443 bcopy(name, sti->sti_faddr_sa, namelen); 2444 sti->sti_faddr_valid = 1; 2445 2446 if (so->so_family == AF_UNIX) { 2447 if (sti->sti_faddr_noxlate) { 2448 /* 2449 * Already have a transport internal address. Do not 2450 * pass any (transport internal) source address. 2451 */ 2452 addr = sti->sti_faddr_sa; 2453 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2454 src = NULL; 2455 srclen = 0; 2456 } else { 2457 /* 2458 * Pass the sockaddr_un source address as an option 2459 * and translate the remote address. 2460 * Holding so_lock thus sti_laddr_sa can not change. 2461 */ 2462 src = sti->sti_laddr_sa; 2463 srclen = (t_uscalar_t)sti->sti_laddr_len; 2464 dprintso(so, 1, 2465 ("sotpi_connect UNIX: srclen %d, src %p\n", 2466 srclen, src)); 2467 error = so_ux_addr_xlate(so, 2468 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2469 (flags & _SOCONNECT_XPG4_2), 2470 &addr, &addrlen); 2471 if (error) 2472 goto bad; 2473 } 2474 } else { 2475 addr = sti->sti_faddr_sa; 2476 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2477 src = NULL; 2478 srclen = 0; 2479 } 2480 /* 2481 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2482 * option which asks the transport provider to send T_UDERR_IND 2483 * messages. These T_UDERR_IND messages are used to return connected 2484 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2485 * 2486 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2487 * we send down a T_CONN_REQ. This is needed to let the 2488 * transport assign a local address that is consistent with 2489 * the remote address. Applications depend on a getsockname() 2490 * after a connect() to retrieve the "source" IP address for 2491 * the connected socket. Invalidate the cached local address 2492 * to force getsockname() to enquire of the transport. 
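 * (This is why sti_laddr_valid is cleared below before the T_CONN_REQ is
 * built and sent.)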
2493 */ 2494 if (!(so->so_mode & SM_CONNREQUIRED)) { 2495 /* 2496 * Datagram socket. 2497 */ 2498 int32_t val; 2499 2500 so_unlock_single(so, SOLOCKED); 2501 mutex_exit(&so->so_lock); 2502 2503 val = 1; 2504 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2505 &val, (t_uscalar_t)sizeof (val), cr); 2506 2507 mutex_enter(&so->so_lock); 2508 so_lock_single(so); /* Set SOLOCKED */ 2509 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2510 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2511 soconnect_tpi_udp) { 2512 soisconnected(so); 2513 goto done; 2514 } 2515 /* 2516 * Send down T_CONN_REQ etc. 2517 * Clear fflag to avoid returning EWOULDBLOCK. 2518 */ 2519 fflag = 0; 2520 ASSERT(so->so_family != AF_UNIX); 2521 sti->sti_laddr_valid = 0; 2522 } else if (sti->sti_laddr_len != 0) { 2523 /* 2524 * If the local address or port was "any" then it may be 2525 * changed by the transport as a result of the 2526 * connect. Invalidate the cached version if we have one. 2527 */ 2528 switch (so->so_family) { 2529 case AF_INET: 2530 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2531 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2532 INADDR_ANY || 2533 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2534 sti->sti_laddr_valid = 0; 2535 break; 2536 2537 case AF_INET6: 2538 ASSERT(sti->sti_laddr_len == 2539 (socklen_t)sizeof (sin6_t)); 2540 if (IN6_IS_ADDR_UNSPECIFIED( 2541 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2542 IN6_IS_ADDR_V4MAPPED_ANY( 2543 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2544 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2545 sti->sti_laddr_valid = 0; 2546 break; 2547 2548 default: 2549 break; 2550 } 2551 } 2552 2553 /* 2554 * Check for failure of an earlier call 2555 */ 2556 if (so->so_error != 0) 2557 goto so_bad; 2558 2559 /* 2560 * Send down T_CONN_REQ. Message was allocated above. 2561 */ 2562 conn_req.PRIM_type = T_CONN_REQ; 2563 conn_req.DEST_length = addrlen; 2564 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2565 if (srclen == 0) { 2566 conn_req.OPT_length = 0; 2567 conn_req.OPT_offset = 0; 2568 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2569 soappendmsg(mp, addr, addrlen); 2570 } else { 2571 /* 2572 * There is a AF_UNIX sockaddr_un to include as a source 2573 * address option. 2574 */ 2575 struct T_opthdr toh; 2576 2577 toh.level = SOL_SOCKET; 2578 toh.name = SO_SRCADDR; 2579 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2580 toh.status = 0; 2581 conn_req.OPT_length = 2582 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2583 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2584 _TPI_ALIGN_TOPT(addrlen)); 2585 2586 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2587 soappendmsg(mp, addr, addrlen); 2588 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2589 soappendmsg(mp, &toh, sizeof (toh)); 2590 soappendmsg(mp, src, srclen); 2591 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2592 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2593 } 2594 /* 2595 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2596 * in order to have the right state when the T_CONN_CON shows up. 
2597 */ 2598 soisconnecting(so); 2599 mutex_exit(&so->so_lock); 2600 2601 if (audit_active) 2602 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2603 2604 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2605 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2606 mp = NULL; 2607 mutex_enter(&so->so_lock); 2608 if (error != 0) 2609 goto bad; 2610 2611 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2612 goto bad; 2613 2614 /* Allow other threads to access the socket */ 2615 so_unlock_single(so, SOLOCKED); 2616 need_unlock = B_FALSE; 2617 2618 /* 2619 * Wait until we get a T_CONN_CON or an error 2620 */ 2621 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2622 so_lock_single(so); /* Set SOLOCKED */ 2623 need_unlock = B_TRUE; 2624 } 2625 2626 done: 2627 freemsg(mp); 2628 switch (error) { 2629 case EINPROGRESS: 2630 case EALREADY: 2631 case EISCONN: 2632 case EINTR: 2633 /* Non-fatal errors */ 2634 sti->sti_laddr_valid = 0; 2635 /* FALLTHRU */ 2636 case 0: 2637 break; 2638 default: 2639 ASSERT(need_unlock); 2640 /* 2641 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2642 * and invalidate local-address cache 2643 */ 2644 so->so_state &= ~SS_ISCONNECTING; 2645 sti->sti_laddr_valid = 0; 2646 /* A discon_ind might have already unbound us */ 2647 if ((flags & _SOCONNECT_DID_BIND) && 2648 (so->so_state & SS_ISBOUND)) { 2649 int err; 2650 2651 err = sotpi_unbind(so, 0); 2652 /* LINTED - statement has no conseq */ 2653 if (err) { 2654 eprintsoline(so, err); 2655 } 2656 } 2657 break; 2658 } 2659 if (need_unlock) 2660 so_unlock_single(so, SOLOCKED); 2661 mutex_exit(&so->so_lock); 2662 return (error); 2663 2664 so_bad: error = sogeterr(so, B_TRUE); 2665 bad: eprintsoline(so, error); 2666 goto done; 2667 } 2668 2669 /* ARGSUSED */ 2670 int 2671 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2672 { 2673 struct T_ordrel_req ordrel_req; 2674 mblk_t *mp; 2675 uint_t old_state, state_change; 2676 int error = 0; 2677 sotpi_info_t *sti = SOTOTPI(so); 2678 2679 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2680 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2681 2682 mutex_enter(&so->so_lock); 2683 so_lock_single(so); /* Set SOLOCKED */ 2684 2685 /* 2686 * SunOS 4.X has no check for datagram sockets. 2687 * 5.X checks that it is connected (ENOTCONN) 2688 * X/Open requires that we check the connected state. 2689 */ 2690 if (!(so->so_state & SS_ISCONNECTED)) { 2691 if (!xnet_skip_checks) { 2692 error = ENOTCONN; 2693 if (xnet_check_print) { 2694 printf("sockfs: X/Open shutdown check " 2695 "caused ENOTCONN\n"); 2696 } 2697 } 2698 goto done; 2699 } 2700 /* 2701 * Record the current state and then perform any state changes. 2702 * Then use the difference between the old and new states to 2703 * determine which messages need to be sent. 2704 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2705 * duplicate calls to shutdown(). 2706 */ 2707 old_state = so->so_state; 2708 2709 switch (how) { 2710 case 0: 2711 socantrcvmore(so); 2712 break; 2713 case 1: 2714 socantsendmore(so); 2715 break; 2716 case 2: 2717 socantsendmore(so); 2718 socantrcvmore(so); 2719 break; 2720 default: 2721 error = EINVAL; 2722 goto done; 2723 } 2724 2725 /* 2726 * Assumes that the SS_CANT* flags are never cleared in the above code. 
2727 */ 2728 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2729 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2730 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2731 2732 switch (state_change) { 2733 case 0: 2734 dprintso(so, 1, 2735 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2736 so->so_state)); 2737 goto done; 2738 2739 case SS_CANTRCVMORE: 2740 mutex_exit(&so->so_lock); 2741 strseteof(SOTOV(so), 1); 2742 /* 2743 * strseteof takes care of read side wakeups, 2744 * pollwakeups, and signals. 2745 */ 2746 /* 2747 * Get the read lock before flushing data to avoid problems 2748 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2749 */ 2750 mutex_enter(&so->so_lock); 2751 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2752 mutex_exit(&so->so_lock); 2753 2754 /* Flush read side queue */ 2755 strflushrq(SOTOV(so), FLUSHALL); 2756 2757 mutex_enter(&so->so_lock); 2758 so_unlock_read(so); /* Clear SOREADLOCKED */ 2759 break; 2760 2761 case SS_CANTSENDMORE: 2762 mutex_exit(&so->so_lock); 2763 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2764 mutex_enter(&so->so_lock); 2765 break; 2766 2767 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2768 mutex_exit(&so->so_lock); 2769 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2770 strseteof(SOTOV(so), 1); 2771 /* 2772 * strseteof takes care of read side wakeups, 2773 * pollwakeups, and signals. 2774 */ 2775 /* 2776 * Get the read lock before flushing data to avoid problems 2777 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2778 */ 2779 mutex_enter(&so->so_lock); 2780 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2781 mutex_exit(&so->so_lock); 2782 2783 /* Flush read side queue */ 2784 strflushrq(SOTOV(so), FLUSHALL); 2785 2786 mutex_enter(&so->so_lock); 2787 so_unlock_read(so); /* Clear SOREADLOCKED */ 2788 break; 2789 } 2790 2791 ASSERT(MUTEX_HELD(&so->so_lock)); 2792 2793 /* 2794 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2795 * was set due to this call and the new state has both of them set: 2796 * Send the AF_UNIX close indication 2797 * For T_COTS send a discon_ind 2798 * 2799 * If cantsend was set due to this call: 2800 * For T_COTSORD send an ordrel_ind 2801 * 2802 * Note that for T_CLTS there is no message sent here. 2803 */ 2804 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2805 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2806 /* 2807 * For SunOS 4.X compatibility we tell the other end 2808 * that we are unable to receive at this point. 2809 */ 2810 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2811 so_unix_close(so); 2812 2813 if (sti->sti_serv_type == T_COTS) 2814 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2815 } 2816 if ((state_change & SS_CANTSENDMORE) && 2817 (sti->sti_serv_type == T_COTS_ORD)) { 2818 /* Send an orderly release */ 2819 ordrel_req.PRIM_type = T_ORDREL_REQ; 2820 2821 mutex_exit(&so->so_lock); 2822 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2823 0, _ALLOC_SLEEP); 2824 /* 2825 * Send down the T_ORDREL_REQ even if there is flow control. 2826 * This prevents shutdown from blocking. 2827 * Note that there is no T_OK_ACK for ordrel_req. 
2828 */ 2829 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2830 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2831 mutex_enter(&so->so_lock); 2832 if (error) { 2833 eprintsoline(so, error); 2834 goto done; 2835 } 2836 } 2837 2838 done: 2839 so_unlock_single(so, SOLOCKED); 2840 mutex_exit(&so->so_lock); 2841 return (error); 2842 } 2843 2844 /* 2845 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2846 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2847 * that we have closed. 2848 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2849 * T_UNITDATA_REQ containing the same option. 2850 * 2851 * For SOCK_DGRAM half-connections (somebody connected to this end 2852 * but this end is not connect) we don't know where to send any 2853 * SO_UNIX_CLOSE. 2854 * 2855 * We have to ignore stream head errors just in case there has been 2856 * a shutdown(output). 2857 * Ignore any flow control to try to get the message more quickly to the peer. 2858 * While locally ignoring flow control solves the problem when there 2859 * is only the loopback transport on the stream it would not provide 2860 * the correct AF_UNIX socket semantics when one or more modules have 2861 * been pushed. 2862 */ 2863 void 2864 so_unix_close(struct sonode *so) 2865 { 2866 int error; 2867 struct T_opthdr toh; 2868 mblk_t *mp; 2869 sotpi_info_t *sti = SOTOTPI(so); 2870 2871 ASSERT(MUTEX_HELD(&so->so_lock)); 2872 2873 ASSERT(so->so_family == AF_UNIX); 2874 2875 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2876 (SS_ISCONNECTED|SS_ISBOUND)) 2877 return; 2878 2879 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2880 (void *)so, pr_state(so->so_state, so->so_mode))); 2881 2882 toh.level = SOL_SOCKET; 2883 toh.name = SO_UNIX_CLOSE; 2884 2885 /* zero length + header */ 2886 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2887 toh.status = 0; 2888 2889 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2890 struct T_optdata_req tdr; 2891 2892 tdr.PRIM_type = T_OPTDATA_REQ; 2893 tdr.DATA_flag = 0; 2894 2895 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2896 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2897 2898 /* NOTE: holding so_lock while sleeping */ 2899 mp = soallocproto2(&tdr, sizeof (tdr), 2900 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2901 } else { 2902 struct T_unitdata_req tudr; 2903 void *addr; 2904 socklen_t addrlen; 2905 void *src; 2906 socklen_t srclen; 2907 struct T_opthdr toh2; 2908 t_scalar_t size; 2909 2910 /* Connecteded DGRAM socket */ 2911 2912 /* 2913 * For AF_UNIX the destination address is translated to 2914 * an internal name and the source address is passed as 2915 * an option. 2916 */ 2917 /* 2918 * Length and family checks. 2919 */ 2920 error = so_addr_verify(so, sti->sti_faddr_sa, 2921 (t_uscalar_t)sti->sti_faddr_len); 2922 if (error) { 2923 eprintsoline(so, error); 2924 return; 2925 } 2926 if (sti->sti_faddr_noxlate) { 2927 /* 2928 * Already have a transport internal address. Do not 2929 * pass any (transport internal) source address. 2930 */ 2931 addr = sti->sti_faddr_sa; 2932 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2933 src = NULL; 2934 srclen = 0; 2935 } else { 2936 /* 2937 * Pass the sockaddr_un source address as an option 2938 * and translate the remote address. 2939 * Holding so_lock thus sti_laddr_sa can not change. 
2940 */ 2941 src = sti->sti_laddr_sa; 2942 srclen = (socklen_t)sti->sti_laddr_len; 2943 dprintso(so, 1, 2944 ("so_ux_close: srclen %d, src %p\n", 2945 srclen, src)); 2946 error = so_ux_addr_xlate(so, 2947 sti->sti_faddr_sa, 2948 (socklen_t)sti->sti_faddr_len, 0, 2949 &addr, &addrlen); 2950 if (error) { 2951 eprintsoline(so, error); 2952 return; 2953 } 2954 } 2955 tudr.PRIM_type = T_UNITDATA_REQ; 2956 tudr.DEST_length = addrlen; 2957 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2958 if (srclen == 0) { 2959 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2960 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2961 _TPI_ALIGN_TOPT(addrlen)); 2962 2963 size = tudr.OPT_offset + tudr.OPT_length; 2964 /* NOTE: holding so_lock while sleeping */ 2965 mp = soallocproto2(&tudr, sizeof (tudr), 2966 addr, addrlen, size, _ALLOC_SLEEP); 2967 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2968 soappendmsg(mp, &toh, sizeof (toh)); 2969 } else { 2970 /* 2971 * There is a AF_UNIX sockaddr_un to include as a 2972 * source address option. 2973 */ 2974 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2975 _TPI_ALIGN_TOPT(srclen)); 2976 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2977 _TPI_ALIGN_TOPT(addrlen)); 2978 2979 toh2.level = SOL_SOCKET; 2980 toh2.name = SO_SRCADDR; 2981 toh2.len = (t_uscalar_t)(srclen + 2982 sizeof (struct T_opthdr)); 2983 toh2.status = 0; 2984 2985 size = tudr.OPT_offset + tudr.OPT_length; 2986 2987 /* NOTE: holding so_lock while sleeping */ 2988 mp = soallocproto2(&tudr, sizeof (tudr), 2989 addr, addrlen, size, _ALLOC_SLEEP); 2990 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2991 soappendmsg(mp, &toh, sizeof (toh)); 2992 soappendmsg(mp, &toh2, sizeof (toh2)); 2993 soappendmsg(mp, src, srclen); 2994 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2995 } 2996 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2997 } 2998 mutex_exit(&so->so_lock); 2999 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 3000 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 3001 mutex_enter(&so->so_lock); 3002 } 3003 3004 /* 3005 * Called by sotpi_recvmsg when reading a non-zero amount of data. 3006 * In addition, the caller typically verifies that there is some 3007 * potential state to clear by checking 3008 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 3009 * before calling this routine. 3010 * Note that such a check can be made without holding so_lock since 3011 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 3012 * decrements sti_oobsigcnt. 3013 * 3014 * When data is read *after* the point that all pending 3015 * oob data has been consumed the oob indication is cleared. 3016 * 3017 * This logic keeps select/poll returning POLLRDBAND and 3018 * SIOCATMARK returning true until we have read past 3019 * the mark. 3020 */ 3021 static void 3022 sorecv_update_oobstate(struct sonode *so) 3023 { 3024 sotpi_info_t *sti = SOTOTPI(so); 3025 3026 mutex_enter(&so->so_lock); 3027 ASSERT(so_verify_oobstate(so)); 3028 dprintso(so, 1, 3029 ("sorecv_update_oobstate: counts %d/%d state %s\n", 3030 sti->sti_oobsigcnt, 3031 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 3032 if (sti->sti_oobsigcnt == 0) { 3033 /* No more pending oob indications */ 3034 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 3035 freemsg(so->so_oobmsg); 3036 so->so_oobmsg = NULL; 3037 } 3038 ASSERT(so_verify_oobstate(so)); 3039 mutex_exit(&so->so_lock); 3040 } 3041 3042 /* 3043 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 
3044 */ 3045 static int 3046 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 3047 { 3048 sotpi_info_t *sti = SOTOTPI(so); 3049 int error = 0; 3050 mblk_t *tmp = NULL; 3051 mblk_t *pmp = NULL; 3052 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 3053 3054 ASSERT(nmp != NULL); 3055 3056 while (nmp != NULL && uiop->uio_resid > 0) { 3057 ssize_t n; 3058 3059 if (DB_TYPE(nmp) == M_DATA) { 3060 /* 3061 * We have some data, uiomove up to resid bytes. 3062 */ 3063 n = MIN(MBLKL(nmp), uiop->uio_resid); 3064 if (n > 0) 3065 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 3066 nmp->b_rptr += n; 3067 if (nmp->b_rptr == nmp->b_wptr) { 3068 pmp = nmp; 3069 nmp = nmp->b_cont; 3070 } 3071 if (error) 3072 break; 3073 } else { 3074 /* 3075 * We only handle data, save for caller to handle. 3076 */ 3077 if (pmp != NULL) { 3078 pmp->b_cont = nmp->b_cont; 3079 } 3080 nmp->b_cont = NULL; 3081 if (*rmp == NULL) { 3082 *rmp = nmp; 3083 } else { 3084 tmp->b_cont = nmp; 3085 } 3086 nmp = nmp->b_cont; 3087 tmp = nmp; 3088 } 3089 } 3090 if (pmp != NULL) { 3091 /* Free any mblk_t(s) which we have consumed */ 3092 pmp->b_cont = NULL; 3093 freemsg(sti->sti_nl7c_rcv_mp); 3094 } 3095 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3096 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3097 if (error == 0) { 3098 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3099 3100 error = p->r_v.r_v2; 3101 p->r_v.r_v2 = 0; 3102 } 3103 rp->r_vals = sti->sti_nl7c_rcv_rval; 3104 sti->sti_nl7c_rcv_rval = 0; 3105 } else { 3106 /* More mblk_t(s) to process so no rval to return */ 3107 rp->r_vals = 0; 3108 } 3109 return (error); 3110 } 3111 /* 3112 * Receive the next message on the queue. 3113 * If msg_controllen is non-zero when called the caller is interested in 3114 * any received control info (options). 3115 * If msg_namelen is non-zero when called the caller is interested in 3116 * any received source address. 3117 * The routine returns with msg_control and msg_name pointing to 3118 * kmem_alloc'ed memory which the caller has to free. 3119 */ 3120 /* ARGSUSED */ 3121 int 3122 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3123 struct cred *cr) 3124 { 3125 union T_primitives *tpr; 3126 mblk_t *mp; 3127 uchar_t pri; 3128 int pflag, opflag; 3129 void *control; 3130 t_uscalar_t controllen; 3131 t_uscalar_t namelen; 3132 int so_state = so->so_state; /* Snapshot */ 3133 ssize_t saved_resid; 3134 rval_t rval; 3135 int flags; 3136 clock_t timout; 3137 int error = 0; 3138 int reterr = 0; 3139 struct uio *suiop = NULL; 3140 sotpi_info_t *sti = SOTOTPI(so); 3141 3142 flags = msg->msg_flags; 3143 msg->msg_flags = 0; 3144 3145 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3146 (void *)so, (void *)msg, flags, 3147 pr_state(so->so_state, so->so_mode), so->so_error)); 3148 3149 if (so->so_version == SOV_STREAM) { 3150 so_update_attrs(so, SOACC); 3151 /* The imaginary "sockmod" has been popped - act as a stream */ 3152 return (strread(SOTOV(so), uiop, cr)); 3153 } 3154 3155 /* 3156 * If we are not connected because we have never been connected 3157 * we return ENOTCONN. If we have been connected (but are no longer 3158 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3159 * the EOF. 3160 * 3161 * An alternative would be to post an ENOTCONN error in stream head 3162 * (read+write) and clear it when we're connected. However, that error 3163 * would cause incorrect poll/select behavior! 
3164 */ 3165 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3166 (so->so_mode & SM_CONNREQUIRED)) { 3167 return (ENOTCONN); 3168 } 3169 3170 /* 3171 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3172 * after checking that the read queue is empty) and returns zero. 3173 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3174 * is zero. 3175 */ 3176 3177 if (flags & MSG_OOB) { 3178 /* Check that the transport supports OOB */ 3179 if (!(so->so_mode & SM_EXDATA)) 3180 return (EOPNOTSUPP); 3181 so_update_attrs(so, SOACC); 3182 return (sorecvoob(so, msg, uiop, flags, 3183 (so->so_options & SO_OOBINLINE))); 3184 } 3185 3186 so_update_attrs(so, SOACC); 3187 3188 /* 3189 * Set msg_controllen and msg_namelen to zero here to make it 3190 * simpler in the cases that no control or name is returned. 3191 */ 3192 controllen = msg->msg_controllen; 3193 namelen = msg->msg_namelen; 3194 msg->msg_controllen = 0; 3195 msg->msg_namelen = 0; 3196 3197 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3198 namelen, controllen)); 3199 3200 mutex_enter(&so->so_lock); 3201 /* 3202 * If an NL7C enabled socket and not waiting for write data. 3203 */ 3204 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3205 NL7C_ENABLED) { 3206 if (sti->sti_nl7c_uri) { 3207 /* Close uri processing for a previous request */ 3208 nl7c_close(so); 3209 } 3210 if ((so_state & SS_CANTRCVMORE) && 3211 sti->sti_nl7c_rcv_mp == NULL) { 3212 /* Nothing to process, EOF */ 3213 mutex_exit(&so->so_lock); 3214 return (0); 3215 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3216 /* Persistent NL7C socket, try to process request */ 3217 boolean_t ret; 3218 3219 ret = nl7c_process(so, 3220 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3221 rval.r_vals = sti->sti_nl7c_rcv_rval; 3222 error = rval.r_v.r_v2; 3223 if (error) { 3224 /* Error of some sort, return it */ 3225 mutex_exit(&so->so_lock); 3226 return (error); 3227 } 3228 if (sti->sti_nl7c_flags && 3229 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3230 /* 3231 * Still an NL7C socket and no data 3232 * to pass up to the caller. 3233 */ 3234 mutex_exit(&so->so_lock); 3235 if (ret) { 3236 /* EOF */ 3237 return (0); 3238 } else { 3239 /* Need more data */ 3240 return (EAGAIN); 3241 } 3242 } 3243 } else { 3244 /* 3245 * Not persistent so no further NL7C processing. 3246 */ 3247 sti->sti_nl7c_flags = 0; 3248 } 3249 } 3250 /* 3251 * Only one reader is allowed at any given time. This is needed 3252 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3253 * 3254 * This is slightly different that BSD behavior in that it fails with 3255 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3256 * is single-threaded using sblock(), which is dropped while waiting 3257 * for data to appear. The difference shows up e.g. if one 3258 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3259 * does use nonblocking io and different threads are reading each 3260 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3261 * in this case as long as the read queue doesn't get empty. 3262 * In this implementation the thread using nonblocking io can 3263 * get an EWOULDBLOCK error due to the blocking thread executing 3264 * e.g. in the uiomove in kstrgetmsg. 3265 * This difference is not believed to be significant. 3266 */ 3267 /* Set SOREADLOCKED */ 3268 error = so_lock_read_intr(so, 3269 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? 
FNONBLOCK : 0)); 3270 mutex_exit(&so->so_lock); 3271 if (error) 3272 return (error); 3273 3274 /* 3275 * Tell kstrgetmsg to not inspect the stream head errors until all 3276 * queued data has been consumed. 3277 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3278 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3279 * 3280 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3281 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3282 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3283 */ 3284 pflag = MSG_ANY | MSG_DELAYERROR; 3285 if (flags & MSG_PEEK) { 3286 pflag |= MSG_IPEEK; 3287 flags &= ~MSG_WAITALL; 3288 } 3289 if (so->so_mode & SM_ATOMIC) 3290 pflag |= MSG_DISCARDTAIL; 3291 3292 if (flags & MSG_DONTWAIT) 3293 timout = 0; 3294 else 3295 timout = -1; 3296 opflag = pflag; 3297 3298 suiop = sod_rcv_init(so, flags, &uiop); 3299 retry: 3300 saved_resid = uiop->uio_resid; 3301 pri = 0; 3302 mp = NULL; 3303 if (sti->sti_nl7c_rcv_mp != NULL) { 3304 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3305 error = nl7c_sorecv(so, &mp, uiop, &rval); 3306 } else { 3307 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3308 timout, &rval); 3309 } 3310 if (error != 0) { 3311 /* kstrgetmsg returns ETIME when timeout expires */ 3312 if (error == ETIME) 3313 error = EWOULDBLOCK; 3314 goto out; 3315 } 3316 /* 3317 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3318 * For non-datagrams MOREDATA is used to set MSG_EOR. 3319 */ 3320 ASSERT(!(rval.r_val1 & MORECTL)); 3321 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3322 msg->msg_flags |= MSG_TRUNC; 3323 3324 if (mp == NULL) { 3325 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3326 /* 3327 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3328 * The draft Posix socket spec states that the mark should 3329 * not be cleared when peeking. We follow the latter. 3330 */ 3331 if ((so->so_state & 3332 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3333 (uiop->uio_resid != saved_resid) && 3334 !(flags & MSG_PEEK)) { 3335 sorecv_update_oobstate(so); 3336 } 3337 3338 mutex_enter(&so->so_lock); 3339 /* Set MSG_EOR based on MOREDATA */ 3340 if (!(rval.r_val1 & MOREDATA)) { 3341 if (so->so_state & SS_SAVEDEOR) { 3342 msg->msg_flags |= MSG_EOR; 3343 so->so_state &= ~SS_SAVEDEOR; 3344 } 3345 } 3346 /* 3347 * If some data was received (i.e. not EOF) and the 3348 * read/recv* has not been satisfied wait for some more. 3349 */ 3350 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3351 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3352 mutex_exit(&so->so_lock); 3353 pflag = opflag | MSG_NOMARK; 3354 goto retry; 3355 } 3356 goto out_locked; 3357 } 3358 3359 /* strsock_proto has already verified length and alignment */ 3360 tpr = (union T_primitives *)mp->b_rptr; 3361 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3362 3363 switch (tpr->type) { 3364 case T_DATA_IND: { 3365 if ((so->so_state & 3366 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3367 (uiop->uio_resid != saved_resid) && 3368 !(flags & MSG_PEEK)) { 3369 sorecv_update_oobstate(so); 3370 } 3371 3372 /* 3373 * Set msg_flags to MSG_EOR based on 3374 * MORE_flag and MOREDATA. 
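 * MORE_flag set means the transport will deliver more of this TSDU;
 * MOREDATA set means part of this message is still queued at the stream
 * head. MSG_EOR is reported only when both are clear; if the record has
 * ended but data remains queued, that fact is remembered in SS_SAVEDEOR.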
3375 */ 3376 mutex_enter(&so->so_lock); 3377 so->so_state &= ~SS_SAVEDEOR; 3378 if (!(tpr->data_ind.MORE_flag & 1)) { 3379 if (!(rval.r_val1 & MOREDATA)) 3380 msg->msg_flags |= MSG_EOR; 3381 else 3382 so->so_state |= SS_SAVEDEOR; 3383 } 3384 freemsg(mp); 3385 /* 3386 * If some data was received (i.e. not EOF) and the 3387 * read/recv* has not been satisfied wait for some more. 3388 */ 3389 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3390 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3391 mutex_exit(&so->so_lock); 3392 pflag = opflag | MSG_NOMARK; 3393 goto retry; 3394 } 3395 goto out_locked; 3396 } 3397 case T_UNITDATA_IND: { 3398 void *addr; 3399 t_uscalar_t addrlen; 3400 void *abuf; 3401 t_uscalar_t optlen; 3402 void *opt; 3403 3404 if ((so->so_state & 3405 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3406 (uiop->uio_resid != saved_resid) && 3407 !(flags & MSG_PEEK)) { 3408 sorecv_update_oobstate(so); 3409 } 3410 3411 if (namelen != 0) { 3412 /* Caller wants source address */ 3413 addrlen = tpr->unitdata_ind.SRC_length; 3414 addr = sogetoff(mp, 3415 tpr->unitdata_ind.SRC_offset, 3416 addrlen, 1); 3417 if (addr == NULL) { 3418 freemsg(mp); 3419 error = EPROTO; 3420 eprintsoline(so, error); 3421 goto out; 3422 } 3423 if (so->so_family == AF_UNIX) { 3424 /* 3425 * Can not use the transport level address. 3426 * If there is a SO_SRCADDR option carrying 3427 * the socket level address it will be 3428 * extracted below. 3429 */ 3430 addr = NULL; 3431 addrlen = 0; 3432 } 3433 } 3434 optlen = tpr->unitdata_ind.OPT_length; 3435 if (optlen != 0) { 3436 t_uscalar_t ncontrollen; 3437 3438 /* 3439 * Extract any source address option. 3440 * Determine how large cmsg buffer is needed. 3441 */ 3442 opt = sogetoff(mp, 3443 tpr->unitdata_ind.OPT_offset, 3444 optlen, __TPI_ALIGN_SIZE); 3445 3446 if (opt == NULL) { 3447 freemsg(mp); 3448 error = EPROTO; 3449 eprintsoline(so, error); 3450 goto out; 3451 } 3452 if (so->so_family == AF_UNIX) 3453 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3454 ncontrollen = so_cmsglen(mp, opt, optlen, 3455 !(flags & MSG_XPG4_2)); 3456 if (controllen != 0) 3457 controllen = ncontrollen; 3458 else if (ncontrollen != 0) 3459 msg->msg_flags |= MSG_CTRUNC; 3460 } else { 3461 controllen = 0; 3462 } 3463 3464 if (namelen != 0) { 3465 /* 3466 * Return address to caller. 3467 * Caller handles truncation if length 3468 * exceeds msg_namelen. 3469 * NOTE: AF_UNIX NUL termination is ensured by 3470 * the sender's copyin_name(). 3471 */ 3472 abuf = kmem_alloc(addrlen, KM_SLEEP); 3473 3474 bcopy(addr, abuf, addrlen); 3475 msg->msg_name = abuf; 3476 msg->msg_namelen = addrlen; 3477 } 3478 3479 if (controllen != 0) { 3480 /* 3481 * Return control msg to caller. 3482 * Caller handles truncation if length 3483 * exceeds msg_controllen. 
3484 */ 3485 control = kmem_zalloc(controllen, KM_SLEEP); 3486 3487 error = so_opt2cmsg(mp, opt, optlen, 3488 !(flags & MSG_XPG4_2), 3489 control, controllen); 3490 if (error) { 3491 freemsg(mp); 3492 if (msg->msg_namelen != 0) 3493 kmem_free(msg->msg_name, 3494 msg->msg_namelen); 3495 kmem_free(control, controllen); 3496 eprintsoline(so, error); 3497 goto out; 3498 } 3499 msg->msg_control = control; 3500 msg->msg_controllen = controllen; 3501 } 3502 3503 freemsg(mp); 3504 goto out; 3505 } 3506 case T_OPTDATA_IND: { 3507 struct T_optdata_req *tdr; 3508 void *opt; 3509 t_uscalar_t optlen; 3510 3511 if ((so->so_state & 3512 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3513 (uiop->uio_resid != saved_resid) && 3514 !(flags & MSG_PEEK)) { 3515 sorecv_update_oobstate(so); 3516 } 3517 3518 tdr = (struct T_optdata_req *)mp->b_rptr; 3519 optlen = tdr->OPT_length; 3520 if (optlen != 0) { 3521 t_uscalar_t ncontrollen; 3522 /* 3523 * Determine how large cmsg buffer is needed. 3524 */ 3525 opt = sogetoff(mp, 3526 tpr->optdata_ind.OPT_offset, 3527 optlen, __TPI_ALIGN_SIZE); 3528 3529 if (opt == NULL) { 3530 freemsg(mp); 3531 error = EPROTO; 3532 eprintsoline(so, error); 3533 goto out; 3534 } 3535 3536 ncontrollen = so_cmsglen(mp, opt, optlen, 3537 !(flags & MSG_XPG4_2)); 3538 if (controllen != 0) 3539 controllen = ncontrollen; 3540 else if (ncontrollen != 0) 3541 msg->msg_flags |= MSG_CTRUNC; 3542 } else { 3543 controllen = 0; 3544 } 3545 3546 if (controllen != 0) { 3547 /* 3548 * Return control msg to caller. 3549 * Caller handles truncation if length 3550 * exceeds msg_controllen. 3551 */ 3552 control = kmem_zalloc(controllen, KM_SLEEP); 3553 3554 error = so_opt2cmsg(mp, opt, optlen, 3555 !(flags & MSG_XPG4_2), 3556 control, controllen); 3557 if (error) { 3558 freemsg(mp); 3559 kmem_free(control, controllen); 3560 eprintsoline(so, error); 3561 goto out; 3562 } 3563 msg->msg_control = control; 3564 msg->msg_controllen = controllen; 3565 } 3566 3567 /* 3568 * Set msg_flags to MSG_EOR based on 3569 * DATA_flag and MOREDATA. 3570 */ 3571 mutex_enter(&so->so_lock); 3572 so->so_state &= ~SS_SAVEDEOR; 3573 if (!(tpr->data_ind.MORE_flag & 1)) { 3574 if (!(rval.r_val1 & MOREDATA)) 3575 msg->msg_flags |= MSG_EOR; 3576 else 3577 so->so_state |= SS_SAVEDEOR; 3578 } 3579 freemsg(mp); 3580 /* 3581 * If some data was received (i.e. not EOF) and the 3582 * read/recv* has not been satisfied wait for some more. 3583 * Not possible to wait if control info was received. 3584 */ 3585 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3586 controllen == 0 && 3587 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3588 mutex_exit(&so->so_lock); 3589 pflag = opflag | MSG_NOMARK; 3590 goto retry; 3591 } 3592 goto out_locked; 3593 } 3594 case T_EXDATA_IND: { 3595 dprintso(so, 1, 3596 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3597 "state %s\n", 3598 sti->sti_oobsigcnt, sti->sti_oobcnt, 3599 saved_resid - uiop->uio_resid, 3600 pr_state(so->so_state, so->so_mode))); 3601 /* 3602 * kstrgetmsg handles MSGMARK so there is nothing to 3603 * inspect in the T_EXDATA_IND. 3604 * strsock_proto makes the stream head queue the T_EXDATA_IND 3605 * as a separate message with no M_DATA component. Furthermore, 3606 * the stream head does not consolidate M_DATA messages onto 3607 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3608 * remains a message by itself. This is needed since MSGMARK 3609 * marks both the whole message as well as the last byte 3610 * of the message. 
3611 */ 3612 freemsg(mp); 3613 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3614 if (flags & MSG_PEEK) { 3615 /* 3616 * Even though we are peeking we consume the 3617 * T_EXDATA_IND thereby moving the mark information 3618 * to SS_RCVATMARK. Then the oob code below will 3619 * retry the peeking kstrgetmsg. 3620 * Note that the stream head read queue is 3621 * never flushed without holding SOREADLOCKED 3622 * thus the T_EXDATA_IND can not disappear 3623 * underneath us. 3624 */ 3625 dprintso(so, 1, 3626 ("sotpi_recvmsg: consume EXDATA_IND " 3627 "counts %d/%d state %s\n", 3628 sti->sti_oobsigcnt, 3629 sti->sti_oobcnt, 3630 pr_state(so->so_state, so->so_mode))); 3631 3632 pflag = MSG_ANY | MSG_DELAYERROR; 3633 if (so->so_mode & SM_ATOMIC) 3634 pflag |= MSG_DISCARDTAIL; 3635 3636 pri = 0; 3637 mp = NULL; 3638 3639 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3640 &pri, &pflag, (clock_t)-1, &rval); 3641 ASSERT(uiop->uio_resid == saved_resid); 3642 3643 if (error) { 3644 #ifdef SOCK_DEBUG 3645 if (error != EWOULDBLOCK && error != EINTR) { 3646 eprintsoline(so, error); 3647 } 3648 #endif /* SOCK_DEBUG */ 3649 goto out; 3650 } 3651 ASSERT(mp); 3652 tpr = (union T_primitives *)mp->b_rptr; 3653 ASSERT(tpr->type == T_EXDATA_IND); 3654 freemsg(mp); 3655 } /* end "if (flags & MSG_PEEK)" */ 3656 3657 /* 3658 * Decrement the number of queued and pending oob. 3659 * 3660 * SS_RCVATMARK is cleared when we read past a mark. 3661 * SS_HAVEOOBDATA is cleared when we've read past the 3662 * last mark. 3663 * SS_OOBPEND is cleared if we've read past the last 3664 * mark and no (new) SIGURG has been posted. 3665 */ 3666 mutex_enter(&so->so_lock); 3667 ASSERT(so_verify_oobstate(so)); 3668 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3669 ASSERT(sti->sti_oobsigcnt > 0); 3670 sti->sti_oobsigcnt--; 3671 ASSERT(sti->sti_oobcnt > 0); 3672 sti->sti_oobcnt--; 3673 /* 3674 * Since the T_EXDATA_IND has been removed from the stream 3675 * head, but we have not read data past the mark, 3676 * sockfs needs to track that the socket is still at the mark. 3677 * 3678 * Since no data was received call kstrgetmsg again to wait 3679 * for data. 3680 */ 3681 so->so_state |= SS_RCVATMARK; 3682 mutex_exit(&so->so_lock); 3683 dprintso(so, 1, 3684 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3685 sti->sti_oobsigcnt, sti->sti_oobcnt, 3686 pr_state(so->so_state, so->so_mode))); 3687 pflag = opflag; 3688 goto retry; 3689 } 3690 default: 3691 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3692 (void *)so, tpr->type, (void *)mp); 3693 ASSERT(0); 3694 freemsg(mp); 3695 error = EPROTO; 3696 eprintsoline(so, error); 3697 goto out; 3698 } 3699 /* NOTREACHED */ 3700 out: 3701 mutex_enter(&so->so_lock); 3702 out_locked: 3703 if (so->so_direct != NULL) { 3704 mutex_enter(so->so_direct->sod_lockp); 3705 reterr = sod_rcv_done(so, suiop, uiop); 3706 mutex_exit(so->so_direct->sod_lockp); 3707 } 3708 if (reterr != 0 && error == 0) 3709 error = reterr; 3710 so_unlock_read(so); /* Clear SOREADLOCKED */ 3711 mutex_exit(&so->so_lock); 3712 return (error); 3713 } 3714 3715 /* 3716 * Sending data with options on a datagram socket. 3717 * Assumes caller has verified that SS_ISBOUND etc. are set. 
3718 */ 3719 static int 3720 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3721 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3722 { 3723 struct T_unitdata_req tudr; 3724 mblk_t *mp; 3725 int error; 3726 void *addr; 3727 socklen_t addrlen; 3728 void *src; 3729 socklen_t srclen; 3730 ssize_t len; 3731 int size; 3732 struct T_opthdr toh; 3733 struct fdbuf *fdbuf; 3734 t_uscalar_t optlen; 3735 void *fds; 3736 int fdlen; 3737 sotpi_info_t *sti = SOTOTPI(so); 3738 3739 ASSERT(name && namelen); 3740 ASSERT(control && controllen); 3741 3742 len = uiop->uio_resid; 3743 if (len > (ssize_t)sti->sti_tidu_size) { 3744 return (EMSGSIZE); 3745 } 3746 3747 /* 3748 * For AF_UNIX the destination address is translated to an internal 3749 * name and the source address is passed as an option. 3750 * Also, file descriptors are passed as file pointers in an 3751 * option. 3752 */ 3753 3754 /* 3755 * Length and family checks. 3756 */ 3757 error = so_addr_verify(so, name, namelen); 3758 if (error) { 3759 eprintsoline(so, error); 3760 return (error); 3761 } 3762 if (so->so_family == AF_UNIX) { 3763 if (sti->sti_faddr_noxlate) { 3764 /* 3765 * Already have a transport internal address. Do not 3766 * pass any (transport internal) source address. 3767 */ 3768 addr = name; 3769 addrlen = namelen; 3770 src = NULL; 3771 srclen = 0; 3772 } else { 3773 /* 3774 * Pass the sockaddr_un source address as an option 3775 * and translate the remote address. 3776 * 3777 * Note that this code does not prevent sti_laddr_sa 3778 * from changing while it is being used. Thus 3779 * if an unbind+bind occurs concurrently with this 3780 * send the peer might see a partially new and a 3781 * partially old "from" address. 3782 */ 3783 src = sti->sti_laddr_sa; 3784 srclen = (t_uscalar_t)sti->sti_laddr_len; 3785 dprintso(so, 1, 3786 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3787 srclen, src)); 3788 error = so_ux_addr_xlate(so, name, namelen, 3789 (flags & MSG_XPG4_2), 3790 &addr, &addrlen); 3791 if (error) { 3792 eprintsoline(so, error); 3793 return (error); 3794 } 3795 } 3796 } else { 3797 addr = name; 3798 addrlen = namelen; 3799 src = NULL; 3800 srclen = 0; 3801 } 3802 optlen = so_optlen(control, controllen, 3803 !(flags & MSG_XPG4_2)); 3804 tudr.PRIM_type = T_UNITDATA_REQ; 3805 tudr.DEST_length = addrlen; 3806 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3807 if (srclen != 0) 3808 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3809 _TPI_ALIGN_TOPT(srclen)); 3810 else 3811 tudr.OPT_length = optlen; 3812 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3813 _TPI_ALIGN_TOPT(addrlen)); 3814 3815 size = tudr.OPT_offset + tudr.OPT_length; 3816 3817 /* 3818 * File descriptors only when SM_FDPASSING set. 3819 */ 3820 error = so_getfdopt(control, controllen, 3821 !(flags & MSG_XPG4_2), &fds, &fdlen); 3822 if (error) 3823 return (error); 3824 if (fdlen != -1) { 3825 if (!(so->so_mode & SM_FDPASSING)) 3826 return (EOPNOTSUPP); 3827 3828 error = fdbuf_create(fds, fdlen, &fdbuf); 3829 if (error) 3830 return (error); 3831 mp = fdbuf_allocmsg(size, fdbuf); 3832 } else { 3833 mp = soallocproto(size, _ALLOC_INTR); 3834 if (mp == NULL) { 3835 /* 3836 * Caught a signal waiting for memory. 3837 * Let send* return EINTR. 
3838 */ 3839 return (EINTR); 3840 } 3841 } 3842 soappendmsg(mp, &tudr, sizeof (tudr)); 3843 soappendmsg(mp, addr, addrlen); 3844 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3845 3846 if (fdlen != -1) { 3847 ASSERT(fdbuf != NULL); 3848 toh.level = SOL_SOCKET; 3849 toh.name = SO_FILEP; 3850 toh.len = fdbuf->fd_size + 3851 (t_uscalar_t)sizeof (struct T_opthdr); 3852 toh.status = 0; 3853 soappendmsg(mp, &toh, sizeof (toh)); 3854 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3855 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3856 } 3857 if (srclen != 0) { 3858 /* 3859 * There is a AF_UNIX sockaddr_un to include as a source 3860 * address option. 3861 */ 3862 toh.level = SOL_SOCKET; 3863 toh.name = SO_SRCADDR; 3864 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3865 toh.status = 0; 3866 soappendmsg(mp, &toh, sizeof (toh)); 3867 soappendmsg(mp, src, srclen); 3868 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3869 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3870 } 3871 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3872 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3873 /* At most 3 bytes left in the message */ 3874 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3875 ASSERT(MBLKL(mp) <= (ssize_t)size); 3876 3877 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3878 if (audit_active) 3879 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3880 3881 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3882 #ifdef SOCK_DEBUG 3883 if (error) { 3884 eprintsoline(so, error); 3885 } 3886 #endif /* SOCK_DEBUG */ 3887 return (error); 3888 } 3889 3890 /* 3891 * Sending data with options on a connected stream socket. 3892 * Assumes caller has verified that SS_ISCONNECTED is set. 3893 */ 3894 static int 3895 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3896 t_uscalar_t controllen, int flags) 3897 { 3898 struct T_optdata_req tdr; 3899 mblk_t *mp; 3900 int error; 3901 ssize_t iosize; 3902 int size; 3903 struct fdbuf *fdbuf; 3904 t_uscalar_t optlen; 3905 void *fds; 3906 int fdlen; 3907 struct T_opthdr toh; 3908 sotpi_info_t *sti = SOTOTPI(so); 3909 3910 dprintso(so, 1, 3911 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3912 3913 /* 3914 * Has to be bound and connected. However, since no locks are 3915 * held the state could have changed after sotpi_sendmsg checked it 3916 * thus it is not possible to ASSERT on the state. 3917 */ 3918 3919 /* Options on connection-oriented only when SM_OPTDATA set. */ 3920 if (!(so->so_mode & SM_OPTDATA)) 3921 return (EOPNOTSUPP); 3922 3923 do { 3924 /* 3925 * Set the MORE flag if uio_resid does not fit in this 3926 * message or if the caller passed in "more". 3927 * Error for transports with zero tidu_size. 3928 */ 3929 tdr.PRIM_type = T_OPTDATA_REQ; 3930 iosize = sti->sti_tidu_size; 3931 if (iosize <= 0) 3932 return (EMSGSIZE); 3933 if (uiop->uio_resid > iosize) { 3934 tdr.DATA_flag = 1; 3935 } else { 3936 if (more) 3937 tdr.DATA_flag = 1; 3938 else 3939 tdr.DATA_flag = 0; 3940 iosize = uiop->uio_resid; 3941 } 3942 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3943 tdr.DATA_flag, iosize)); 3944 3945 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3946 tdr.OPT_length = optlen; 3947 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3948 3949 size = (int)sizeof (tdr) + optlen; 3950 /* 3951 * File descriptors only when SM_FDPASSING set. 
3952 */ 3953 error = so_getfdopt(control, controllen, 3954 !(flags & MSG_XPG4_2), &fds, &fdlen); 3955 if (error) 3956 return (error); 3957 if (fdlen != -1) { 3958 if (!(so->so_mode & SM_FDPASSING)) 3959 return (EOPNOTSUPP); 3960 3961 error = fdbuf_create(fds, fdlen, &fdbuf); 3962 if (error) 3963 return (error); 3964 mp = fdbuf_allocmsg(size, fdbuf); 3965 } else { 3966 mp = soallocproto(size, _ALLOC_INTR); 3967 if (mp == NULL) { 3968 /* 3969 * Caught a signal waiting for memory. 3970 * Let send* return EINTR. 3971 */ 3972 return (EINTR); 3973 } 3974 } 3975 soappendmsg(mp, &tdr, sizeof (tdr)); 3976 3977 if (fdlen != -1) { 3978 ASSERT(fdbuf != NULL); 3979 toh.level = SOL_SOCKET; 3980 toh.name = SO_FILEP; 3981 toh.len = fdbuf->fd_size + 3982 (t_uscalar_t)sizeof (struct T_opthdr); 3983 toh.status = 0; 3984 soappendmsg(mp, &toh, sizeof (toh)); 3985 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3986 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3987 } 3988 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3989 /* At most 3 bytes left in the message */ 3990 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3991 ASSERT(MBLKL(mp) <= (ssize_t)size); 3992 3993 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3994 3995 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3996 0, MSG_BAND, 0); 3997 if (error) { 3998 eprintsoline(so, error); 3999 return (error); 4000 } 4001 control = NULL; 4002 if (uiop->uio_resid > 0) { 4003 /* 4004 * Recheck for fatal errors. Fail write even though 4005 * some data have been written. This is consistent 4006 * with strwrite semantics and BSD sockets semantics. 4007 */ 4008 if (so->so_state & SS_CANTSENDMORE) { 4009 eprintsoline(so, error); 4010 return (EPIPE); 4011 } 4012 if (so->so_error != 0) { 4013 mutex_enter(&so->so_lock); 4014 error = sogeterr(so, B_TRUE); 4015 mutex_exit(&so->so_lock); 4016 if (error != 0) { 4017 eprintsoline(so, error); 4018 return (error); 4019 } 4020 } 4021 } 4022 } while (uiop->uio_resid > 0); 4023 return (0); 4024 } 4025 4026 /* 4027 * Sending data on a datagram socket. 4028 * Assumes caller has verified that SS_ISBOUND etc. are set. 4029 * 4030 * For AF_UNIX the destination address is translated to an internal 4031 * name and the source address is passed as an option. 4032 */ 4033 int 4034 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 4035 struct uio *uiop, int flags) 4036 { 4037 struct T_unitdata_req tudr; 4038 mblk_t *mp; 4039 int error; 4040 void *addr; 4041 socklen_t addrlen; 4042 void *src; 4043 socklen_t srclen; 4044 ssize_t len; 4045 sotpi_info_t *sti = SOTOTPI(so); 4046 4047 ASSERT(name != NULL && namelen != 0); 4048 4049 len = uiop->uio_resid; 4050 if (len > sti->sti_tidu_size) { 4051 error = EMSGSIZE; 4052 goto done; 4053 } 4054 4055 /* Length and family checks */ 4056 error = so_addr_verify(so, name, namelen); 4057 if (error != 0) 4058 goto done; 4059 4060 if (sti->sti_direct) 4061 return (sodgram_direct(so, name, namelen, uiop, flags)); 4062 4063 if (so->so_family == AF_UNIX) { 4064 if (sti->sti_faddr_noxlate) { 4065 /* 4066 * Already have a transport internal address. Do not 4067 * pass any (transport internal) source address. 4068 */ 4069 addr = name; 4070 addrlen = namelen; 4071 src = NULL; 4072 srclen = 0; 4073 } else { 4074 /* 4075 * Pass the sockaddr_un source address as an option 4076 * and translate the remote address. 4077 * 4078 * Note that this code does not prevent sti_laddr_sa 4079 * from changing while it is being used. 
Thus 4080 * if an unbind+bind occurs concurrently with this 4081 * send the peer might see a partially new and a 4082 * partially old "from" address. 4083 */ 4084 src = sti->sti_laddr_sa; 4085 srclen = (socklen_t)sti->sti_laddr_len; 4086 dprintso(so, 1, 4087 ("sosend_dgram UNIX: srclen %d, src %p\n", 4088 srclen, src)); 4089 error = so_ux_addr_xlate(so, name, namelen, 4090 (flags & MSG_XPG4_2), 4091 &addr, &addrlen); 4092 if (error) { 4093 eprintsoline(so, error); 4094 goto done; 4095 } 4096 } 4097 } else { 4098 addr = name; 4099 addrlen = namelen; 4100 src = NULL; 4101 srclen = 0; 4102 } 4103 tudr.PRIM_type = T_UNITDATA_REQ; 4104 tudr.DEST_length = addrlen; 4105 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4106 if (srclen == 0) { 4107 tudr.OPT_length = 0; 4108 tudr.OPT_offset = 0; 4109 4110 mp = soallocproto2(&tudr, sizeof (tudr), 4111 addr, addrlen, 0, _ALLOC_INTR); 4112 if (mp == NULL) { 4113 /* 4114 * Caught a signal waiting for memory. 4115 * Let send* return EINTR. 4116 */ 4117 error = EINTR; 4118 goto done; 4119 } 4120 } else { 4121 /* 4122 * There is a AF_UNIX sockaddr_un to include as a source 4123 * address option. 4124 */ 4125 struct T_opthdr toh; 4126 ssize_t size; 4127 4128 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4129 _TPI_ALIGN_TOPT(srclen)); 4130 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4131 _TPI_ALIGN_TOPT(addrlen)); 4132 4133 toh.level = SOL_SOCKET; 4134 toh.name = SO_SRCADDR; 4135 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4136 toh.status = 0; 4137 4138 size = tudr.OPT_offset + tudr.OPT_length; 4139 mp = soallocproto2(&tudr, sizeof (tudr), 4140 addr, addrlen, size, _ALLOC_INTR); 4141 if (mp == NULL) { 4142 /* 4143 * Caught a signal waiting for memory. 4144 * Let send* return EINTR. 4145 */ 4146 error = EINTR; 4147 goto done; 4148 } 4149 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4150 soappendmsg(mp, &toh, sizeof (toh)); 4151 soappendmsg(mp, src, srclen); 4152 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4153 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4154 } 4155 4156 if (audit_active) 4157 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4158 4159 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4160 done: 4161 #ifdef SOCK_DEBUG 4162 if (error) { 4163 eprintsoline(so, error); 4164 } 4165 #endif /* SOCK_DEBUG */ 4166 return (error); 4167 } 4168 4169 /* 4170 * Sending data on a connected stream socket. 4171 * Assumes caller has verified that SS_ISCONNECTED is set. 4172 */ 4173 int 4174 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4175 int sflag) 4176 { 4177 struct T_data_req tdr; 4178 mblk_t *mp; 4179 int error; 4180 ssize_t iosize; 4181 sotpi_info_t *sti = SOTOTPI(so); 4182 4183 dprintso(so, 1, 4184 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4185 (void *)so, uiop->uio_resid, prim, sflag)); 4186 4187 /* 4188 * Has to be bound and connected. However, since no locks are 4189 * held the state could have changed after sotpi_sendmsg checked it 4190 * thus it is not possible to ASSERT on the state. 4191 */ 4192 4193 do { 4194 /* 4195 * Set the MORE flag if uio_resid does not fit in this 4196 * message or if the caller passed in "more". 4197 * Error for transports with zero tidu_size. 
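 *
 * Illustration with hypothetical numbers: for an sti_tidu_size of
 * 16384, a 40000 byte request is sent as three messages carrying
 * 16384, 16384 and 7232 bytes with MORE_flag 1, 1 and 0 respectively
 * (the final MORE_flag is 1 instead when the caller passed in "more").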
4198 */ 4199 tdr.PRIM_type = prim; 4200 iosize = sti->sti_tidu_size; 4201 if (iosize <= 0) 4202 return (EMSGSIZE); 4203 if (uiop->uio_resid > iosize) { 4204 tdr.MORE_flag = 1; 4205 } else { 4206 if (more) 4207 tdr.MORE_flag = 1; 4208 else 4209 tdr.MORE_flag = 0; 4210 iosize = uiop->uio_resid; 4211 } 4212 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4213 prim, tdr.MORE_flag, iosize)); 4214 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 4215 if (mp == NULL) { 4216 /* 4217 * Caught a signal waiting for memory. 4218 * Let send* return EINTR. 4219 */ 4220 return (EINTR); 4221 } 4222 4223 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4224 0, sflag | MSG_BAND, 0); 4225 if (error) { 4226 eprintsoline(so, error); 4227 return (error); 4228 } 4229 if (uiop->uio_resid > 0) { 4230 /* 4231 * Recheck for fatal errors. Fail write even though 4232 * some data have been written. This is consistent 4233 * with strwrite semantics and BSD sockets semantics. 4234 */ 4235 if (so->so_state & SS_CANTSENDMORE) { 4236 eprintsoline(so, error); 4237 return (EPIPE); 4238 } 4239 if (so->so_error != 0) { 4240 mutex_enter(&so->so_lock); 4241 error = sogeterr(so, B_TRUE); 4242 mutex_exit(&so->so_lock); 4243 if (error != 0) { 4244 eprintsoline(so, error); 4245 return (error); 4246 } 4247 } 4248 } 4249 } while (uiop->uio_resid > 0); 4250 return (0); 4251 } 4252 4253 /* 4254 * Check the state for errors and call the appropriate send function. 4255 * 4256 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4257 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4258 * after sending the message. 4259 */ 4260 static int 4261 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4262 struct cred *cr) 4263 { 4264 int so_state; 4265 int so_mode; 4266 int error; 4267 struct sockaddr *name; 4268 t_uscalar_t namelen; 4269 int dontroute; 4270 int flags; 4271 sotpi_info_t *sti = SOTOTPI(so); 4272 4273 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4274 (void *)so, (void *)msg, msg->msg_flags, 4275 pr_state(so->so_state, so->so_mode), so->so_error)); 4276 4277 if (so->so_version == SOV_STREAM) { 4278 /* The imaginary "sockmod" has been popped - act as a stream */ 4279 so_update_attrs(so, SOMOD); 4280 return (strwrite(SOTOV(so), uiop, cr)); 4281 } 4282 4283 mutex_enter(&so->so_lock); 4284 so_state = so->so_state; 4285 4286 if (so_state & SS_CANTSENDMORE) { 4287 mutex_exit(&so->so_lock); 4288 return (EPIPE); 4289 } 4290 4291 if (so->so_error != 0) { 4292 error = sogeterr(so, B_TRUE); 4293 if (error != 0) { 4294 mutex_exit(&so->so_lock); 4295 return (error); 4296 } 4297 } 4298 4299 name = (struct sockaddr *)msg->msg_name; 4300 namelen = msg->msg_namelen; 4301 4302 so_mode = so->so_mode; 4303 4304 if (name == NULL) { 4305 if (!(so_state & SS_ISCONNECTED)) { 4306 mutex_exit(&so->so_lock); 4307 if (so_mode & SM_CONNREQUIRED) 4308 return (ENOTCONN); 4309 else 4310 return (EDESTADDRREQ); 4311 } 4312 if (so_mode & SM_CONNREQUIRED) { 4313 name = NULL; 4314 namelen = 0; 4315 } else { 4316 /* 4317 * Note that this code does not prevent sti_faddr_sa 4318 * from changing while it is being used. Thus 4319 * if an "unconnect"+connect occurs concurrently with 4320 * this send the datagram might be delivered to a 4321 * garbaled address. 
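 *
 * For example (illustrative interleaving): if another thread
 * disconnects and reconnects this socket while one of the
 * sosend_dgram*() routines is copying sti_faddr_sa into the outgoing
 * request, the first part of the copied address may come from the old
 * peer and the remainder from the new one.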
4322 */ 4323 ASSERT(sti->sti_faddr_sa); 4324 name = sti->sti_faddr_sa; 4325 namelen = (t_uscalar_t)sti->sti_faddr_len; 4326 } 4327 } else { 4328 if (!(so_state & SS_ISCONNECTED) && 4329 (so_mode & SM_CONNREQUIRED)) { 4330 /* Required but not connected */ 4331 mutex_exit(&so->so_lock); 4332 return (ENOTCONN); 4333 } 4334 /* 4335 * Ignore the address on connection-oriented sockets. 4336 * Just like BSD this code does not generate an error for 4337 * TCP (a CONNREQUIRED socket) when sending to an address 4338 * passed in with sendto/sendmsg. Instead the data is 4339 * delivered on the connection as if no address had been 4340 * supplied. 4341 */ 4342 if ((so_state & SS_ISCONNECTED) && 4343 !(so_mode & SM_CONNREQUIRED)) { 4344 mutex_exit(&so->so_lock); 4345 return (EISCONN); 4346 } 4347 if (!(so_state & SS_ISBOUND)) { 4348 so_lock_single(so); /* Set SOLOCKED */ 4349 error = sotpi_bind(so, NULL, 0, 4350 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4351 so_unlock_single(so, SOLOCKED); 4352 if (error) { 4353 mutex_exit(&so->so_lock); 4354 eprintsoline(so, error); 4355 return (error); 4356 } 4357 } 4358 /* 4359 * Handle delayed datagram errors. These are only queued 4360 * when the application sets SO_DGRAM_ERRIND. 4361 * Return the error if we are sending to the address 4362 * that was returned in the last T_UDERROR_IND. 4363 * If sending to some other address discard the delayed 4364 * error indication. 4365 */ 4366 if (sti->sti_delayed_error) { 4367 struct T_uderror_ind *tudi; 4368 void *addr; 4369 t_uscalar_t addrlen; 4370 boolean_t match = B_FALSE; 4371 4372 ASSERT(sti->sti_eaddr_mp); 4373 error = sti->sti_delayed_error; 4374 sti->sti_delayed_error = 0; 4375 tudi = 4376 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4377 addrlen = tudi->DEST_length; 4378 addr = sogetoff(sti->sti_eaddr_mp, 4379 tudi->DEST_offset, addrlen, 1); 4380 ASSERT(addr); /* Checked by strsock_proto */ 4381 switch (so->so_family) { 4382 case AF_INET: { 4383 /* Compare just IP address and port */ 4384 sin_t *sin1 = (sin_t *)name; 4385 sin_t *sin2 = (sin_t *)addr; 4386 4387 if (addrlen == sizeof (sin_t) && 4388 namelen == addrlen && 4389 sin1->sin_port == sin2->sin_port && 4390 sin1->sin_addr.s_addr == 4391 sin2->sin_addr.s_addr) 4392 match = B_TRUE; 4393 break; 4394 } 4395 case AF_INET6: { 4396 /* Compare just IP address and port. 
Not flow */ 4397 sin6_t *sin1 = (sin6_t *)name; 4398 sin6_t *sin2 = (sin6_t *)addr; 4399 4400 if (addrlen == sizeof (sin6_t) && 4401 namelen == addrlen && 4402 sin1->sin6_port == sin2->sin6_port && 4403 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4404 &sin2->sin6_addr)) 4405 match = B_TRUE; 4406 break; 4407 } 4408 case AF_UNIX: 4409 default: 4410 if (namelen == addrlen && 4411 bcmp(name, addr, namelen) == 0) 4412 match = B_TRUE; 4413 } 4414 if (match) { 4415 freemsg(sti->sti_eaddr_mp); 4416 sti->sti_eaddr_mp = NULL; 4417 mutex_exit(&so->so_lock); 4418 #ifdef DEBUG 4419 dprintso(so, 0, 4420 ("sockfs delayed error %d for %s\n", 4421 error, 4422 pr_addr(so->so_family, name, namelen))); 4423 #endif /* DEBUG */ 4424 return (error); 4425 } 4426 freemsg(sti->sti_eaddr_mp); 4427 sti->sti_eaddr_mp = NULL; 4428 } 4429 } 4430 mutex_exit(&so->so_lock); 4431 4432 flags = msg->msg_flags; 4433 dontroute = 0; 4434 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4435 uint32_t val; 4436 4437 val = 1; 4438 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4439 &val, (t_uscalar_t)sizeof (val), cr); 4440 if (error) 4441 return (error); 4442 dontroute = 1; 4443 } 4444 4445 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4446 error = EOPNOTSUPP; 4447 goto done; 4448 } 4449 if (msg->msg_controllen != 0) { 4450 if (!(so_mode & SM_CONNREQUIRED)) { 4451 so_update_attrs(so, SOMOD); 4452 error = sosend_dgramcmsg(so, name, namelen, uiop, 4453 msg->msg_control, msg->msg_controllen, flags); 4454 } else { 4455 if (flags & MSG_OOB) { 4456 /* Can't generate T_EXDATA_REQ with options */ 4457 error = EOPNOTSUPP; 4458 goto done; 4459 } 4460 so_update_attrs(so, SOMOD); 4461 error = sosend_svccmsg(so, uiop, 4462 !(flags & MSG_EOR), 4463 msg->msg_control, msg->msg_controllen, 4464 flags); 4465 } 4466 goto done; 4467 } 4468 4469 so_update_attrs(so, SOMOD); 4470 if (!(so_mode & SM_CONNREQUIRED)) { 4471 /* 4472 * If there is no SO_DONTROUTE to turn off return immediately 4473 * from send_dgram. This can allow tail-call optimizations. 4474 */ 4475 if (!dontroute) { 4476 return (sosend_dgram(so, name, namelen, uiop, flags)); 4477 } 4478 error = sosend_dgram(so, name, namelen, uiop, flags); 4479 } else { 4480 t_scalar_t prim; 4481 int sflag; 4482 4483 /* Ignore msg_name in the connected state */ 4484 if (flags & MSG_OOB) { 4485 prim = T_EXDATA_REQ; 4486 /* 4487 * Send down T_EXDATA_REQ even if there is flow 4488 * control for data. 4489 */ 4490 sflag = MSG_IGNFLOW; 4491 } else { 4492 if (so_mode & SM_BYTESTREAM) { 4493 /* Byte stream transport - use write */ 4494 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4495 4496 /* Send M_DATA messages */ 4497 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4498 (error = nl7c_data(so, uiop)) >= 0) { 4499 /* NL7C consumed the data */ 4500 return (error); 4501 } 4502 /* 4503 * If there is no SO_DONTROUTE to turn off, 4504 * sti_direct is on, and there is no flow 4505 * control, we can take the fast path. 4506 */ 4507 if (!dontroute && sti->sti_direct != 0 && 4508 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4509 return (sostream_direct(so, uiop, 4510 NULL, cr)); 4511 } 4512 error = strwrite(SOTOV(so), uiop, cr); 4513 goto done; 4514 } 4515 prim = T_DATA_REQ; 4516 sflag = 0; 4517 } 4518 /* 4519 * If there is no SO_DONTROUTE to turn off return immediately 4520 * from sosend_svc. This can allow tail-call optimizations. 
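 *
 * For reference, a condensed map of the send paths chosen in this
 * function (summary only; the checks in the code are authoritative):
 *
 *	control data, connectionless		sosend_dgramcmsg()
 *	control data, connection-oriented	sosend_svccmsg()
 *	no control data, connectionless		sosend_dgram()
 *	no control data, MSG_OOB		sosend_svc() with T_EXDATA_REQ
 *	no control data, byte stream		sostream_direct() or strwrite()
 *	no control data, otherwise		sosend_svc() with T_DATA_REQ
 *
 * MSG_OOB on a transport without SM_EXDATA, or MSG_OOB combined with
 * control data on a connection-oriented socket, fails with EOPNOTSUPP.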
4521 */ 4522 if (!dontroute) 4523 return (sosend_svc(so, uiop, prim, 4524 !(flags & MSG_EOR), sflag)); 4525 error = sosend_svc(so, uiop, prim, 4526 !(flags & MSG_EOR), sflag); 4527 } 4528 ASSERT(dontroute); 4529 done: 4530 if (dontroute) { 4531 uint32_t val; 4532 4533 val = 0; 4534 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4535 &val, (t_uscalar_t)sizeof (val), cr); 4536 } 4537 return (error); 4538 } 4539 4540 /* 4541 * kstrwritemp() has very similar semantics as that of strwrite(). 4542 * The main difference is it obtains mblks from the caller and also 4543 * does not do any copy as done in strwrite() from user buffers to 4544 * kernel buffers. 4545 * 4546 * Currently, this routine is used by sendfile to send data allocated 4547 * within the kernel without any copying. This interface does not use the 4548 * synchronous stream interface as synch. stream interface implies 4549 * copying. 4550 */ 4551 int 4552 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4553 { 4554 struct stdata *stp; 4555 struct queue *wqp; 4556 mblk_t *newmp; 4557 char waitflag; 4558 int tempmode; 4559 int error = 0; 4560 int done = 0; 4561 struct sonode *so; 4562 boolean_t direct; 4563 4564 ASSERT(vp->v_stream); 4565 stp = vp->v_stream; 4566 4567 so = VTOSO(vp); 4568 direct = _SOTOTPI(so)->sti_direct; 4569 4570 /* 4571 * This is the sockfs direct fast path. canputnext() need 4572 * not be accurate so we don't grab the sd_lock here. If 4573 * we get flow-controlled, we grab sd_lock just before the 4574 * do..while loop below to emulate what strwrite() does. 4575 */ 4576 wqp = stp->sd_wrq; 4577 if (canputnext(wqp) && direct && 4578 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4579 return (sostream_direct(so, NULL, mp, CRED())); 4580 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4581 /* Fast check of flags before acquiring the lock */ 4582 mutex_enter(&stp->sd_lock); 4583 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4584 mutex_exit(&stp->sd_lock); 4585 if (error != 0) { 4586 if (!(stp->sd_flag & STPLEX) && 4587 (stp->sd_wput_opt & SW_SIGPIPE)) { 4588 error = EPIPE; 4589 } 4590 return (error); 4591 } 4592 } 4593 4594 waitflag = WRITEWAIT; 4595 if (stp->sd_flag & OLDNDELAY) 4596 tempmode = fmode & ~FNDELAY; 4597 else 4598 tempmode = fmode; 4599 4600 mutex_enter(&stp->sd_lock); 4601 do { 4602 if (canputnext(wqp)) { 4603 mutex_exit(&stp->sd_lock); 4604 if (stp->sd_wputdatafunc != NULL) { 4605 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4606 NULL, NULL, NULL); 4607 if (newmp == NULL) { 4608 /* The caller will free mp */ 4609 return (ECOMM); 4610 } 4611 mp = newmp; 4612 } 4613 putnext(wqp, mp); 4614 return (0); 4615 } 4616 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4617 &done); 4618 } while (error == 0 && !done); 4619 4620 mutex_exit(&stp->sd_lock); 4621 /* 4622 * EAGAIN tells the application to try again. ENOMEM 4623 * is returned only if the memory allocation size 4624 * exceeds the physical limits of the system. ENOMEM 4625 * can't be true here. 
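 *
 * Callers are expected to treat EAGAIN as "try again later". A
 * hypothetical non-blocking user of this interface might retry along
 * these lines (sketch only, not an interface contract; on failure the
 * caller still owns and must eventually free mp):
 *
 *	int err;
 *
 *	while ((err = kstrwritemp(vp, mp, fmode)) == EAGAIN) {
 *		wait until the stream drains, e.g. poll for writability;
 *	}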
4626 */ 4627 if (error == ENOMEM) 4628 error = EAGAIN; 4629 return (error); 4630 } 4631 4632 /* ARGSUSED */ 4633 static int 4634 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4635 struct cred *cr, mblk_t **mpp) 4636 { 4637 int error; 4638 4639 if (so->so_family != AF_INET && so->so_family != AF_INET6) 4640 return (EAFNOSUPPORT); 4641 4642 if (so->so_state & SS_CANTSENDMORE) 4643 return (EPIPE); 4644 4645 if (so->so_type != SOCK_STREAM) 4646 return (EOPNOTSUPP); 4647 4648 if ((so->so_state & SS_ISCONNECTED) == 0) 4649 return (ENOTCONN); 4650 4651 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4652 if (error == 0) 4653 *mpp = NULL; 4654 return (error); 4655 } 4656 4657 /* 4658 * Sending data on a datagram socket. 4659 * Assumes caller has verified that SS_ISBOUND etc. are set. 4660 */ 4661 /* ARGSUSED */ 4662 static int 4663 sodgram_direct(struct sonode *so, struct sockaddr *name, 4664 socklen_t namelen, struct uio *uiop, int flags) 4665 { 4666 struct T_unitdata_req tudr; 4667 mblk_t *mp = NULL; 4668 int error = 0; 4669 void *addr; 4670 socklen_t addrlen; 4671 ssize_t len; 4672 struct stdata *stp = SOTOV(so)->v_stream; 4673 int so_state; 4674 queue_t *udp_wq; 4675 boolean_t connected; 4676 mblk_t *mpdata = NULL; 4677 sotpi_info_t *sti = SOTOTPI(so); 4678 4679 ASSERT(name != NULL && namelen != 0); 4680 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4681 ASSERT(!(so->so_mode & SM_EXDATA)); 4682 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4683 ASSERT(SOTOV(so)->v_type == VSOCK); 4684 4685 /* Caller checked for proper length */ 4686 len = uiop->uio_resid; 4687 ASSERT(len <= sti->sti_tidu_size); 4688 4689 /* Length and family checks have been done by caller */ 4690 ASSERT(name->sa_family == so->so_family); 4691 ASSERT(so->so_family == AF_INET || 4692 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4693 ASSERT(so->so_family == AF_INET6 || 4694 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4695 4696 addr = name; 4697 addrlen = namelen; 4698 4699 if (stp->sd_sidp != NULL && 4700 (error = straccess(stp, JCWRITE)) != 0) 4701 goto done; 4702 4703 so_state = so->so_state; 4704 4705 connected = so_state & SS_ISCONNECTED; 4706 if (!connected) { 4707 tudr.PRIM_type = T_UNITDATA_REQ; 4708 tudr.DEST_length = addrlen; 4709 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4710 tudr.OPT_length = 0; 4711 tudr.OPT_offset = 0; 4712 4713 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4714 _ALLOC_INTR); 4715 if (mp == NULL) { 4716 /* 4717 * Caught a signal waiting for memory. 4718 * Let send* return EINTR. 4719 */ 4720 error = EINTR; 4721 goto done; 4722 } 4723 } 4724 4725 /* 4726 * For UDP we don't break up the copyin into smaller pieces 4727 * as in the TCP case. That means if ENOMEM is returned by 4728 * mcopyinuio() then the uio vector has not been modified at 4729 * all and we fallback to either strwrite() or kstrputmsg() 4730 * below. Note also that we never generate priority messages 4731 * from here. 
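 *
 * The code below therefore ends in one of three ways (summary):
 *	1. canput() succeeds and mcopyinuio() copies the whole datagram:
 *	   the data (preceded by the T_UNITDATA_REQ header when the
 *	   socket is not connected) goes straight to udp_wput().
 *	2. mcopyinuio() fails with an error other than ENOMEM: the
 *	   header mblk, if any, is freed and the error is returned.
 *	3. Flow control or ENOMEM: fall back to strwrite() for a
 *	   connected socket, or to kstrputmsg() otherwise.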
4732 */ 4733 udp_wq = stp->sd_wrq->q_next; 4734 if (canput(udp_wq) && 4735 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4736 ASSERT(DB_TYPE(mpdata) == M_DATA); 4737 ASSERT(uiop->uio_resid == 0); 4738 if (!connected) 4739 linkb(mp, mpdata); 4740 else 4741 mp = mpdata; 4742 if (audit_active) 4743 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4744 4745 udp_wput(udp_wq, mp); 4746 return (0); 4747 } 4748 4749 ASSERT(mpdata == NULL); 4750 if (error != 0 && error != ENOMEM) { 4751 freemsg(mp); 4752 return (error); 4753 } 4754 4755 /* 4756 * For connected, let strwrite() handle the blocking case. 4757 * Otherwise we fall thru and use kstrputmsg(). 4758 */ 4759 if (connected) 4760 return (strwrite(SOTOV(so), uiop, CRED())); 4761 4762 if (audit_active) 4763 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4764 4765 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4766 done: 4767 #ifdef SOCK_DEBUG 4768 if (error != 0) { 4769 eprintsoline(so, error); 4770 } 4771 #endif /* SOCK_DEBUG */ 4772 return (error); 4773 } 4774 4775 int 4776 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4777 { 4778 struct stdata *stp = SOTOV(so)->v_stream; 4779 ssize_t iosize, rmax, maxblk; 4780 queue_t *tcp_wq = stp->sd_wrq->q_next; 4781 mblk_t *newmp; 4782 int error = 0, wflag = 0; 4783 4784 ASSERT(so->so_mode & SM_BYTESTREAM); 4785 ASSERT(SOTOV(so)->v_type == VSOCK); 4786 4787 if (stp->sd_sidp != NULL && 4788 (error = straccess(stp, JCWRITE)) != 0) 4789 return (error); 4790 4791 if (uiop == NULL) { 4792 /* 4793 * kstrwritemp() should have checked sd_flag and 4794 * flow-control before coming here. If we end up 4795 * here it means that we can simply pass down the 4796 * data to tcp. 4797 */ 4798 ASSERT(mp != NULL); 4799 if (stp->sd_wputdatafunc != NULL) { 4800 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4801 NULL, NULL, NULL); 4802 if (newmp == NULL) { 4803 /* The caller will free mp */ 4804 return (ECOMM); 4805 } 4806 mp = newmp; 4807 } 4808 tcp_wput(tcp_wq, mp); 4809 return (0); 4810 } 4811 4812 /* Fallback to strwrite() to do proper error handling */ 4813 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4814 return (strwrite(SOTOV(so), uiop, cr)); 4815 4816 rmax = stp->sd_qn_maxpsz; 4817 ASSERT(rmax >= 0 || rmax == INFPSZ); 4818 if (rmax == 0 || uiop->uio_resid <= 0) 4819 return (0); 4820 4821 if (rmax == INFPSZ) 4822 rmax = uiop->uio_resid; 4823 4824 maxblk = stp->sd_maxblk; 4825 4826 for (;;) { 4827 iosize = MIN(uiop->uio_resid, rmax); 4828 4829 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4830 if (mp == NULL) { 4831 /* 4832 * Fallback to strwrite() for ENOMEM; if this 4833 * is our first time in this routine and the uio 4834 * vector has not been modified, we will end up 4835 * calling strwrite() without any flag set. 4836 */ 4837 if (error == ENOMEM) 4838 goto slow_send; 4839 else 4840 return (error); 4841 } 4842 ASSERT(uiop->uio_resid >= 0); 4843 /* 4844 * If mp is non-NULL and ENOMEM is set, it means that 4845 * mcopyinuio() was able to break down some of the user 4846 * data into one or more mblks. Send the partial data 4847 * to tcp and let the rest be handled in strwrite(). 
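 *
 * Rough illustration with hypothetical sizes: with sd_qn_maxpsz of
 * 65536 and sd_maxblk of 1448, a 100000 byte write would normally go
 * through this loop as a 65536 byte pass followed by a 34464 byte
 * pass, each pass carved by mcopyinuio() into roughly maxblk sized
 * mblks; if memory runs short part way through a pass, whatever was
 * copied is still sent to tcp here and strwrite() handles the rest.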
4848 */ 4849 ASSERT(error == 0 || error == ENOMEM); 4850 if (stp->sd_wputdatafunc != NULL) { 4851 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4852 NULL, NULL, NULL); 4853 if (newmp == NULL) { 4854 /* The caller will free mp */ 4855 return (ECOMM); 4856 } 4857 mp = newmp; 4858 } 4859 tcp_wput(tcp_wq, mp); 4860 4861 wflag |= NOINTR; 4862 4863 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4864 ASSERT(error == 0); 4865 break; 4866 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4867 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4868 slow_send: 4869 /* 4870 * We were able to send down partial data using 4871 * the direct call interface, but are now relying 4872 * on strwrite() to handle the non-fastpath cases. 4873 * If the socket is blocking we will sleep in 4874 * strwaitq() until write is permitted, otherwise, 4875 * we will need to return the amount of bytes 4876 * written so far back to the app. This is the 4877 * reason why we pass NOINTR flag to strwrite() 4878 * for non-blocking socket, because we don't want 4879 * to return EAGAIN when portion of the user data 4880 * has actually been sent down. 4881 */ 4882 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4883 } 4884 } 4885 return (0); 4886 } 4887 4888 /* 4889 * Update sti_faddr by asking the transport (unless AF_UNIX). 4890 */ 4891 /* ARGSUSED */ 4892 int 4893 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4894 boolean_t accept, struct cred *cr) 4895 { 4896 struct strbuf strbuf; 4897 int error = 0, res; 4898 void *addr; 4899 t_uscalar_t addrlen; 4900 k_sigset_t smask; 4901 sotpi_info_t *sti = SOTOTPI(so); 4902 4903 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4904 (void *)so, pr_state(so->so_state, so->so_mode))); 4905 4906 ASSERT(*namelen > 0); 4907 mutex_enter(&so->so_lock); 4908 so_lock_single(so); /* Set SOLOCKED */ 4909 4910 if (accept) { 4911 bcopy(sti->sti_faddr_sa, name, 4912 MIN(*namelen, sti->sti_faddr_len)); 4913 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4914 goto done; 4915 } 4916 4917 if (!(so->so_state & SS_ISCONNECTED)) { 4918 error = ENOTCONN; 4919 goto done; 4920 } 4921 /* Added this check for X/Open */ 4922 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4923 error = EINVAL; 4924 if (xnet_check_print) { 4925 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4926 } 4927 goto done; 4928 } 4929 4930 if (sti->sti_faddr_valid) { 4931 bcopy(sti->sti_faddr_sa, name, 4932 MIN(*namelen, sti->sti_faddr_len)); 4933 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4934 goto done; 4935 } 4936 4937 #ifdef DEBUG 4938 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4939 pr_addr(so->so_family, sti->sti_faddr_sa, 4940 (t_uscalar_t)sti->sti_faddr_len))); 4941 #endif /* DEBUG */ 4942 4943 if (so->so_family == AF_UNIX) { 4944 /* Transport has different name space - return local info */ 4945 if (sti->sti_faddr_noxlate) 4946 *namelen = 0; 4947 error = 0; 4948 goto done; 4949 } 4950 4951 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4952 4953 ASSERT(sti->sti_faddr_sa); 4954 /* Allocate local buffer to use with ioctl */ 4955 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4956 mutex_exit(&so->so_lock); 4957 addr = kmem_alloc(addrlen, KM_SLEEP); 4958 4959 /* 4960 * Issue TI_GETPEERNAME with signals masked. 4961 * Put the result in sti_faddr_sa so that getpeername works after 4962 * a shutdown(output). 4963 * If the ioctl fails (e.g. 
due to an ECONNRESET) the error is reposted 4964 * back to the socket. 4965 */ 4966 strbuf.buf = addr; 4967 strbuf.maxlen = addrlen; 4968 strbuf.len = 0; 4969 4970 sigintr(&smask, 0); 4971 res = 0; 4972 ASSERT(cr); 4973 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4974 0, K_TO_K, cr, &res); 4975 sigunintr(&smask); 4976 4977 mutex_enter(&so->so_lock); 4978 /* 4979 * If there is an error record the error in so_error but don't fail 4980 * the getpeername. Instead fall back on the recorded 4981 * sti->sti_faddr_sa. 4982 */ 4983 if (error) { 4984 /* 4985 * Various stream head errors can be returned to the ioctl. 4986 * However, it is impossible to determine which ones of 4987 * these are really socket level errors that were incorrectly 4988 * consumed by the ioctl. Thus this code silently ignores the 4989 * error - the code explicitly does not reinstate the error 4990 * using soseterror(). 4991 * Experiments have shown that at least this set of 4992 * errors are reported and should not be reinstated on the 4993 * socket: 4994 * EINVAL E.g. if an I_LINK was in effect when 4995 * getpeername was called. 4996 * EPIPE The ioctl error semantics prefer the write 4997 * side error over the read side error. 4998 * ENOTCONN The transport just got disconnected but 4999 * sockfs had not yet seen the T_DISCON_IND 5000 * when issuing the ioctl. 5001 */ 5002 error = 0; 5003 } else if (res == 0 && strbuf.len > 0 && 5004 (so->so_state & SS_ISCONNECTED)) { 5005 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 5006 sti->sti_faddr_len = (socklen_t)strbuf.len; 5007 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 5008 sti->sti_faddr_valid = 1; 5009 5010 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 5011 *namelen = sti->sti_faddr_len; 5012 } 5013 kmem_free(addr, addrlen); 5014 #ifdef DEBUG 5015 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 5016 pr_addr(so->so_family, sti->sti_faddr_sa, 5017 (t_uscalar_t)sti->sti_faddr_len))); 5018 #endif /* DEBUG */ 5019 done: 5020 so_unlock_single(so, SOLOCKED); 5021 mutex_exit(&so->so_lock); 5022 return (error); 5023 } 5024 5025 /* 5026 * Update sti_laddr by asking the transport (unless AF_UNIX). 5027 */ 5028 int 5029 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 5030 struct cred *cr) 5031 { 5032 struct strbuf strbuf; 5033 int error = 0, res; 5034 void *addr; 5035 t_uscalar_t addrlen; 5036 k_sigset_t smask; 5037 sotpi_info_t *sti = SOTOTPI(so); 5038 5039 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 5040 (void *)so, pr_state(so->so_state, so->so_mode))); 5041 5042 ASSERT(*namelen > 0); 5043 mutex_enter(&so->so_lock); 5044 so_lock_single(so); /* Set SOLOCKED */ 5045 5046 #ifdef DEBUG 5047 5048 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 5049 pr_addr(so->so_family, sti->sti_laddr_sa, 5050 (t_uscalar_t)sti->sti_laddr_len))); 5051 #endif /* DEBUG */ 5052 if (sti->sti_laddr_valid) { 5053 bcopy(sti->sti_laddr_sa, name, 5054 MIN(*namelen, sti->sti_laddr_len)); 5055 *namelen = sti->sti_laddr_len; 5056 goto done; 5057 } 5058 5059 if (so->so_family == AF_UNIX) { 5060 /* Transport has different name space - return local info */ 5061 error = 0; 5062 *namelen = 0; 5063 goto done; 5064 } 5065 if (!(so->so_state & SS_ISBOUND)) { 5066 /* If not bound, then nothing to return.
*/ 5067 error = 0; 5068 goto done; 5069 } 5070 5071 /* Allocate local buffer to use with ioctl */ 5072 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 5073 mutex_exit(&so->so_lock); 5074 addr = kmem_alloc(addrlen, KM_SLEEP); 5075 5076 /* 5077 * Issue TI_GETMYNAME with signals masked. 5078 * Put the result in sti_laddr_sa so that getsockname works after 5079 * a shutdown(output). 5080 * If the ioctl fails (e.g. due to an ECONNRESET) the error is reposted 5081 * back to the socket. 5082 */ 5083 strbuf.buf = addr; 5084 strbuf.maxlen = addrlen; 5085 strbuf.len = 0; 5086 5087 sigintr(&smask, 0); 5088 res = 0; 5089 ASSERT(cr); 5090 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5091 0, K_TO_K, cr, &res); 5092 sigunintr(&smask); 5093 5094 mutex_enter(&so->so_lock); 5095 /* 5096 * If there is an error record the error in so_error but don't fail 5097 * the getsockname. Instead fall back on the recorded 5098 * sti->sti_laddr_sa. 5099 */ 5100 if (error) { 5101 /* 5102 * Various stream head errors can be returned to the ioctl. 5103 * However, it is impossible to determine which ones of 5104 * these are really socket level errors that were incorrectly 5105 * consumed by the ioctl. Thus this code silently ignores the 5106 * error - the code explicitly does not reinstate the error 5107 * using soseterror(). 5108 * Experiments have shown that at least this set of 5109 * errors are reported and should not be reinstated on the 5110 * socket: 5111 * EINVAL E.g. if an I_LINK was in effect when 5112 * getsockname was called. 5113 * EPIPE The ioctl error semantics prefer the write 5114 * side error over the read side error. 5115 */ 5116 error = 0; 5117 } else if (res == 0 && strbuf.len > 0 && 5118 (so->so_state & SS_ISBOUND)) { 5119 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5120 sti->sti_laddr_len = (socklen_t)strbuf.len; 5121 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5122 sti->sti_laddr_valid = 1; 5123 5124 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5125 *namelen = sti->sti_laddr_len; 5126 } 5127 kmem_free(addr, addrlen); 5128 #ifdef DEBUG 5129 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5130 pr_addr(so->so_family, sti->sti_laddr_sa, 5131 (t_uscalar_t)sti->sti_laddr_len))); 5132 #endif /* DEBUG */ 5133 done: 5134 so_unlock_single(so, SOLOCKED); 5135 mutex_exit(&so->so_lock); 5136 return (error); 5137 } 5138 5139 /* 5140 * Get socket options. For SOL_SOCKET options some options are handled 5141 * by sockfs while others use the value recorded in the sonode as a 5142 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5143 * 5144 * On return, at most *optlenp bytes are copied to optval.
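 *
 * The transport is queried with a single T_SVR4_OPTMGMT_REQ carrying
 * MGMT_flags T_CHECK; the message built by soallocproto3() below is
 * laid out as (sketch):
 *
 *	struct T_optmgmt_req	OPT_offset = sizeof (optmgmt_req),
 *				OPT_length = sizeof (oh) + maxlen
 *	struct opthdr		level/name as requested, len = maxlen
 *	maxlen bytes		space reserved for the transport's answer
 *
 * The T_OPTMGMT_ACK is expected to echo an opthdr followed by the
 * option value; at most maxlen of those bytes are copied out.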
5145 */ 5146 /* ARGSUSED */ 5147 int 5148 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5149 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5150 { 5151 struct T_optmgmt_req optmgmt_req; 5152 struct T_optmgmt_ack *optmgmt_ack; 5153 struct opthdr oh; 5154 struct opthdr *opt_res; 5155 mblk_t *mp = NULL; 5156 int error = 0; 5157 void *option = NULL; /* Set if fallback value */ 5158 t_uscalar_t maxlen = *optlenp; 5159 t_uscalar_t len; 5160 uint32_t value; 5161 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5162 struct timeval32 tmo_val32; 5163 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5164 5165 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5166 (void *)so, level, option_name, optval, (void *)optlenp, 5167 pr_state(so->so_state, so->so_mode))); 5168 5169 mutex_enter(&so->so_lock); 5170 so_lock_single(so); /* Set SOLOCKED */ 5171 5172 /* 5173 * Check for SOL_SOCKET options. 5174 * Certain SOL_SOCKET options are returned directly whereas 5175 * others only provide a default (fallback) value should 5176 * the T_SVR4_OPTMGMT_REQ fail. 5177 */ 5178 if (level == SOL_SOCKET) { 5179 /* Check parameters */ 5180 switch (option_name) { 5181 case SO_TYPE: 5182 case SO_ERROR: 5183 case SO_DEBUG: 5184 case SO_ACCEPTCONN: 5185 case SO_REUSEADDR: 5186 case SO_KEEPALIVE: 5187 case SO_DONTROUTE: 5188 case SO_BROADCAST: 5189 case SO_USELOOPBACK: 5190 case SO_OOBINLINE: 5191 case SO_SNDBUF: 5192 case SO_RCVBUF: 5193 #ifdef notyet 5194 case SO_SNDLOWAT: 5195 case SO_RCVLOWAT: 5196 #endif /* notyet */ 5197 case SO_DOMAIN: 5198 case SO_DGRAM_ERRIND: 5199 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5200 error = EINVAL; 5201 eprintsoline(so, error); 5202 goto done2; 5203 } 5204 break; 5205 case SO_RCVTIMEO: 5206 case SO_SNDTIMEO: 5207 if (get_udatamodel() == DATAMODEL_NONE || 5208 get_udatamodel() == DATAMODEL_NATIVE) { 5209 if (maxlen < sizeof (struct timeval)) { 5210 error = EINVAL; 5211 eprintsoline(so, error); 5212 goto done2; 5213 } 5214 } else { 5215 if (maxlen < sizeof (struct timeval32)) { 5216 error = EINVAL; 5217 eprintsoline(so, error); 5218 goto done2; 5219 } 5220 5221 } 5222 break; 5223 case SO_LINGER: 5224 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5225 error = EINVAL; 5226 eprintsoline(so, error); 5227 goto done2; 5228 } 5229 break; 5230 case SO_SND_BUFINFO: 5231 if (maxlen < (t_uscalar_t) 5232 sizeof (struct so_snd_bufinfo)) { 5233 error = EINVAL; 5234 eprintsoline(so, error); 5235 goto done2; 5236 } 5237 break; 5238 } 5239 5240 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5241 5242 switch (option_name) { 5243 case SO_TYPE: 5244 value = so->so_type; 5245 option = &value; 5246 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5247 5248 case SO_ERROR: 5249 value = sogeterr(so, B_TRUE); 5250 option = &value; 5251 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5252 5253 case SO_ACCEPTCONN: 5254 if (so->so_state & SS_ACCEPTCONN) 5255 value = SO_ACCEPTCONN; 5256 else 5257 value = 0; 5258 #ifdef DEBUG 5259 if (value) { 5260 dprintso(so, 1, 5261 ("sotpi_getsockopt: 0x%x is set\n", 5262 option_name)); 5263 } else { 5264 dprintso(so, 1, 5265 ("sotpi_getsockopt: 0x%x not set\n", 5266 option_name)); 5267 } 5268 #endif /* DEBUG */ 5269 option = &value; 5270 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5271 5272 case SO_DEBUG: 5273 case SO_REUSEADDR: 5274 case SO_KEEPALIVE: 5275 case SO_DONTROUTE: 5276 case SO_BROADCAST: 5277 case SO_USELOOPBACK: 5278 case SO_OOBINLINE: 5279 case 
SO_DGRAM_ERRIND: 5280 value = (so->so_options & option_name); 5281 #ifdef DEBUG 5282 if (value) { 5283 dprintso(so, 1, 5284 ("sotpi_getsockopt: 0x%x is set\n", 5285 option_name)); 5286 } else { 5287 dprintso(so, 1, 5288 ("sotpi_getsockopt: 0x%x not set\n", 5289 option_name)); 5290 } 5291 #endif /* DEBUG */ 5292 option = &value; 5293 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5294 5295 /* 5296 * The following options are only returned by sockfs when the 5297 * T_SVR4_OPTMGMT_REQ fails. 5298 */ 5299 case SO_LINGER: 5300 option = &so->so_linger; 5301 len = (t_uscalar_t)sizeof (struct linger); 5302 break; 5303 case SO_SNDBUF: { 5304 ssize_t lvalue; 5305 5306 /* 5307 * If the option has not been set then get a default 5308 * value from the read queue. This value is 5309 * returned if the transport fails 5310 * the T_SVR4_OPTMGMT_REQ. 5311 */ 5312 lvalue = so->so_sndbuf; 5313 if (lvalue == 0) { 5314 mutex_exit(&so->so_lock); 5315 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5316 QHIWAT, 0, &lvalue); 5317 mutex_enter(&so->so_lock); 5318 dprintso(so, 1, 5319 ("got SO_SNDBUF %ld from q\n", lvalue)); 5320 } 5321 value = (int)lvalue; 5322 option = &value; 5323 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5324 break; 5325 } 5326 case SO_RCVBUF: { 5327 ssize_t lvalue; 5328 5329 /* 5330 * If the option has not been set then get a default 5331 * value from the read queue. This value is 5332 * returned if the transport fails 5333 * the T_SVR4_OPTMGMT_REQ. 5334 * 5335 * XXX If SO_RCVBUF has been set and this is an 5336 * XPG 4.2 application then do not ask the transport 5337 * since the transport might adjust the value and not 5338 * return exactly what was set by the application. 5339 * For non-XPG 4.2 application we return the value 5340 * that the transport is actually using. 5341 */ 5342 lvalue = so->so_rcvbuf; 5343 if (lvalue == 0) { 5344 mutex_exit(&so->so_lock); 5345 (void) strqget(RD(strvp2wq(SOTOV(so))), 5346 QHIWAT, 0, &lvalue); 5347 mutex_enter(&so->so_lock); 5348 dprintso(so, 1, 5349 ("got SO_RCVBUF %ld from q\n", lvalue)); 5350 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5351 value = (int)lvalue; 5352 option = &value; 5353 goto copyout; /* skip asking transport */ 5354 } 5355 value = (int)lvalue; 5356 option = &value; 5357 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5358 break; 5359 } 5360 case SO_DOMAIN: 5361 value = so->so_family; 5362 option = &value; 5363 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5364 5365 #ifdef notyet 5366 /* 5367 * We do not implement the semantics of these options 5368 * thus we shouldn't implement the options either. 
5369 */ 5370 case SO_SNDLOWAT: 5371 value = so->so_sndlowat; 5372 option = &value; 5373 break; 5374 case SO_RCVLOWAT: 5375 value = so->so_rcvlowat; 5376 option = &value; 5377 break; 5378 #endif /* notyet */ 5379 case SO_SNDTIMEO: 5380 case SO_RCVTIMEO: { 5381 clock_t val; 5382 5383 if (option_name == SO_RCVTIMEO) 5384 val = drv_hztousec(so->so_rcvtimeo); 5385 else 5386 val = drv_hztousec(so->so_sndtimeo); 5387 tmo_val.tv_sec = val / (1000 * 1000); 5388 tmo_val.tv_usec = val % (1000 * 1000); 5389 if (get_udatamodel() == DATAMODEL_NONE || 5390 get_udatamodel() == DATAMODEL_NATIVE) { 5391 option = &tmo_val; 5392 len = sizeof (struct timeval); 5393 } else { 5394 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5395 option = &tmo_val32; 5396 len = sizeof (struct timeval32); 5397 } 5398 break; 5399 } 5400 case SO_SND_BUFINFO: { 5401 snd_bufinfo.sbi_wroff = 5402 (so->so_proto_props).sopp_wroff; 5403 snd_bufinfo.sbi_maxblk = 5404 (so->so_proto_props).sopp_maxblk; 5405 snd_bufinfo.sbi_maxpsz = 5406 (so->so_proto_props).sopp_maxpsz; 5407 snd_bufinfo.sbi_tail = 5408 (so->so_proto_props).sopp_tail; 5409 option = &snd_bufinfo; 5410 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5411 break; 5412 } 5413 } 5414 } 5415 5416 mutex_exit(&so->so_lock); 5417 5418 /* Send request */ 5419 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5420 optmgmt_req.MGMT_flags = T_CHECK; 5421 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5422 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5423 5424 oh.level = level; 5425 oh.name = option_name; 5426 oh.len = maxlen; 5427 5428 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5429 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 5430 /* Let option management work in the presence of data flow control */ 5431 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5432 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5433 mp = NULL; 5434 mutex_enter(&so->so_lock); 5435 if (error) { 5436 eprintsoline(so, error); 5437 goto done2; 5438 } 5439 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5440 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5441 if (error) { 5442 if (option != NULL) { 5443 /* We have a fallback value */ 5444 error = 0; 5445 goto copyout; 5446 } 5447 eprintsoline(so, error); 5448 goto done2; 5449 } 5450 ASSERT(mp); 5451 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5452 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5453 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5454 if (opt_res == NULL) { 5455 if (option != NULL) { 5456 /* We have a fallback value */ 5457 error = 0; 5458 goto copyout; 5459 } 5460 error = EPROTO; 5461 eprintsoline(so, error); 5462 goto done; 5463 } 5464 option = &opt_res[1]; 5465 5466 /* check to ensure that the option is within bounds */ 5467 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5468 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5469 if (option != NULL) { 5470 /* We have a fallback value */ 5471 error = 0; 5472 goto copyout; 5473 } 5474 error = EPROTO; 5475 eprintsoline(so, error); 5476 goto done; 5477 } 5478 5479 len = opt_res->len; 5480 5481 copyout: { 5482 t_uscalar_t size = MIN(len, maxlen); 5483 bcopy(option, optval, size); 5484 bcopy(&size, optlenp, sizeof (size)); 5485 } 5486 done: 5487 freemsg(mp); 5488 done2: 5489 so_unlock_single(so, SOLOCKED); 5490 mutex_exit(&so->so_lock); 5491 5492 return (error); 5493 } 5494 5495 /* 5496 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 
5497 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5498 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5499 * setsockopt has to work even if the transport does not support the option. 5500 */ 5501 /* ARGSUSED */ 5502 int 5503 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5504 const void *optval, t_uscalar_t optlen, struct cred *cr) 5505 { 5506 struct T_optmgmt_req optmgmt_req; 5507 struct opthdr oh; 5508 mblk_t *mp; 5509 int error = 0; 5510 boolean_t handled = B_FALSE; 5511 5512 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5513 (void *)so, level, option_name, optval, optlen, 5514 pr_state(so->so_state, so->so_mode))); 5515 5516 /* X/Open requires this check */ 5517 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5518 if (xnet_check_print) 5519 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5520 return (EINVAL); 5521 } 5522 5523 mutex_enter(&so->so_lock); 5524 so_lock_single(so); /* Set SOLOCKED */ 5525 mutex_exit(&so->so_lock); 5526 5527 /* 5528 * For SOCKET or TCP level options, try to set it here itself 5529 * provided socket has not been popped and we know the tcp 5530 * structure (stored in so_priv). 5531 */ 5532 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5533 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5534 (so->so_version == SOV_SOCKSTREAM) && 5535 (so->so_proto_handle != NULL)) { 5536 tcp_t *tcp = (tcp_t *)so->so_proto_handle; 5537 boolean_t onoff; 5538 5539 #define intvalue (*(int32_t *)optval) 5540 5541 switch (level) { 5542 case SOL_SOCKET: 5543 switch (option_name) { /* Check length param */ 5544 case SO_DEBUG: 5545 case SO_REUSEADDR: 5546 case SO_DONTROUTE: 5547 case SO_BROADCAST: 5548 case SO_USELOOPBACK: 5549 case SO_OOBINLINE: 5550 case SO_DGRAM_ERRIND: 5551 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5552 error = EINVAL; 5553 eprintsoline(so, error); 5554 mutex_enter(&so->so_lock); 5555 goto done2; 5556 } 5557 ASSERT(optval); 5558 onoff = intvalue != 0; 5559 handled = B_TRUE; 5560 break; 5561 case SO_SNDTIMEO: 5562 case SO_RCVTIMEO: 5563 if (get_udatamodel() == DATAMODEL_NONE || 5564 get_udatamodel() == DATAMODEL_NATIVE) { 5565 if (optlen != 5566 sizeof (struct timeval)) { 5567 error = EINVAL; 5568 eprintsoline(so, error); 5569 mutex_enter(&so->so_lock); 5570 goto done2; 5571 } 5572 } else { 5573 if (optlen != 5574 sizeof (struct timeval32)) { 5575 error = EINVAL; 5576 eprintsoline(so, error); 5577 mutex_enter(&so->so_lock); 5578 goto done2; 5579 } 5580 } 5581 ASSERT(optval); 5582 handled = B_TRUE; 5583 break; 5584 case SO_LINGER: 5585 if (optlen != 5586 (t_uscalar_t)sizeof (struct linger)) { 5587 error = EINVAL; 5588 eprintsoline(so, error); 5589 mutex_enter(&so->so_lock); 5590 goto done2; 5591 } 5592 ASSERT(optval); 5593 handled = B_TRUE; 5594 break; 5595 } 5596 5597 switch (option_name) { /* Do actions */ 5598 case SO_LINGER: { 5599 struct linger *lgr = (struct linger *)optval; 5600 5601 if (lgr->l_onoff) { 5602 tcp->tcp_linger = 1; 5603 tcp->tcp_lingertime = lgr->l_linger; 5604 so->so_linger.l_onoff = SO_LINGER; 5605 so->so_options |= SO_LINGER; 5606 } else { 5607 tcp->tcp_linger = 0; 5608 tcp->tcp_lingertime = 0; 5609 so->so_linger.l_onoff = 0; 5610 so->so_options &= ~SO_LINGER; 5611 } 5612 so->so_linger.l_linger = lgr->l_linger; 5613 handled = B_TRUE; 5614 break; 5615 } 5616 case SO_SNDTIMEO: 5617 case SO_RCVTIMEO: { 5618 struct timeval tl; 5619 clock_t val; 5620 5621 if (get_udatamodel() == DATAMODEL_NONE || 5622 
get_udatamodel() == DATAMODEL_NATIVE) 5623 bcopy(optval, &tl, 5624 sizeof (struct timeval)); 5625 else 5626 TIMEVAL32_TO_TIMEVAL(&tl, 5627 (struct timeval32 *)optval); 5628 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5629 if (option_name == SO_RCVTIMEO) 5630 so->so_rcvtimeo = drv_usectohz(val); 5631 else 5632 so->so_sndtimeo = drv_usectohz(val); 5633 break; 5634 } 5635 5636 case SO_DEBUG: 5637 tcp->tcp_debug = onoff; 5638 #ifdef SOCK_TEST 5639 if (intvalue & 2) 5640 sock_test_timelimit = 10 * hz; 5641 else 5642 sock_test_timelimit = 0; 5643 5644 if (intvalue & 4) 5645 do_useracc = 0; 5646 else 5647 do_useracc = 1; 5648 #endif /* SOCK_TEST */ 5649 break; 5650 case SO_DONTROUTE: 5651 /* 5652 * SO_DONTROUTE, SO_USELOOPBACK and 5653 * SO_BROADCAST are only of interest to IP. 5654 * We track them here only so 5655 * that we can report their current value. 5656 */ 5657 tcp->tcp_dontroute = onoff; 5658 if (onoff) 5659 so->so_options |= option_name; 5660 else 5661 so->so_options &= ~option_name; 5662 break; 5663 case SO_USELOOPBACK: 5664 tcp->tcp_useloopback = onoff; 5665 if (onoff) 5666 so->so_options |= option_name; 5667 else 5668 so->so_options &= ~option_name; 5669 break; 5670 case SO_BROADCAST: 5671 tcp->tcp_broadcast = onoff; 5672 if (onoff) 5673 so->so_options |= option_name; 5674 else 5675 so->so_options &= ~option_name; 5676 break; 5677 case SO_REUSEADDR: 5678 tcp->tcp_reuseaddr = onoff; 5679 if (onoff) 5680 so->so_options |= option_name; 5681 else 5682 so->so_options &= ~option_name; 5683 break; 5684 case SO_OOBINLINE: 5685 tcp->tcp_oobinline = onoff; 5686 if (onoff) 5687 so->so_options |= option_name; 5688 else 5689 so->so_options &= ~option_name; 5690 break; 5691 case SO_DGRAM_ERRIND: 5692 tcp->tcp_dgram_errind = onoff; 5693 if (onoff) 5694 so->so_options |= option_name; 5695 else 5696 so->so_options &= ~option_name; 5697 break; 5698 } 5699 break; 5700 case IPPROTO_TCP: 5701 switch (option_name) { 5702 case TCP_NODELAY: 5703 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5704 error = EINVAL; 5705 eprintsoline(so, error); 5706 mutex_enter(&so->so_lock); 5707 goto done2; 5708 } 5709 ASSERT(optval); 5710 tcp->tcp_naglim = intvalue ? 1 : tcp->tcp_mss; 5711 handled = B_TRUE; 5712 break; 5713 } 5714 break; 5715 default: 5716 handled = B_FALSE; 5717 break; 5718 } 5719 } 5720 5721 if (handled) { 5722 mutex_enter(&so->so_lock); 5723 goto done2; 5724 } 5725 5726 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5727 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5728 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5729 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5730 5731 oh.level = level; 5732 oh.name = option_name; 5733 oh.len = optlen; 5734 5735 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5736 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 5737 /* Let option management work in the presence of data flow control */ 5738 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5739 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5740 mp = NULL; 5741 mutex_enter(&so->so_lock); 5742 if (error) { 5743 eprintsoline(so, error); 5744 goto done2; 5745 } 5746 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5747 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5748 if (error) { 5749 eprintsoline(so, error); 5750 goto done; 5751 } 5752 ASSERT(mp); 5753 /* No need to verify T_optmgmt_ack */ 5754 freemsg(mp); 5755 done: 5756 /* 5757 * Check for SOL_SOCKET options and record their values.
5758 * If we know about a SOL_SOCKET parameter and the transport 5759 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5760 * EPROTO) we let the setsockopt succeed. 5761 */ 5762 if (level == SOL_SOCKET) { 5763 /* Check parameters */ 5764 switch (option_name) { 5765 case SO_DEBUG: 5766 case SO_REUSEADDR: 5767 case SO_KEEPALIVE: 5768 case SO_DONTROUTE: 5769 case SO_BROADCAST: 5770 case SO_USELOOPBACK: 5771 case SO_OOBINLINE: 5772 case SO_SNDBUF: 5773 case SO_RCVBUF: 5774 #ifdef notyet 5775 case SO_SNDLOWAT: 5776 case SO_RCVLOWAT: 5777 #endif /* notyet */ 5778 case SO_DGRAM_ERRIND: 5779 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5780 error = EINVAL; 5781 eprintsoline(so, error); 5782 goto done2; 5783 } 5784 ASSERT(optval); 5785 handled = B_TRUE; 5786 break; 5787 case SO_SNDTIMEO: 5788 case SO_RCVTIMEO: 5789 if (get_udatamodel() == DATAMODEL_NONE || 5790 get_udatamodel() == DATAMODEL_NATIVE) { 5791 if (optlen != sizeof (struct timeval)) { 5792 error = EINVAL; 5793 eprintsoline(so, error); 5794 goto done2; 5795 } 5796 } else { 5797 if (optlen != sizeof (struct timeval32)) { 5798 error = EINVAL; 5799 eprintsoline(so, error); 5800 goto done2; 5801 } 5802 } 5803 ASSERT(optval); 5804 handled = B_TRUE; 5805 break; 5806 case SO_LINGER: 5807 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5808 error = EINVAL; 5809 eprintsoline(so, error); 5810 goto done2; 5811 } 5812 ASSERT(optval); 5813 handled = B_TRUE; 5814 break; 5815 } 5816 5817 #define intvalue (*(int32_t *)optval) 5818 5819 switch (option_name) { 5820 case SO_TYPE: 5821 case SO_ERROR: 5822 case SO_ACCEPTCONN: 5823 /* Can't be set */ 5824 error = ENOPROTOOPT; 5825 goto done2; 5826 case SO_LINGER: { 5827 struct linger *l = (struct linger *)optval; 5828 5829 so->so_linger.l_linger = l->l_linger; 5830 if (l->l_onoff) { 5831 so->so_linger.l_onoff = SO_LINGER; 5832 so->so_options |= SO_LINGER; 5833 } else { 5834 so->so_linger.l_onoff = 0; 5835 so->so_options &= ~SO_LINGER; 5836 } 5837 break; 5838 } 5839 5840 case SO_DEBUG: 5841 #ifdef SOCK_TEST 5842 if (intvalue & 2) 5843 sock_test_timelimit = 10 * hz; 5844 else 5845 sock_test_timelimit = 0; 5846 5847 if (intvalue & 4) 5848 do_useracc = 0; 5849 else 5850 do_useracc = 1; 5851 #endif /* SOCK_TEST */ 5852 /* FALLTHRU */ 5853 case SO_REUSEADDR: 5854 case SO_KEEPALIVE: 5855 case SO_DONTROUTE: 5856 case SO_BROADCAST: 5857 case SO_USELOOPBACK: 5858 case SO_OOBINLINE: 5859 case SO_DGRAM_ERRIND: 5860 if (intvalue != 0) { 5861 dprintso(so, 1, 5862 ("socket_setsockopt: setting 0x%x\n", 5863 option_name)); 5864 so->so_options |= option_name; 5865 } else { 5866 dprintso(so, 1, 5867 ("socket_setsockopt: clearing 0x%x\n", 5868 option_name)); 5869 so->so_options &= ~option_name; 5870 } 5871 break; 5872 /* 5873 * The following options are only returned by us when the 5874 * transport layer fails. 5875 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5876 * since the transport might adjust the value and not 5877 * return exactly what was set by the application. 5878 */ 5879 case SO_SNDBUF: 5880 so->so_sndbuf = intvalue; 5881 break; 5882 case SO_RCVBUF: 5883 so->so_rcvbuf = intvalue; 5884 break; 5885 case SO_RCVPSH: 5886 so->so_rcv_timer_interval = intvalue; 5887 break; 5888 #ifdef notyet 5889 /* 5890 * We do not implement the semantics of these options 5891 * thus we shouldn't implement the options either. 
5892 */ 5893 case SO_SNDLOWAT: 5894 so->so_sndlowat = intvalue; 5895 break; 5896 case SO_RCVLOWAT: 5897 so->so_rcvlowat = intvalue; 5898 break; 5899 #endif /* notyet */ 5900 case SO_SNDTIMEO: 5901 case SO_RCVTIMEO: { 5902 struct timeval tl; 5903 clock_t val; 5904 5905 if (get_udatamodel() == DATAMODEL_NONE || 5906 get_udatamodel() == DATAMODEL_NATIVE) 5907 bcopy(optval, &tl, 5908 sizeof (struct timeval)); 5909 else 5910 TIMEVAL32_TO_TIMEVAL(&tl, 5911 (struct timeval32 *)optval); 5912 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5913 if (option_name == SO_RCVTIMEO) 5914 so->so_rcvtimeo = drv_usectohz(val); 5915 else 5916 so->so_sndtimeo = drv_usectohz(val); 5917 break; 5918 } 5919 } 5920 #undef intvalue 5921 5922 if (error) { 5923 if ((error == ENOPROTOOPT || error == EPROTO || 5924 error == EINVAL) && handled) { 5925 dprintso(so, 1, 5926 ("setsockopt: ignoring error %d for 0x%x\n", 5927 error, option_name)); 5928 error = 0; 5929 } 5930 } 5931 } 5932 done2: 5933 so_unlock_single(so, SOLOCKED); 5934 mutex_exit(&so->so_lock); 5935 return (error); 5936 } 5937 5938 /* 5939 * sotpi_close() is called when the last open reference goes away. 5940 */ 5941 /* ARGSUSED */ 5942 int 5943 sotpi_close(struct sonode *so, int flag, struct cred *cr) 5944 { 5945 struct vnode *vp = SOTOV(so); 5946 dev_t dev; 5947 int error = 0; 5948 sotpi_info_t *sti = SOTOTPI(so); 5949 5950 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 5951 (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 5952 5953 dev = sti->sti_dev; 5954 5955 ASSERT(STREAMSTAB(getmajor(dev))); 5956 5957 mutex_enter(&so->so_lock); 5958 so_lock_single(so); /* Set SOLOCKED */ 5959 5960 ASSERT(so_verify_oobstate(so)); 5961 5962 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 5963 sti->sti_nl7c_flags = 0; 5964 nl7c_close(so); 5965 } 5966 5967 if (vp->v_stream != NULL) { 5968 vnode_t *ux_vp; 5969 5970 if (so->so_family == AF_UNIX) { 5971 /* Could avoid this when CANTSENDMORE for !dgram */ 5972 so_unix_close(so); 5973 } 5974 5975 mutex_exit(&so->so_lock); 5976 /* 5977 * Disassemble the linkage from the AF_UNIX underlying file 5978 * system vnode to this socket (by atomically clearing 5979 * v_stream in vn_rele_stream) before strclose clears sd_vnode 5980 * and frees the stream head. 5981 */ 5982 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 5983 ASSERT(ux_vp->v_stream); 5984 sti->sti_ux_bound_vp = NULL; 5985 vn_rele_stream(ux_vp); 5986 } 5987 if (so->so_family == AF_INET || so->so_family == AF_INET6) { 5988 strsetrwputdatahooks(SOTOV(so), NULL, NULL); 5989 if (sti->sti_kssl_ent != NULL) { 5990 kssl_release_ent(sti->sti_kssl_ent, so, 5991 sti->sti_kssl_type); 5992 sti->sti_kssl_ent = NULL; 5993 } 5994 if (sti->sti_kssl_ctx != NULL) { 5995 kssl_release_ctx(sti->sti_kssl_ctx); 5996 sti->sti_kssl_ctx = NULL; 5997 } 5998 sti->sti_kssl_type = KSSL_NO_PROXY; 5999 } 6000 error = strclose(vp, flag, cr); 6001 vp->v_stream = NULL; 6002 mutex_enter(&so->so_lock); 6003 } 6004 6005 /* 6006 * Flush the T_DISCON_IND on sti_discon_ind_mp. 6007 */ 6008 so_flush_discon_ind(so); 6009 6010 so_unlock_single(so, SOLOCKED); 6011 mutex_exit(&so->so_lock); 6012 6013 /* 6014 * Needed for STREAMS. 6015 * Decrement the device driver's reference count for streams 6016 * opened via the clone dip. The driver was held in clone_open(). 6017 * The absence of clone_close() forces this asymmetry.
6018 */ 6019 if (so->so_flag & SOCLONE) 6020 ddi_rele_driver(getmajor(dev)); 6021 6022 return (error); 6023 } 6024 6025 static int 6026 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 6027 struct cred *cr, int32_t *rvalp) 6028 { 6029 struct vnode *vp = SOTOV(so); 6030 sotpi_info_t *sti = SOTOTPI(so); 6031 int error = 0; 6032 6033 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 6034 cmd, arg, pr_state(so->so_state, so->so_mode))); 6035 6036 switch (cmd) { 6037 case SIOCSQPTR: 6038 /* 6039 * SIOCSQPTR is valid only when helper stream is created 6040 * by the protocol. 6041 */ 6042 case _I_INSERT: 6043 case _I_REMOVE: 6044 /* 6045 * Since there's no compelling reason to support these ioctls 6046 * on sockets, and doing so would increase the complexity 6047 * markedly, prevent it. 6048 */ 6049 return (EOPNOTSUPP); 6050 6051 case I_FIND: 6052 case I_LIST: 6053 case I_LOOK: 6054 case I_POP: 6055 case I_PUSH: 6056 /* 6057 * To prevent races and inconsistencies between the actual 6058 * state of the stream and the state according to the sonode, 6059 * we serialize all operations which modify or operate on the 6060 * list of modules on the socket's stream. 6061 */ 6062 mutex_enter(&sti->sti_plumb_lock); 6063 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 6064 mutex_exit(&sti->sti_plumb_lock); 6065 return (error); 6066 6067 default: 6068 if (so->so_version != SOV_STREAM) 6069 break; 6070 6071 /* 6072 * The imaginary "sockmod" has been popped; act as a stream. 6073 */ 6074 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6075 } 6076 6077 ASSERT(so->so_version != SOV_STREAM); 6078 6079 /* 6080 * Process socket-specific ioctls. 6081 */ 6082 switch (cmd) { 6083 case FIONBIO: { 6084 int32_t value; 6085 6086 if (so_copyin((void *)arg, &value, sizeof (int32_t), 6087 (mode & (int)FKIOCTL))) 6088 return (EFAULT); 6089 6090 mutex_enter(&so->so_lock); 6091 if (value) { 6092 so->so_state |= SS_NDELAY; 6093 } else { 6094 so->so_state &= ~SS_NDELAY; 6095 } 6096 mutex_exit(&so->so_lock); 6097 return (0); 6098 } 6099 6100 case FIOASYNC: { 6101 int32_t value; 6102 6103 if (so_copyin((void *)arg, &value, sizeof (int32_t), 6104 (mode & (int)FKIOCTL))) 6105 return (EFAULT); 6106 6107 mutex_enter(&so->so_lock); 6108 /* 6109 * SS_ASYNC flag not already set correctly? 6110 * (!value != !(so->so_state & SS_ASYNC)) 6111 * but some engineers find that too hard to read. 6112 */ 6113 if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 6114 value != 0 && (so->so_state & SS_ASYNC) == 0) 6115 error = so_flip_async(so, vp, mode, cr); 6116 mutex_exit(&so->so_lock); 6117 return (error); 6118 } 6119 6120 case SIOCSPGRP: 6121 case FIOSETOWN: { 6122 pid_t pgrp; 6123 6124 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 6125 (mode & (int)FKIOCTL))) 6126 return (EFAULT); 6127 6128 mutex_enter(&so->so_lock); 6129 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 6130 /* Any change? */ 6131 if (pgrp != so->so_pgrp) 6132 error = so_set_siggrp(so, vp, pgrp, mode, cr); 6133 mutex_exit(&so->so_lock); 6134 return (error); 6135 } 6136 case SIOCGPGRP: 6137 case FIOGETOWN: 6138 if (so_copyout(&so->so_pgrp, (void *)arg, 6139 sizeof (pid_t), (mode & (int)FKIOCTL))) 6140 return (EFAULT); 6141 return (0); 6142 6143 case SIOCATMARK: { 6144 int retval; 6145 uint_t so_state; 6146 6147 /* 6148 * strwaitmark has a finite timeout after which it 6149 * returns -1 if the mark state is undetermined. 
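 * (For reference, a hypothetical application-level caller of this
 * ioctl, not part of this file, which expects a definite 0 or 1
 * answer:
 *
 *	int atmark;
 *	if (ioctl(sock, SIOCATMARK, &atmark) == 0 && atmark != 0)
 *		nread = recv(sock, buf, sizeof (buf), MSG_OOB);
 *
 * hence the retry loop below.)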
6150 * In order to avoid any race between the mark state 6151 * in sockfs and the mark state in the stream head this 6152 * routine loops until the mark state can be determined 6153 * (or the urgent data indication has been removed by some 6154 * other thread). 6155 */ 6156 do { 6157 mutex_enter(&so->so_lock); 6158 so_state = so->so_state; 6159 mutex_exit(&so->so_lock); 6160 if (so_state & SS_RCVATMARK) { 6161 retval = 1; 6162 } else if (!(so_state & SS_OOBPEND)) { 6163 /* 6164 * No SIGURG has been generated -- there is no 6165 * pending or present urgent data. Thus can't 6166 * possibly be at the mark. 6167 */ 6168 retval = 0; 6169 } else { 6170 /* 6171 * Have the stream head wait until there are 6172 * either some messages on the read queue, or 6173 * STRATMARK or STRNOTATMARK gets set. The 6174 * STRNOTATMARK flag is used so that the 6175 * transport can send up a MSGNOTMARKNEXT 6176 * M_DATA to indicate that it is not 6177 * at the mark and additional data is not about 6178 * to be sent upstream. 6179 * 6180 * If the mark state is undetermined this will 6181 * return -1 and we will loop rechecking the 6182 * socket state. 6183 */ 6184 retval = strwaitmark(vp); 6185 } 6186 } while (retval == -1); 6187 6188 if (so_copyout(&retval, (void *)arg, sizeof (int), 6189 (mode & (int)FKIOCTL))) 6190 return (EFAULT); 6191 return (0); 6192 } 6193 6194 case I_FDINSERT: 6195 case I_SENDFD: 6196 case I_RECVFD: 6197 case I_ATMARK: 6198 case _SIOCSOCKFALLBACK: 6199 /* 6200 * These ioctls do not apply to sockets. I_FDINSERT can be 6201 * used to send M_PROTO messages without modifying the socket 6202 * state. I_SENDFD/RECVFD should not be used for socket file 6203 * descriptor passing since they assume a twisted stream. 6204 * SIOCATMARK must be used instead of I_ATMARK. 6205 * 6206 * _SIOCSOCKFALLBACK from an application should never be 6207 * processed. It is only generated by socktpi_open() or 6208 * in response to I_POP or I_PUSH. 6209 */ 6210 #ifdef DEBUG 6211 zcmn_err(getzoneid(), CE_WARN, 6212 "Unsupported STREAMS ioctl 0x%x on socket. " 6213 "Pid = %d\n", cmd, curproc->p_pid); 6214 #endif /* DEBUG */ 6215 return (EOPNOTSUPP); 6216 6217 case _I_GETPEERCRED: 6218 if ((mode & FKIOCTL) == 0) 6219 return (EINVAL); 6220 6221 mutex_enter(&so->so_lock); 6222 if ((so->so_mode & SM_CONNREQUIRED) == 0) { 6223 error = ENOTSUP; 6224 } else if ((so->so_state & SS_ISCONNECTED) == 0) { 6225 error = ENOTCONN; 6226 } else if (so->so_peercred != NULL) { 6227 k_peercred_t *kp = (k_peercred_t *)arg; 6228 kp->pc_cr = so->so_peercred; 6229 kp->pc_cpid = so->so_cpid; 6230 crhold(so->so_peercred); 6231 } else { 6232 error = EINVAL; 6233 } 6234 mutex_exit(&so->so_lock); 6235 return (error); 6236 6237 default: 6238 /* 6239 * Do the higher-order bits of the ioctl cmd indicate 6240 * that it is an I_* streams ioctl? 6241 */ 6242 if ((cmd & 0xffffff00U) == STR && 6243 so->so_version == SOV_SOCKBSD) { 6244 #ifdef DEBUG 6245 zcmn_err(getzoneid(), CE_WARN, 6246 "Unsupported STREAMS ioctl 0x%x on socket. " 6247 "Pid = %d\n", cmd, curproc->p_pid); 6248 #endif /* DEBUG */ 6249 return (EOPNOTSUPP); 6250 } 6251 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6252 } 6253 } 6254 6255 /* 6256 * Handle plumbing-related ioctls.
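 * These are I_PUSH, I_POP, I_LIST, I_LOOK and I_FIND; each must account
 * for the emulated "sockmod" module that applications believe sits on
 * the stream.  For illustration, a hypothetical application-level
 * sequence that relies on this emulation (not part of this file):
 *
 *	(void) ioctl(sock, I_POP, 0);		pop "sockmod", raw TPI
 *	...					putmsg()/getmsg() directly
 *	(void) ioctl(sock, I_PUSH, "sockmod");	back to socket semantics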
6257 */ 6258 static int 6259 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 6260 struct cred *cr, int32_t *rvalp) 6261 { 6262 static const char sockmod_name[] = "sockmod"; 6263 struct sonode *so = VTOSO(vp); 6264 char mname[FMNAMESZ + 1]; 6265 int error; 6266 sotpi_info_t *sti = SOTOTPI(so); 6267 6268 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 6269 6270 if (so->so_version == SOV_SOCKBSD) 6271 return (EOPNOTSUPP); 6272 6273 if (so->so_version == SOV_STREAM) { 6274 /* 6275 * The imaginary "sockmod" has been popped - act as a stream. 6276 * If this is a push of sockmod then change back to a socket. 6277 */ 6278 if (cmd == I_PUSH) { 6279 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6280 (void *)arg, mname, sizeof (mname), NULL); 6281 6282 if (error == 0 && strcmp(mname, sockmod_name) == 0) { 6283 dprintso(so, 0, ("socktpi_ioctl: going to " 6284 "socket version\n")); 6285 so_stream2sock(so); 6286 return (0); 6287 } 6288 } 6289 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6290 } 6291 6292 switch (cmd) { 6293 case I_PUSH: 6294 if (sti->sti_direct) { 6295 mutex_enter(&so->so_lock); 6296 so_lock_single(so); 6297 mutex_exit(&so->so_lock); 6298 6299 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 6300 CRED(), rvalp); 6301 6302 mutex_enter(&so->so_lock); 6303 if (error == 0) 6304 sti->sti_direct = 0; 6305 so_unlock_single(so, SOLOCKED); 6306 mutex_exit(&so->so_lock); 6307 6308 if (error != 0) 6309 return (error); 6310 } 6311 6312 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6313 if (error == 0) 6314 sti->sti_pushcnt++; 6315 return (error); 6316 6317 case I_POP: 6318 if (sti->sti_pushcnt == 0) { 6319 /* Emulate sockmod being popped */ 6320 dprintso(so, 0, 6321 ("socktpi_ioctl: going to STREAMS version\n")); 6322 return (so_sock2stream(so)); 6323 } 6324 6325 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6326 if (error == 0) 6327 sti->sti_pushcnt--; 6328 return (error); 6329 6330 case I_LIST: { 6331 struct str_mlist *kmlistp, *umlistp; 6332 struct str_list kstrlist; 6333 ssize_t kstrlistsize; 6334 int i, nmods; 6335 6336 STRUCT_DECL(str_list, ustrlist); 6337 STRUCT_INIT(ustrlist, mode); 6338 6339 if (arg == NULL) { 6340 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6341 if (error == 0) 6342 (*rvalp)++; /* Add one for sockmod */ 6343 return (error); 6344 } 6345 6346 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 6347 STRUCT_SIZE(ustrlist), mode & FKIOCTL); 6348 if (error != 0) 6349 return (error); 6350 6351 nmods = STRUCT_FGET(ustrlist, sl_nmods); 6352 if (nmods <= 0) 6353 return (EINVAL); 6354 /* 6355 * Ceiling nmods at nstrpush to prevent someone from 6356 * maliciously consuming lots of kernel memory. 6357 */ 6358 nmods = MIN(nmods, nstrpush); 6359 6360 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 6361 kstrlist.sl_nmods = nmods; 6362 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 6363 6364 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 6365 cr, rvalp); 6366 if (error != 0) 6367 goto done; 6368 6369 /* 6370 * Considering the module list as a 0-based array of sl_nmods 6371 * modules, sockmod should conceptually exist at slot 6372 * sti_pushcnt. Insert sockmod at this location by sliding all 6373 * of the module names after so_pushcnt over by one. We know 6374 * that there will be room to do this since we allocated 6375 * sl_modlist with an additional slot. 
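 *
 * A worked example (hypothetical module names), with sti_pushcnt == 1
 * and two modules reported by the stream head:
 *
 *	before:	{ "mod_above", "mod_below" }
 *	after:	{ "mod_above", "sockmod", "mod_below" }
 *
 * so the application sees sockmod exactly where it would be had it
 * really been pushed.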
6376 */ 6377 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--) 6378 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1]; 6379 6380 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name); 6381 kstrlist.sl_nmods++; 6382 6383 /* 6384 * Copy all of the entries out to ustrlist. 6385 */ 6386 kmlistp = kstrlist.sl_modlist; 6387 umlistp = STRUCT_FGETP(ustrlist, sl_modlist); 6388 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) { 6389 error = so_copyout(kmlistp++, umlistp++, 6390 sizeof (struct str_mlist), mode & FKIOCTL); 6391 if (error != 0) 6392 goto done; 6393 } 6394 6395 error = so_copyout(&i, (void *)arg, sizeof (int32_t), 6396 mode & FKIOCTL); 6397 if (error == 0) 6398 *rvalp = 0; 6399 done: 6400 kmem_free(kstrlist.sl_modlist, kstrlistsize); 6401 return (error); 6402 } 6403 case I_LOOK: 6404 if (sti->sti_pushcnt == 0) { 6405 return (so_copyout(sockmod_name, (void *)arg, 6406 sizeof (sockmod_name), mode & FKIOCTL)); 6407 } 6408 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6409 6410 case I_FIND: 6411 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6412 if (error && error != EINVAL) 6413 return (error); 6414 6415 /* if not found and string was sockmod return 1 */ 6416 if (*rvalp == 0 || error == EINVAL) { 6417 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6418 (void *)arg, mname, sizeof (mname), NULL); 6419 if (error == ENAMETOOLONG) 6420 error = EINVAL; 6421 6422 if (error == 0 && strcmp(mname, sockmod_name) == 0) 6423 *rvalp = 1; 6424 } 6425 return (error); 6426 6427 default: 6428 panic("socktpi_plumbioctl: unknown ioctl %d", cmd); 6429 break; 6430 } 6431 6432 return (0); 6433 } 6434 6435 /* 6436 * Wrapper around the streams poll routine that implements socket poll 6437 * semantics. 6438 * The sockfs never calls pollwakeup itself - the stream head takes care 6439 * of all pollwakeups. Since sockfs never holds so_lock when calling the 6440 * stream head there can never be a deadlock due to holding so_lock across 6441 * pollwakeup and acquiring so_lock in this routine. 6442 * 6443 * However, since the performance of VOP_POLL is critical, we avoid 6444 * acquiring so_lock here. This is based on two assumptions: 6445 * - The poll implementation holds locks to serialize the VOP_POLL call 6446 * and a pollwakeup for the same pollhead. This ensures that should 6447 * e.g. so_state change during a socktpi_poll call the pollwakeup 6448 * (which strsock_* and strrput conspire to issue) is issued after 6449 * the state change. Thus the pollwakeup will block until VOP_POLL has 6450 * returned and then wake up poll and have it call VOP_POLL again. 6451 * - The reading of so_state without holding so_lock does not result in 6452 * stale data that is older than the latest state change that has dropped 6453 * so_lock. This is ensured by the mutex_exit issuing the appropriate 6454 * memory barrier to force the data into the coherency domain.
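 *
 * In other words, the ordering relied upon is roughly (sketch; the
 * store below stands for any so_state change made under so_lock):
 *
 *	writer				sotpi_poll (reader)
 *	mutex_enter(&so->so_lock);
 *	so->so_state |= ...;
 *	mutex_exit(&so->so_lock);	so_state = so->so_state;
 *	pollwakeup();
 *
 * A read that raced ahead of the mutex_exit() may see the old state,
 * but the subsequent pollwakeup() forces poll to call VOP_POLL again,
 * at which point the new state is visible.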
6455 */ 6456 static int 6457 sotpi_poll( 6458 struct sonode *so, 6459 short events, 6460 int anyyet, 6461 short *reventsp, 6462 struct pollhead **phpp) 6463 { 6464 short origevents = events; 6465 struct vnode *vp = SOTOV(so); 6466 int error; 6467 int so_state = so->so_state; /* snapshot */ 6468 sotpi_info_t *sti = SOTOTPI(so); 6469 6470 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n", 6471 (void *)vp, pr_state(so_state, so->so_mode), so->so_error)); 6472 6473 ASSERT(vp->v_type == VSOCK); 6474 ASSERT(vp->v_stream != NULL); 6475 6476 if (so->so_version == SOV_STREAM) { 6477 /* The imaginary "sockmod" has been popped - act as a stream */ 6478 return (strpoll(vp->v_stream, events, anyyet, 6479 reventsp, phpp)); 6480 } 6481 6482 if (!(so_state & SS_ISCONNECTED) && 6483 (so->so_mode & SM_CONNREQUIRED)) { 6484 /* Not connected yet - turn off write side events */ 6485 events &= ~(POLLOUT|POLLWRBAND); 6486 } 6487 /* 6488 * Check for errors without calling strpoll if the caller wants them. 6489 * In sockets the errors are represented as input/output events 6490 * and there is no need to ask the stream head for this information. 6491 */ 6492 if (so->so_error != 0 && 6493 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) { 6494 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; 6495 return (0); 6496 } 6497 /* 6498 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages. 6499 * These messages with only an M_PROTO/M_PCPROTO part and no M_DATA 6500 * will not trigger a POLLIN event with POLLRDDATA set. 6501 * The handling of urgent data (causing POLLRDBAND) is done by 6502 * inspecting SS_OOBPEND below. 6503 */ 6504 events |= POLLRDDATA; 6505 6506 /* 6507 * After shutdown(output) a stream head write error is set. 6508 * However, we should not return output events. 6509 */ 6510 events |= POLLNOERR; 6511 error = strpoll(vp->v_stream, events, anyyet, 6512 reventsp, phpp); 6513 if (error) 6514 return (error); 6515 6516 ASSERT(!(*reventsp & POLLERR)); 6517 6518 /* 6519 * Notes on T_CONN_IND handling for sockets. 6520 * 6521 * If strpoll() returned without events, SR_POLLIN is guaranteed 6522 * to be set, ensuring any subsequent strrput() runs pollwakeup(). 6523 * 6524 * Since the so_lock is not held, soqueueconnind() may have run 6525 * and a T_CONN_IND may be waiting. We now check for any queued 6526 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events 6527 * to ensure poll returns. 6528 * 6529 * However: 6530 * If the T_CONN_IND hasn't arrived by the time strpoll() returns, 6531 * when strrput() does run for an arriving M_PROTO with T_CONN_IND 6532 * the following actions will occur; taken together they ensure the 6533 * syscall will return. 6534 * 6535 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if 6536 * the accept() was run on a non-blocking socket sowaitconnind() 6537 * may have already returned EWOULDBLOCK and so may not be waiting to 6538 * process the message. Additionally socktpi_poll() has probably 6539 * proceeded past the sti_conn_ind_head check below. 6540 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake 6541 * this thread; however, that could occur before poll_common() 6542 * has entered cv_wait. 6543 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock. 6544 * 6545 * Before proceeding to cv_wait() in poll_common() for an event, 6546 * poll_common() atomically checks for T_POLLWAKE under the pc_lock, 6547 * and if set, re-calls strpoll() to ensure the late arriving 6548 * T_CONN_IND is recognized, and pollsys() returns.
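 *
 * Roughly, the late-arrival case looks like this (sketch only):
 *
 *	poll thread			strrput() thread
 *	strpoll() -> no events
 *					T_CONN_IND: soqueueconnind()
 *					pollwakeup() -> pollnotify()
 *					sets T_POLLWAKE under pc_lock
 *	sti_conn_ind_head check below
 *	may miss the indication
 *	poll_common() sees T_POLLWAKE,
 *	re-calls VOP_POLL; this routine
 *	then finds sti_conn_ind_head set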
6549 */ 6550 6551 if (sti->sti_conn_ind_head != NULL) 6552 *reventsp |= (POLLIN|POLLRDNORM) & events; 6553 6554 if (so->so_state & SS_OOBPEND) 6555 *reventsp |= POLLRDBAND & events; 6556 6557 if (sti->sti_nl7c_rcv_mp != NULL) { 6558 *reventsp |= (POLLIN|POLLRDNORM) & events; 6559 } 6560 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 6561 ((POLLIN|POLLRDNORM) & *reventsp)) { 6562 sti->sti_nl7c_flags |= NL7C_POLLIN; 6563 } 6564 6565 return (0); 6566 } 6567 6568 /*ARGSUSED*/ 6569 static int 6570 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6571 { 6572 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6573 int error = 0; 6574 6575 error = sonode_constructor(buf, cdrarg, kmflags); 6576 if (error != 0) 6577 return (error); 6578 6579 error = i_sotpi_info_constructor(&st->st_info); 6580 if (error != 0) 6581 sonode_destructor(buf, cdrarg); 6582 6583 st->st_sonode.so_priv = &st->st_info; 6584 6585 return (error); 6586 } 6587 6588 /*ARGSUSED1*/ 6589 static void 6590 socktpi_destructor(void *buf, void *cdrarg) 6591 { 6592 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6593 6594 ASSERT(st->st_sonode.so_priv == &st->st_info); 6595 st->st_sonode.so_priv = NULL; 6596 6597 i_sotpi_info_destructor(&st->st_info); 6598 sonode_destructor(buf, cdrarg); 6599 } 6600 6601 static int 6602 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 6603 { 6604 int retval; 6605 6606 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6607 struct sonode *so = (struct sonode *)buf; 6608 sotpi_info_t *sti = SOTOTPI(so); 6609 6610 mutex_enter(&socklist.sl_lock); 6611 6612 sti->sti_next_so = socklist.sl_list; 6613 sti->sti_prev_so = NULL; 6614 if (sti->sti_next_so != NULL) 6615 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6616 socklist.sl_list = so; 6617 6618 mutex_exit(&socklist.sl_lock); 6619 6620 } 6621 return (retval); 6622 } 6623 6624 static void 6625 socktpi_unix_destructor(void *buf, void *cdrarg) 6626 { 6627 struct sonode *so = (struct sonode *)buf; 6628 sotpi_info_t *sti = SOTOTPI(so); 6629 6630 mutex_enter(&socklist.sl_lock); 6631 6632 if (sti->sti_next_so != NULL) 6633 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6634 if (sti->sti_prev_so != NULL) 6635 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6636 else 6637 socklist.sl_list = sti->sti_next_so; 6638 6639 mutex_exit(&socklist.sl_lock); 6640 6641 socktpi_destructor(buf, cdrarg); 6642 } 6643 6644 int 6645 socktpi_init(void) 6646 { 6647 /* 6648 * Create sonode caches. We create a special one for AF_UNIX so 6649 * that we can track them for netstat(1m). 6650 */ 6651 socktpi_cache = kmem_cache_create("socktpi_cache", 6652 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6653 socktpi_destructor, NULL, NULL, NULL, 0); 6654 6655 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6656 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6657 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6658 6659 return (0); 6660 } 6661 6662 /* 6663 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6664 * 6665 * Caller must still update state and mode using sotpi_update_state(). 6666 * 6667 * Returns the STREAM queue that the protocol should use. 
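 *
 * A rough sketch of the expected calling sequence (the actual fallback
 * caller lives outside this file; local variable names are
 * illustrative only):
 *
 *	boolean_t direct;
 *	queue_t *q;
 *
 *	q = sotpi_convert_sonode(so, newsp, &direct, cr);
 *	if (q == NULL)
 *		return (error);		failure; so_state was restored
 *	... let the protocol resume, sending its messages to q ...
 *	sotpi_update_state(so, &tcap, laddr, laddrlen,
 *	    faddr, faddrlen, opts);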
6668 */ 6669 queue_t * 6670 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 6671 boolean_t *direct, struct cred *cr) 6672 { 6673 sotpi_info_t *sti; 6674 struct sockparams *origsp = so->so_sockparams; 6675 sock_lower_handle_t handle = so->so_proto_handle; 6676 uint_t old_state = so->so_state; 6677 struct stdata *stp; 6678 struct vnode *vp; 6679 queue_t *q; 6680 6681 *direct = B_FALSE; 6682 so->so_sockparams = newsp; 6683 /* 6684 * Allocate and initialize fields required by TPI. 6685 */ 6686 (void) sotpi_info_create(so, KM_SLEEP); 6687 sotpi_info_init(so); 6688 6689 if (sotpi_init(so, NULL, cr, SO_FALLBACK) != 0) { 6690 sotpi_info_fini(so); 6691 sotpi_info_destroy(so); 6692 so->so_state = old_state; 6693 return (NULL); 6694 } 6695 ASSERT(handle == so->so_proto_handle); 6696 sti = SOTOTPI(so); 6697 if (sti->sti_direct != 0) 6698 *direct = B_TRUE; 6699 6700 /* 6701 * Keep the original sp around so we can properly dispose of the 6702 * sonode when the socket is being closed. 6703 */ 6704 sti->sti_orig_sp = origsp; 6705 6706 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 6707 so_alloc_addr(so, so->so_max_addr_len); 6708 6709 /* 6710 * If the application has done a SIOCSPGRP, make sure the 6711 * STREAM head is aware. This needs to take place before 6712 * the protocol starts sending up messages. Otherwise we 6713 * might fail to generate a SIGPOLL. 6714 * 6715 * It is possible that the application will receive duplicate 6716 * signals if some were already generated for either data or 6717 * connection indications. 6718 */ 6719 if (so->so_pgrp != 0) { 6720 mutex_enter(&so->so_lock); 6721 if (so_set_events(so, so->so_vnode, cr) != 0) 6722 so->so_pgrp = 0; 6723 mutex_exit(&so->so_lock); 6724 } 6725 6726 /* 6727 * Determine which queue to use. 6728 */ 6729 vp = SOTOV(so); 6730 stp = vp->v_stream; 6731 ASSERT(stp != NULL); 6732 q = stp->sd_wrq->q_next; 6733 6734 /* 6735 * Skip any modules that may have been auto pushed when the device 6736 * was opened. 6737 */ 6738 while (q->q_next != NULL) 6739 q = q->q_next; 6740 q = _RD(q); 6741 6742 return (q); 6743 } 6744 6745 void 6746 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 6747 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 6748 socklen_t faddrlen, short opts) 6749 { 6750 sotpi_info_t *sti = SOTOTPI(so); 6751 6752 so_proc_tcapability_ack(so, tcap); 6753 6754 so->so_options |= opts; 6755 6756 /* 6757 * Determine whether the foreign and local addresses are valid 6758 */ 6759 if (laddrlen != 0) { 6760 ASSERT(laddrlen <= sti->sti_laddr_maxlen); 6761 sti->sti_laddr_len = laddrlen; 6762 bcopy(laddr, sti->sti_laddr_sa, laddrlen); 6763 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 6764 } 6765 6766 if (faddrlen != 0) { 6767 ASSERT(faddrlen <= sti->sti_faddr_maxlen); 6768 sti->sti_faddr_len = faddrlen; 6769 bcopy(faddr, sti->sti_faddr_sa, faddrlen); 6770 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 6771 } 6772 6773 } 6774 6775 /* 6776 * Allocate enough space to cache the local and foreign addresses.
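 * Both caches come from a single allocation of twice the rounded-up
 * maximum address length, split down the middle (sketch):
 *
 *	sti_laddr_sa			sti_faddr_sa
 *	|<--- sti_laddr_maxlen --->|<--- sti_faddr_maxlen --->|
 *
 * which is why sotpi_info_fini() frees sti_laddr_sa with a size of
 * sti_laddr_maxlen * 2 and never frees sti_faddr_sa separately.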
6777 */ 6778 void 6779 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6780 { 6781 sotpi_info_t *sti = SOTOTPI(so); 6782 6783 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6784 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6785 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6786 P2ROUNDUP(maxlen, KMEM_ALIGN); 6787 so->so_max_addr_len = sti->sti_laddr_maxlen; 6788 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6789 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6790 + sti->sti_laddr_maxlen); 6791 6792 if (so->so_family == AF_UNIX) { 6793 /* 6794 * Initialize AF_UNIX related fields. 6795 */ 6796 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6797 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6798 } 6799 } 6800 6801 6802 sotpi_info_t * 6803 sotpi_sototpi(struct sonode *so) 6804 { 6805 sotpi_info_t *sti; 6806 6807 if (so == NULL) 6808 return (NULL); 6809 6810 sti = (sotpi_info_t *)so->so_priv; 6811 6812 ASSERT(sti != NULL); 6813 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6814 6815 return (sti); 6816 } 6817 6818 static int 6819 i_sotpi_info_constructor(sotpi_info_t *sti) 6820 { 6821 sti->sti_magic = SOTPI_INFO_MAGIC; 6822 sti->sti_ack_mp = NULL; 6823 sti->sti_discon_ind_mp = NULL; 6824 sti->sti_ux_bound_vp = NULL; 6825 sti->sti_unbind_mp = NULL; 6826 6827 sti->sti_conn_ind_head = NULL; 6828 sti->sti_conn_ind_tail = NULL; 6829 6830 sti->sti_laddr_sa = NULL; 6831 sti->sti_faddr_sa = NULL; 6832 6833 sti->sti_nl7c_flags = 0; 6834 sti->sti_nl7c_uri = NULL; 6835 sti->sti_nl7c_rcv_mp = NULL; 6836 6837 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6838 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6839 6840 return (0); 6841 } 6842 6843 static void 6844 i_sotpi_info_destructor(sotpi_info_t *sti) 6845 { 6846 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6847 ASSERT(sti->sti_ack_mp == NULL); 6848 ASSERT(sti->sti_discon_ind_mp == NULL); 6849 ASSERT(sti->sti_ux_bound_vp == NULL); 6850 ASSERT(sti->sti_unbind_mp == NULL); 6851 6852 ASSERT(sti->sti_conn_ind_head == NULL); 6853 ASSERT(sti->sti_conn_ind_tail == NULL); 6854 6855 ASSERT(sti->sti_laddr_sa == NULL); 6856 ASSERT(sti->sti_faddr_sa == NULL); 6857 6858 ASSERT(sti->sti_nl7c_flags == 0); 6859 ASSERT(sti->sti_nl7c_uri == NULL); 6860 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6861 6862 mutex_destroy(&sti->sti_plumb_lock); 6863 cv_destroy(&sti->sti_ack_cv); 6864 } 6865 6866 /* 6867 * Creates and attaches TPI information to the given sonode 6868 */ 6869 static boolean_t 6870 sotpi_info_create(struct sonode *so, int kmflags) 6871 { 6872 sotpi_info_t *sti; 6873 6874 ASSERT(so->so_priv == NULL); 6875 6876 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6877 return (B_FALSE); 6878 6879 if (i_sotpi_info_constructor(sti) != 0) { 6880 kmem_free(sti, sizeof (*sti)); 6881 return (B_FALSE); 6882 } 6883 6884 so->so_priv = (void *)sti; 6885 return (B_TRUE); 6886 } 6887 6888 /* 6889 * Initializes the TPI information. 
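 * This runs on a sotpi_info_t that has already been through
 * i_sotpi_info_constructor(), either via the kmem caches or via
 * sotpi_info_create().  The full lifecycle, as used by
 * sotpi_convert_sonode() above, is roughly:
 *
 *	sotpi_info_create(so, KM_SLEEP);	allocate + construct
 *	sotpi_info_init(so);			per-socket defaults
 *	...					socket lifetime
 *	sotpi_info_fini(so);			free cached data
 *	sotpi_info_destroy(so);			destruct + free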
6890 */ 6891 static void 6892 sotpi_info_init(struct sonode *so) 6893 { 6894 struct vnode *vp = SOTOV(so); 6895 sotpi_info_t *sti = SOTOTPI(so); 6896 time_t now; 6897 6898 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6899 vp->v_rdev = sti->sti_dev; 6900 6901 sti->sti_orig_sp = NULL; 6902 6903 sti->sti_pushcnt = 0; 6904 6905 now = gethrestime_sec(); 6906 sti->sti_atime = now; 6907 sti->sti_mtime = now; 6908 sti->sti_ctime = now; 6909 6910 sti->sti_eaddr_mp = NULL; 6911 sti->sti_delayed_error = 0; 6912 6913 sti->sti_provinfo = NULL; 6914 6915 sti->sti_oobcnt = 0; 6916 sti->sti_oobsigcnt = 0; 6917 6918 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6919 6920 sti->sti_laddr_sa = 0; 6921 sti->sti_faddr_sa = 0; 6922 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6923 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6924 6925 sti->sti_laddr_valid = 0; 6926 sti->sti_faddr_valid = 0; 6927 sti->sti_faddr_noxlate = 0; 6928 6929 sti->sti_direct = 0; 6930 6931 ASSERT(sti->sti_ack_mp == NULL); 6932 ASSERT(sti->sti_ux_bound_vp == NULL); 6933 ASSERT(sti->sti_unbind_mp == NULL); 6934 6935 ASSERT(sti->sti_conn_ind_head == NULL); 6936 ASSERT(sti->sti_conn_ind_tail == NULL); 6937 6938 /* Initialize the kernel SSL proxy fields */ 6939 sti->sti_kssl_type = KSSL_NO_PROXY; 6940 sti->sti_kssl_ent = NULL; 6941 sti->sti_kssl_ctx = NULL; 6942 } 6943 6944 /* 6945 * Given a sonode, grab the TPI info and free any data. 6946 */ 6947 static void 6948 sotpi_info_fini(struct sonode *so) 6949 { 6950 sotpi_info_t *sti = SOTOTPI(so); 6951 mblk_t *mp; 6952 6953 ASSERT(sti->sti_discon_ind_mp == NULL); 6954 6955 if ((mp = sti->sti_conn_ind_head) != NULL) { 6956 mblk_t *mp1; 6957 6958 while (mp) { 6959 mp1 = mp->b_next; 6960 mp->b_next = NULL; 6961 freemsg(mp); 6962 mp = mp1; 6963 } 6964 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6965 } 6966 6967 /* 6968 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6969 * indirect them. It also uses so_count as a validity test. 6970 */ 6971 mutex_enter(&so->so_lock); 6972 6973 if (sti->sti_laddr_sa) { 6974 ASSERT((caddr_t)sti->sti_faddr_sa == 6975 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6976 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6977 sti->sti_laddr_valid = 0; 6978 sti->sti_faddr_valid = 0; 6979 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6980 sti->sti_laddr_sa = NULL; 6981 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6982 sti->sti_faddr_sa = NULL; 6983 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6984 } 6985 6986 mutex_exit(&so->so_lock); 6987 6988 if ((mp = sti->sti_eaddr_mp) != NULL) { 6989 freemsg(mp); 6990 sti->sti_eaddr_mp = NULL; 6991 sti->sti_delayed_error = 0; 6992 } 6993 6994 if ((mp = sti->sti_ack_mp) != NULL) { 6995 freemsg(mp); 6996 sti->sti_ack_mp = NULL; 6997 } 6998 6999 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 7000 sti->sti_nl7c_rcv_mp = NULL; 7001 freemsg(mp); 7002 } 7003 sti->sti_nl7c_rcv_rval = 0; 7004 if (sti->sti_nl7c_uri != NULL) { 7005 nl7c_urifree(so); 7006 /* urifree() cleared nl7c_uri */ 7007 } 7008 if (sti->sti_nl7c_flags) { 7009 sti->sti_nl7c_flags = 0; 7010 } 7011 7012 ASSERT(sti->sti_ux_bound_vp == NULL); 7013 if ((mp = sti->sti_unbind_mp) != NULL) { 7014 freemsg(mp); 7015 sti->sti_unbind_mp = NULL; 7016 } 7017 } 7018 7019 /* 7020 * Destroys the TPI information attached to a sonode. 
7021 */ 7022 static void 7023 sotpi_info_destroy(struct sonode *so) 7024 { 7025 sotpi_info_t *sti = SOTOTPI(so); 7026 7027 i_sotpi_info_destructor(sti); 7028 kmem_free(sti, sizeof (*sti)); 7029 7030 so->so_priv = NULL; 7031 } 7032 7033 /* 7034 * Create the global sotpi socket module entry. It will never be freed. 7035 */ 7036 smod_info_t * 7037 sotpi_smod_create(void) 7038 { 7039 smod_info_t *smodp; 7040 7041 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 7042 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 7043 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 7044 /* 7045 * Initialize the smod_refcnt to 1 so it will never be freed. 7046 */ 7047 smodp->smod_refcnt = 1; 7048 smodp->smod_uc_version = SOCK_UC_VERSION; 7049 smodp->smod_dc_version = SOCK_DC_VERSION; 7050 smodp->smod_sock_create_func = &sotpi_create; 7051 smodp->smod_sock_destroy_func = &sotpi_destroy; 7052 return (smodp); 7053 } 7054