/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/kmem_impl.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/user.h>
#include <sys/termios.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/suntpi.h>
#include <sys/ddi.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/pathname.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <sys/un.h>
#include <sys/strsun.h>

#include <sys/tiuser.h>
#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */

#include <c2/audit.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/udp_impl.h>

#include <sys/zone.h>

#include <fs/sockfs/nl7c.h>
#include <fs/sockfs/nl7curi.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/socktpi.h>
#include <fs/sockfs/socktpi_impl.h>

/*
 * Possible failures when memory can't be allocated. The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *	(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *	(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected-to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket, send them as a single message for AF_INET{,6}.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */

#ifdef SOCK_TEST
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 */
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;

static void sotpi_destroy(struct sonode *);
static struct sonode *sotpi_create(struct sockparams *, int, int, int, int,
    int, int *, cred_t *cr);

static boolean_t sotpi_info_create(struct sonode *, int);
static void sotpi_info_init(struct sonode *);
static void sotpi_info_fini(struct sonode *);
static void sotpi_info_destroy(struct sonode *);

/*
 * Do direct function call to the transport layer below; this would
 * also allow the transport to utilize read-side synchronous stream
 * interface if necessary. This is a /etc/system tunable that must
 * not be modified on a running system. By default this is enabled
 * for performance reasons and may be disabled for debugging purposes.
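 *
 * As a hedged illustration only (not a recommendation), such a tunable
 * is normally set from /etc/system before boot, e.g.
 *
 *	set sockfs:socktpi_direct = 0
 *
 * rather than being patched on a live system, which is exactly what the
 * note above warns against.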
 */
boolean_t socktpi_direct = B_TRUE;

static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;

extern void	sigintr(k_sigset_t *, int);
extern void	sigunintr(k_sigset_t *);

static int	sotpi_unbind(struct sonode *, int);

/* TPI sockfs sonode operations */
int		sotpi_init(struct sonode *, struct sonode *, struct cred *,
		    int);
static int	sotpi_accept(struct sonode *, int, struct cred *,
		    struct sonode **);
static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
		    int, struct cred *);
static int	sotpi_listen(struct sonode *, int, struct cred *);
static int	sotpi_connect(struct sonode *, struct sockaddr *,
		    socklen_t, int, int, struct cred *);
extern int	sotpi_recvmsg(struct sonode *, struct nmsghdr *,
		    struct uio *, struct cred *);
static int	sotpi_sendmsg(struct sonode *, struct nmsghdr *,
		    struct uio *, struct cred *);
static int	sotpi_sendmblk(struct sonode *, struct nmsghdr *, int,
		    struct cred *, mblk_t **);
static int	sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
		    struct uio *, void *, t_uscalar_t, int);
static int	sodgram_direct(struct sonode *, struct sockaddr *,
		    socklen_t, struct uio *, int);
extern int	sotpi_getpeername(struct sonode *, struct sockaddr *,
		    socklen_t *, boolean_t, struct cred *);
static int	sotpi_getsockname(struct sonode *, struct sockaddr *,
		    socklen_t *, struct cred *);
static int	sotpi_shutdown(struct sonode *, int, struct cred *);
extern int	sotpi_getsockopt(struct sonode *, int, int, void *,
		    socklen_t *, int, struct cred *);
extern int	sotpi_setsockopt(struct sonode *, int, int, const void *,
		    socklen_t, struct cred *);
static int	sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
		    int32_t *);
static int	socktpi_plumbioctl(struct vnode *, int, intptr_t, int,
		    struct cred *, int32_t *);
static int	sotpi_poll(struct sonode *, short, int, short *,
		    struct pollhead **);
static int	sotpi_close(struct sonode *, int, struct cred *);

static int	i_sotpi_info_constructor(sotpi_info_t *);
static void	i_sotpi_info_destructor(sotpi_info_t *);

sonodeops_t sotpi_sonodeops = {
	sotpi_init,		/* sop_init */
	sotpi_accept,		/* sop_accept */
	sotpi_bind,		/* sop_bind */
	sotpi_listen,		/* sop_listen */
	sotpi_connect,		/* sop_connect */
	sotpi_recvmsg,		/* sop_recvmsg */
	sotpi_sendmsg,		/* sop_sendmsg */
	sotpi_sendmblk,		/* sop_sendmblk */
	sotpi_getpeername,	/* sop_getpeername */
	sotpi_getsockname,	/* sop_getsockname */
	sotpi_shutdown,		/* sop_shutdown */
	sotpi_getsockopt,	/* sop_getsockopt */
	sotpi_setsockopt,	/* sop_setsockopt */
	sotpi_ioctl,		/* sop_ioctl */
	sotpi_poll,		/* sop_poll */
	sotpi_close,		/* sop_close */
};

/*
 * Return a TPI socket vnode.
 *
 * Note that sockets assume that the driver will clone (either itself
 * or by using the clone driver), i.e. a socket() call will always
 * result in a new vnode being created.
 */

/*
 * Common create code for socket and accept. If tso is set, the values
 * from that node are used instead of issuing a T_INFO_REQ.
 */

/* ARGSUSED */
static struct sonode *
sotpi_create(struct sockparams *sp, int family, int type, int protocol,
    int version, int sflags, int *errorp, cred_t *cr)
{
	struct sonode	*so;
	kmem_cache_t	*cp;
	int		sfamily = family;

	ASSERT(sp->sp_sdev_info.sd_vnode != NULL);

	if (family == AF_NCA) {
		/*
		 * The request is for an NCA socket so for NL7C use the
		 * INET domain instead and mark NL7C_AF_NCA below.
		 */
		family = AF_INET;
		/*
		 * NL7C is not supported in the non-global zone,
		 * we enforce this restriction here.
		 */
		if (getzoneid() != GLOBAL_ZONEID) {
			*errorp = ENOTSUP;
			return (NULL);
		}
	}

	/*
	 * To be compatible with the old TPI socket implementation, ignore
	 * the sleep flag (sflags) passed in.
	 */
	cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache;
	so = kmem_cache_alloc(cp, KM_SLEEP);
	if (so == NULL) {
		*errorp = ENOMEM;
		return (NULL);
	}

	sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops);
	sotpi_info_init(so);

	if (sfamily == AF_NCA) {
		SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA;
	}

	if (version == SOV_DEFAULT)
		version = so_default_version;

	so->so_version = (short)version;
	*errorp = 0;

	return (so);
}

static void
sotpi_destroy(struct sonode *so)
{
	kmem_cache_t *cp;
	struct sockparams *origsp;

	/*
	 * If there is a new dealloc function (i.e. smod_destroy_func),
	 * then it should check the correctness of the ops.
	 */

	ASSERT(so->so_ops == &sotpi_sonodeops);

	origsp = SOTOTPI(so)->sti_orig_sp;

	sotpi_info_fini(so);

	if (so->so_state & SS_FALLBACK_COMP) {
		/*
		 * A fallback happened, which means that a sotpi_info_t struct
		 * was allocated (as opposed to being allocated from the TPI
		 * sonode cache). Therefore we explicitly free the struct
		 * here.
		 */
		sotpi_info_destroy(so);
		ASSERT(origsp != NULL);

		origsp->sp_smod_info->smod_sock_destroy_func(so);
		SOCKPARAMS_DEC_REF(origsp);
	} else {
		sonode_fini(so);
		cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache :
		    socktpi_cache;
		kmem_cache_free(cp, so);
	}
}

/* ARGSUSED1 */
int
sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags)
{
	major_t maj;
	dev_t newdev;
	struct vnode *vp;
	int error = 0;
	struct stdata *stp;

	sotpi_info_t *sti = SOTOTPI(so);

	dprint(1, ("sotpi_init()\n"));

	/*
	 * Overwrite the sleep flag passed in; that is ok
	 * as the TPI socket does not honor the sleep flag.
	 */
	flags |= FREAD|FWRITE;

	/*
	 * Record in so_flag that it is a clone.
	 */
	if (getmajor(sti->sti_dev) == clone_major)
		so->so_flag |= SOCLONE;

	if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) &&
	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
	    (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP ||
	    so->so_protocol == IPPROTO_IP)) {
		/* Tell tcp or udp that it's talking to sockets */
		flags |= SO_SOCKSTR;

		/*
		 * Here we indicate to socktpi_open() our attempt to
		 * make direct calls between sockfs and transport.
		 * The final decision is left to socktpi_open().
403 */ 404 sti->sti_direct = 1; 405 406 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 407 if (so->so_type == SOCK_STREAM && tso != NULL) { 408 if (SOTOTPI(tso)->sti_direct) { 409 /* 410 * Inherit sti_direct from listener and pass 411 * SO_ACCEPTOR open flag to tcp, indicating 412 * that this is an accept fast-path instance. 413 */ 414 flags |= SO_ACCEPTOR; 415 } else { 416 /* 417 * sti_direct is not set on listener, meaning 418 * that the listener has been converted from 419 * a socket to a stream. Ensure that the 420 * acceptor inherits these settings. 421 */ 422 sti->sti_direct = 0; 423 flags &= ~SO_SOCKSTR; 424 } 425 } 426 } 427 428 /* 429 * Tell local transport that it is talking to sockets. 430 */ 431 if (so->so_family == AF_UNIX) { 432 flags |= SO_SOCKSTR; 433 } 434 435 vp = SOTOV(so); 436 newdev = vp->v_rdev; 437 maj = getmajor(newdev); 438 ASSERT(STREAMSTAB(maj)); 439 440 error = stropen(vp, &newdev, flags, cr); 441 442 stp = vp->v_stream; 443 if (error == 0) { 444 if (so->so_flag & SOCLONE) 445 ASSERT(newdev != vp->v_rdev); 446 mutex_enter(&so->so_lock); 447 sti->sti_dev = newdev; 448 vp->v_rdev = newdev; 449 mutex_exit(&so->so_lock); 450 451 if (stp->sd_flag & STRISTTY) { 452 /* 453 * this is a post SVR4 tty driver - a socket can not 454 * be a controlling terminal. Fail the open. 455 */ 456 (void) sotpi_close(so, flags, cr); 457 return (ENOTTY); /* XXX */ 458 } 459 460 ASSERT(stp->sd_wrq != NULL); 461 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 462 463 /* 464 * If caller is interested in doing direct function call 465 * interface to/from transport module, probe the module 466 * directly beneath the streamhead to see if it qualifies. 467 * 468 * We turn off the direct interface when qualifications fail. 469 * In the acceptor case, we simply turn off the sti_direct 470 * flag on the socket. We do the fallback after the accept 471 * has completed, before the new socket is returned to the 472 * application. 473 */ 474 if (sti->sti_direct) { 475 queue_t *tq = stp->sd_wrq->q_next; 476 477 /* 478 * sti_direct is currently supported and tested 479 * only for tcp/udp; this is the main reason to 480 * have the following assertions. 481 */ 482 ASSERT(so->so_family == AF_INET || 483 so->so_family == AF_INET6); 484 ASSERT(so->so_protocol == IPPROTO_UDP || 485 so->so_protocol == IPPROTO_TCP || 486 so->so_protocol == IPPROTO_IP); 487 ASSERT(so->so_type == SOCK_DGRAM || 488 so->so_type == SOCK_STREAM); 489 490 /* 491 * Abort direct call interface if the module directly 492 * underneath the stream head is not defined with the 493 * _D_DIRECT flag. This could happen in the tcp or 494 * udp case, when some other module is autopushed 495 * above it, or for some reasons the expected module 496 * isn't purely D_MP (which is the main requirement). 497 */ 498 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 499 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 500 int rval; 501 502 /* Continue on without direct calls */ 503 sti->sti_direct = 0; 504 505 /* 506 * Cannot issue ioctl on fallback socket since 507 * there is no conn associated with the queue. 508 * The fallback downcall will notify the proto 509 * of the change. 510 */ 511 if (!(flags & SO_ACCEPTOR) && 512 !(flags & SO_FALLBACK)) { 513 if ((error = strioctl(vp, 514 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 515 cr, &rval)) != 0) { 516 (void) sotpi_close(so, flags, 517 cr); 518 return (error); 519 } 520 } 521 } 522 } 523 524 if (flags & SO_FALLBACK) { 525 /* 526 * The stream created does not have a conn. 
			 * Do stream setup after the conn has been assigned.
			 */
			return (error);
		}
		if (error = so_strinit(so, tso)) {
			(void) sotpi_close(so, flags, cr);
			return (error);
		}

		/* Wildcard */
		if (so->so_protocol != so->so_sockparams->sp_protocol) {
			int protocol = so->so_protocol;
			/*
			 * Issue SO_PROTOTYPE setsockopt.
			 */
			error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
			    &protocol, (t_uscalar_t)sizeof (protocol), cr);
			if (error != 0) {
				(void) sotpi_close(so, flags, cr);
				/*
				 * Setsockopt often fails with ENOPROTOOPT but
				 * socket() should fail with
				 * EPROTONOSUPPORT/EPROTOTYPE.
				 */
				return (EPROTONOSUPPORT);
			}
		}

	} else {
		/*
		 * While the same socket can not be reopened (unlike specfs)
		 * the stream head sets STREOPENFAIL when the autopush fails.
		 */
		if ((stp != NULL) &&
		    (stp->sd_flag & STREOPENFAIL)) {
			/*
			 * Open failed part way through.
			 */
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~STREOPENFAIL;
			mutex_exit(&stp->sd_lock);
			(void) sotpi_close(so, flags, cr);
			return (error);
			/*NOTREACHED*/
		}
		ASSERT(stp == NULL);
	}
	TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN,
	    "sockfs open:maj %d vp %p so %p error %d",
	    maj, vp, so, error);
	return (error);
}

/*
 * Bind the socket to an unspecified address in sockfs only.
 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
 * required in all cases.
 */
static void
so_automatic_bind(struct sonode *so)
{
	sotpi_info_t *sti = SOTOTPI(so);
	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(!(so->so_state & SS_ISBOUND));
	ASSERT(sti->sti_unbind_mp);

	ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
	bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
	sti->sti_laddr_sa->sa_family = so->so_family;
	so->so_state |= SS_ISBOUND;
}


/*
 * Bind the socket.
 *
 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2
 * are passed in we allow rebinding. Note that for backwards compatibility
 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind.
 * Thus the rebinding code is currently not executed.
 *
 * The constraints for rebinding are:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 * This rebinding code was added based on some language in the XNET book
 * about not returning EINVAL if the protocol allows rebinding. However,
 * this language is not present in the POSIX socket draft. Thus maybe the
 * rebinding logic should be deleted from the source.
 *
 * A null "name" can be used to unbind the socket if:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
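 *
 * As a hedged, user-level sketch of the rebinding constraint (purely
 * illustrative; sin1 and sin2 stand for locally filled-in sockaddr_in
 * structures, and regular sockets pass _SOBIND_SOCKBSD so they get
 * EINVAL regardless, as noted above):
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	(void) bind(fd, (struct sockaddr *)&sin1, sizeof (sin1));
 *	(void) listen(fd, 5);
 *	(void) bind(fd, (struct sockaddr *)&sin2, sizeof (sin2));
 *		second bind fails, errno == EINVAL
 *
 * whereas a SOCK_DGRAM socket, or a stream socket that has neither
 * connected nor listened, is the case the rebinding language was
 * written for.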
623 */ 624 /* ARGSUSED */ 625 static int 626 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 627 socklen_t namelen, int backlog, int flags, struct cred *cr) 628 { 629 struct T_bind_req bind_req; 630 struct T_bind_ack *bind_ack; 631 int error = 0; 632 mblk_t *mp; 633 void *addr; 634 t_uscalar_t addrlen; 635 int unbind_on_err = 1; 636 boolean_t clear_acceptconn_on_err = B_FALSE; 637 boolean_t restore_backlog_on_err = B_FALSE; 638 int save_so_backlog; 639 t_scalar_t PRIM_type = O_T_BIND_REQ; 640 boolean_t tcp_udp_xport; 641 void *nl7c = NULL; 642 sotpi_info_t *sti = SOTOTPI(so); 643 644 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 645 (void *)so, (void *)name, namelen, backlog, flags, 646 pr_state(so->so_state, so->so_mode))); 647 648 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 649 650 if (!(flags & _SOBIND_LOCK_HELD)) { 651 mutex_enter(&so->so_lock); 652 so_lock_single(so); /* Set SOLOCKED */ 653 } else { 654 ASSERT(MUTEX_HELD(&so->so_lock)); 655 ASSERT(so->so_flag & SOLOCKED); 656 } 657 658 /* 659 * Make sure that there is a preallocated unbind_req message 660 * before binding. This message allocated when the socket is 661 * created but it might be have been consumed. 662 */ 663 if (sti->sti_unbind_mp == NULL) { 664 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 665 /* NOTE: holding so_lock while sleeping */ 666 sti->sti_unbind_mp = 667 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 668 cr); 669 } 670 671 if (flags & _SOBIND_REBIND) { 672 /* 673 * Called from solisten after doing an sotpi_unbind() or 674 * potentially without the unbind (latter for AF_INET{,6}). 675 */ 676 ASSERT(name == NULL && namelen == 0); 677 678 if (so->so_family == AF_UNIX) { 679 ASSERT(sti->sti_ux_bound_vp); 680 addr = &sti->sti_ux_laddr; 681 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 682 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 683 "addr 0x%p, vp %p\n", 684 addrlen, 685 (void *)((struct so_ux_addr *)addr)->soua_vp, 686 (void *)sti->sti_ux_bound_vp)); 687 } else { 688 addr = sti->sti_laddr_sa; 689 addrlen = (t_uscalar_t)sti->sti_laddr_len; 690 } 691 } else if (flags & _SOBIND_UNSPEC) { 692 ASSERT(name == NULL && namelen == 0); 693 694 /* 695 * The caller checked SS_ISBOUND but not necessarily 696 * under so_lock 697 */ 698 if (so->so_state & SS_ISBOUND) { 699 /* No error */ 700 goto done; 701 } 702 703 /* Set an initial local address */ 704 switch (so->so_family) { 705 case AF_UNIX: 706 /* 707 * Use an address with same size as struct sockaddr 708 * just like BSD. 709 */ 710 sti->sti_laddr_len = 711 (socklen_t)sizeof (struct sockaddr); 712 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 713 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 714 sti->sti_laddr_sa->sa_family = so->so_family; 715 716 /* 717 * Pass down an address with the implicit bind 718 * magic number and the rest all zeros. 719 * The transport will return a unique address. 720 */ 721 sti->sti_ux_laddr.soua_vp = NULL; 722 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 723 addr = &sti->sti_ux_laddr; 724 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 725 break; 726 727 case AF_INET: 728 case AF_INET6: 729 /* 730 * An unspecified bind in TPI has a NULL address. 731 * Set the address in sockfs to have the sa_family. 732 */ 733 sti->sti_laddr_len = (so->so_family == AF_INET) ? 
734 (socklen_t)sizeof (sin_t) : 735 (socklen_t)sizeof (sin6_t); 736 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 737 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 738 sti->sti_laddr_sa->sa_family = so->so_family; 739 addr = NULL; 740 addrlen = 0; 741 break; 742 743 default: 744 /* 745 * An unspecified bind in TPI has a NULL address. 746 * Set the address in sockfs to be zero length. 747 * 748 * Can not assume there is a sa_family for all 749 * protocol families. For example, AF_X25 does not 750 * have a family field. 751 */ 752 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 753 sti->sti_laddr_len = 0; /* XXX correct? */ 754 addr = NULL; 755 addrlen = 0; 756 break; 757 } 758 759 } else { 760 if (so->so_state & SS_ISBOUND) { 761 /* 762 * If it is ok to rebind the socket, first unbind 763 * with the transport. A rebind to the NULL address 764 * is interpreted as an unbind. 765 * Note that a bind to NULL in BSD does unbind the 766 * socket but it fails with EINVAL. 767 * Note that regular sockets set SOV_SOCKBSD i.e. 768 * _SOBIND_SOCKBSD gets set here hence no type of 769 * socket does currently allow rebinding. 770 * 771 * If the name is NULL just do an unbind. 772 */ 773 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 774 name != NULL) { 775 error = EINVAL; 776 unbind_on_err = 0; 777 eprintsoline(so, error); 778 goto done; 779 } 780 if ((so->so_mode & SM_CONNREQUIRED) && 781 (so->so_state & SS_CANTREBIND)) { 782 error = EINVAL; 783 unbind_on_err = 0; 784 eprintsoline(so, error); 785 goto done; 786 } 787 error = sotpi_unbind(so, 0); 788 if (error) { 789 eprintsoline(so, error); 790 goto done; 791 } 792 ASSERT(!(so->so_state & SS_ISBOUND)); 793 if (name == NULL) { 794 so->so_state &= 795 ~(SS_ISCONNECTED|SS_ISCONNECTING); 796 goto done; 797 } 798 } 799 800 /* X/Open requires this check */ 801 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 802 if (xnet_check_print) { 803 printf("sockfs: X/Open bind state check " 804 "caused EINVAL\n"); 805 } 806 error = EINVAL; 807 goto done; 808 } 809 810 switch (so->so_family) { 811 case AF_UNIX: 812 /* 813 * All AF_UNIX addresses are nul terminated 814 * when copied (copyin_name) in so the minimum 815 * length is 3 bytes. 816 */ 817 if (name == NULL || 818 (ssize_t)namelen <= sizeof (short) + 1) { 819 error = EISDIR; 820 eprintsoline(so, error); 821 goto done; 822 } 823 /* 824 * Verify so_family matches the bound family. 825 * BSD does not check this for AF_UNIX resulting 826 * in funny mknods. 827 */ 828 if (name->sa_family != so->so_family) { 829 error = EAFNOSUPPORT; 830 goto done; 831 } 832 break; 833 case AF_INET: 834 if (name == NULL) { 835 error = EINVAL; 836 eprintsoline(so, error); 837 goto done; 838 } 839 if ((size_t)namelen != sizeof (sin_t)) { 840 error = name->sa_family != so->so_family ? 841 EAFNOSUPPORT : EINVAL; 842 eprintsoline(so, error); 843 goto done; 844 } 845 if ((flags & _SOBIND_XPG4_2) && 846 (name->sa_family != so->so_family)) { 847 /* 848 * This check has to be made for X/Open 849 * sockets however application failures have 850 * been observed when it is applied to 851 * all sockets. 852 */ 853 error = EAFNOSUPPORT; 854 eprintsoline(so, error); 855 goto done; 856 } 857 /* 858 * Force a zero sa_family to match so_family. 859 * 860 * Some programs like inetd(1M) don't set the 861 * family field. Other programs leave 862 * sin_family set to garbage - SunOS 4.X does 863 * not check the family field on a bind. 864 * We use the family field that 865 * was passed in to the socket() call. 
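			 *
			 * A hedged illustration of the kind of caller being
			 * tolerated (made-up user-level fragment, not taken
			 * from inetd itself):
			 *
			 *	struct sockaddr_in sin;
			 *	(void) memset(&sin, 0, sizeof (sin));
			 *	sin.sin_port = htons(port);
			 *	(void) bind(fd, (struct sockaddr *)&sin,
			 *	    sizeof (sin));
			 *
			 * with sin_family still zero; rather than failing,
			 * sockfs substitutes the family given to socket().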
866 */ 867 name->sa_family = so->so_family; 868 break; 869 870 case AF_INET6: { 871 #ifdef DEBUG 872 sin6_t *sin6 = (sin6_t *)name; 873 #endif /* DEBUG */ 874 875 if (name == NULL) { 876 error = EINVAL; 877 eprintsoline(so, error); 878 goto done; 879 } 880 if ((size_t)namelen != sizeof (sin6_t)) { 881 error = name->sa_family != so->so_family ? 882 EAFNOSUPPORT : EINVAL; 883 eprintsoline(so, error); 884 goto done; 885 } 886 if (name->sa_family != so->so_family) { 887 /* 888 * With IPv6 we require the family to match 889 * unlike in IPv4. 890 */ 891 error = EAFNOSUPPORT; 892 eprintsoline(so, error); 893 goto done; 894 } 895 #ifdef DEBUG 896 /* 897 * Verify that apps don't forget to clear 898 * sin6_scope_id etc 899 */ 900 if (sin6->sin6_scope_id != 0 && 901 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 902 zcmn_err(getzoneid(), CE_WARN, 903 "bind with uninitialized sin6_scope_id " 904 "(%d) on socket. Pid = %d\n", 905 (int)sin6->sin6_scope_id, 906 (int)curproc->p_pid); 907 } 908 if (sin6->__sin6_src_id != 0) { 909 zcmn_err(getzoneid(), CE_WARN, 910 "bind with uninitialized __sin6_src_id " 911 "(%d) on socket. Pid = %d\n", 912 (int)sin6->__sin6_src_id, 913 (int)curproc->p_pid); 914 } 915 #endif /* DEBUG */ 916 break; 917 } 918 default: 919 /* 920 * Don't do any length or sa_family check to allow 921 * non-sockaddr style addresses. 922 */ 923 if (name == NULL) { 924 error = EINVAL; 925 eprintsoline(so, error); 926 goto done; 927 } 928 break; 929 } 930 931 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 932 error = ENAMETOOLONG; 933 eprintsoline(so, error); 934 goto done; 935 } 936 /* 937 * Save local address. 938 */ 939 sti->sti_laddr_len = (socklen_t)namelen; 940 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 941 bcopy(name, sti->sti_laddr_sa, namelen); 942 943 addr = sti->sti_laddr_sa; 944 addrlen = (t_uscalar_t)sti->sti_laddr_len; 945 switch (so->so_family) { 946 case AF_INET6: 947 case AF_INET: 948 break; 949 case AF_UNIX: { 950 struct sockaddr_un *soun = 951 (struct sockaddr_un *)sti->sti_laddr_sa; 952 struct vnode *vp, *rvp; 953 struct vattr vattr; 954 955 ASSERT(sti->sti_ux_bound_vp == NULL); 956 /* 957 * Create vnode for the specified path name. 958 * Keep vnode held with a reference in sti_ux_bound_vp. 959 * Use the vnode pointer as the address used in the 960 * bind with the transport. 961 * 962 * Use the same mode as in BSD. In particular this does 963 * not observe the umask. 964 */ 965 /* MAXPATHLEN + soun_family + nul termination */ 966 if (sti->sti_laddr_len > 967 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 968 error = ENAMETOOLONG; 969 eprintsoline(so, error); 970 goto done; 971 } 972 vattr.va_type = VSOCK; 973 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 974 vattr.va_mask = AT_TYPE|AT_MODE; 975 /* NOTE: holding so_lock */ 976 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 977 EXCL, 0, &vp, CRMKNOD, 0, 0); 978 if (error) { 979 if (error == EEXIST) 980 error = EADDRINUSE; 981 eprintsoline(so, error); 982 goto done; 983 } 984 /* 985 * Establish pointer from the underlying filesystem 986 * vnode to the socket node. 987 * sti_ux_bound_vp and v_stream->sd_vnode form the 988 * cross-linkage between the underlying filesystem 989 * node and the socket node. 
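			 *
			 * A rough, illustrative picture of the linkage set
			 * up just below:
			 *
			 *	vp->v_stream ----------> SOTOV(so)->v_stream
			 *	sti->sti_ux_bound_vp --> vp (held)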
990 */ 991 992 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 993 VN_HOLD(rvp); 994 VN_RELE(vp); 995 vp = rvp; 996 } 997 998 ASSERT(SOTOV(so)->v_stream); 999 mutex_enter(&vp->v_lock); 1000 vp->v_stream = SOTOV(so)->v_stream; 1001 sti->sti_ux_bound_vp = vp; 1002 mutex_exit(&vp->v_lock); 1003 1004 /* 1005 * Use the vnode pointer value as a unique address 1006 * (together with the magic number to avoid conflicts 1007 * with implicit binds) in the transport provider. 1008 */ 1009 sti->sti_ux_laddr.soua_vp = 1010 (void *)sti->sti_ux_bound_vp; 1011 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1012 addr = &sti->sti_ux_laddr; 1013 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1014 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1015 addrlen, 1016 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1017 break; 1018 } 1019 } /* end switch (so->so_family) */ 1020 } 1021 1022 /* 1023 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1024 * the transport can start passing up T_CONN_IND messages 1025 * as soon as it receives the bind req and strsock_proto() 1026 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1027 */ 1028 if (flags & _SOBIND_LISTEN) { 1029 if ((so->so_state & SS_ACCEPTCONN) == 0) 1030 clear_acceptconn_on_err = B_TRUE; 1031 save_so_backlog = so->so_backlog; 1032 restore_backlog_on_err = B_TRUE; 1033 so->so_state |= SS_ACCEPTCONN; 1034 so->so_backlog = backlog; 1035 } 1036 1037 /* 1038 * If NL7C addr(s) have been configured check for addr/port match, 1039 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 1040 * 1041 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 1042 * family sockets only. If match mark as such. 1043 */ 1044 if (nl7c_enabled && ((addr != NULL && 1045 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1046 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 1047 sti->sti_nl7c_flags == NL7C_AF_NCA)) { 1048 /* 1049 * NL7C is not supported in non-global zones, 1050 * we enforce this restriction here. 1051 */ 1052 if (so->so_zoneid == GLOBAL_ZONEID) { 1053 /* An NL7C socket, mark it */ 1054 sti->sti_nl7c_flags |= NL7C_ENABLED; 1055 if (nl7c == NULL) { 1056 /* 1057 * Was an AF_NCA bind() so add it to the 1058 * addr list for reporting purposes. 1059 */ 1060 nl7c = nl7c_add_addr(addr, addrlen); 1061 } 1062 } else 1063 nl7c = NULL; 1064 } 1065 1066 /* 1067 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1068 * for other transports we will send in a O_T_BIND_REQ. 
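	 *
	 * Sketch of the M_PROTO message built just below (layout follows
	 * from ADDR_offset being set to sizeof (bind_req); illustrative
	 * only):
	 *
	 *	b_rptr -> struct T_bind_req	PRIM_type, ADDR_length,
	 *					ADDR_offset, CONIND_number
	 *	          address (addrlen)	sockaddr, or so_ux_addr
	 *					for AF_UNIX
	 *	b_wptr -> end of message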
1069 */ 1070 if (tcp_udp_xport && 1071 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1072 PRIM_type = T_BIND_REQ; 1073 1074 bind_req.PRIM_type = PRIM_type; 1075 bind_req.ADDR_length = addrlen; 1076 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1077 bind_req.CONIND_number = backlog; 1078 /* NOTE: holding so_lock while sleeping */ 1079 mp = soallocproto2(&bind_req, sizeof (bind_req), 1080 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1081 sti->sti_laddr_valid = 0; 1082 1083 /* Done using sti_laddr_sa - can drop the lock */ 1084 mutex_exit(&so->so_lock); 1085 1086 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1087 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1088 if (error) { 1089 eprintsoline(so, error); 1090 mutex_enter(&so->so_lock); 1091 goto done; 1092 } 1093 1094 mutex_enter(&so->so_lock); 1095 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1096 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1097 if (error) { 1098 eprintsoline(so, error); 1099 goto done; 1100 } 1101 ASSERT(mp); 1102 /* 1103 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1104 * strsock_proto while the lock was dropped above, the bind 1105 * is allowed to complete. 1106 */ 1107 1108 /* Mark as bound. This will be undone if we detect errors below. */ 1109 if (flags & _SOBIND_NOXLATE) { 1110 ASSERT(so->so_family == AF_UNIX); 1111 sti->sti_faddr_noxlate = 1; 1112 } 1113 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1114 so->so_state |= SS_ISBOUND; 1115 ASSERT(sti->sti_unbind_mp); 1116 1117 /* note that we've already set SS_ACCEPTCONN above */ 1118 1119 /* 1120 * Recompute addrlen - an unspecied bind sent down an 1121 * address of length zero but we expect the appropriate length 1122 * in return. 1123 */ 1124 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 1125 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1126 1127 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1128 /* 1129 * The alignment restriction is really too strict but 1130 * we want enough alignment to inspect the fields of 1131 * a sockaddr_in. 1132 */ 1133 addr = sogetoff(mp, bind_ack->ADDR_offset, 1134 bind_ack->ADDR_length, 1135 __TPI_ALIGN_SIZE); 1136 if (addr == NULL) { 1137 freemsg(mp); 1138 error = EPROTO; 1139 eprintsoline(so, error); 1140 goto done; 1141 } 1142 if (!(flags & _SOBIND_UNSPEC)) { 1143 /* 1144 * Verify that the transport didn't return something we 1145 * did not want e.g. an address other than what we asked for. 1146 * 1147 * NOTE: These checks would go away if/when we switch to 1148 * using the new TPI (in which the transport would fail 1149 * the request instead of assigning a different address). 1150 * 1151 * NOTE2: For protocols that we don't know (i.e. any 1152 * other than AF_INET6, AF_INET and AF_UNIX), we 1153 * cannot know if the transport should be expected to 1154 * return the same address as that requested. 1155 * 1156 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1157 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 1158 * 1159 * For example, in the case of netatalk it may be 1160 * inappropriate for the transport to return the 1161 * requested address (as it may have allocated a local 1162 * port number in behaviour similar to that of an 1163 * AF_INET bind request with a port number of zero). 
1164 * 1165 * Given the definition of O_T_BIND_REQ, where the 1166 * transport may bind to an address other than the 1167 * requested address, it's not possible to determine 1168 * whether a returned address that differs from the 1169 * requested address is a reason to fail (because the 1170 * requested address was not available) or succeed 1171 * (because the transport allocated an appropriate 1172 * address and/or port). 1173 * 1174 * sockfs currently requires that the transport return 1175 * the requested address in the T_BIND_ACK, unless 1176 * there is code here to allow for any discrepancy. 1177 * Such code exists for AF_INET and AF_INET6. 1178 * 1179 * Netatalk chooses to return the requested address 1180 * rather than the (correct) allocated address. This 1181 * means that netatalk violates the TPI specification 1182 * (and would not function correctly if used from a 1183 * TLI application), but it does mean that it works 1184 * with sockfs. 1185 * 1186 * As noted above, using the newer XTI bind primitive 1187 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1188 * allow sockfs to be more sure about whether or not 1189 * the bind request had succeeded (as transports are 1190 * not permitted to bind to a different address than 1191 * that requested - they must return failure). 1192 * Unfortunately, support for T_BIND_REQ may not be 1193 * present in all transport implementations (netatalk, 1194 * for example, doesn't have it), making the 1195 * transition difficult. 1196 */ 1197 if (bind_ack->ADDR_length != addrlen) { 1198 /* Assumes that the requested address was in use */ 1199 freemsg(mp); 1200 error = EADDRINUSE; 1201 eprintsoline(so, error); 1202 goto done; 1203 } 1204 1205 switch (so->so_family) { 1206 case AF_INET6: 1207 case AF_INET: { 1208 sin_t *rname, *aname; 1209 1210 rname = (sin_t *)addr; 1211 aname = (sin_t *)sti->sti_laddr_sa; 1212 1213 /* 1214 * Take advantage of the alignment 1215 * of sin_port and sin6_port which fall 1216 * in the same place in their data structures. 1217 * Just use sin_port for either address family. 1218 * 1219 * This may become a problem if (heaven forbid) 1220 * there's a separate ipv6port_reserved... :-P 1221 * 1222 * Binding to port 0 has the semantics of letting 1223 * the transport bind to any port. 1224 * 1225 * If the transport is TCP or UDP since we had sent 1226 * a T_BIND_REQ we would not get a port other than 1227 * what we asked for. 1228 */ 1229 if (tcp_udp_xport) { 1230 /* 1231 * Pick up the new port number if we bound to 1232 * port 0. 1233 */ 1234 if (aname->sin_port == 0) 1235 aname->sin_port = rname->sin_port; 1236 sti->sti_laddr_valid = 1; 1237 break; 1238 } 1239 if (aname->sin_port != 0 && 1240 aname->sin_port != rname->sin_port) { 1241 freemsg(mp); 1242 error = EADDRINUSE; 1243 eprintsoline(so, error); 1244 goto done; 1245 } 1246 /* 1247 * Pick up the new port number if we bound to port 0. 1248 */ 1249 aname->sin_port = rname->sin_port; 1250 1251 /* 1252 * Unfortunately, addresses aren't _quite_ the same. 
1253 */ 1254 if (so->so_family == AF_INET) { 1255 if (aname->sin_addr.s_addr != 1256 rname->sin_addr.s_addr) { 1257 freemsg(mp); 1258 error = EADDRNOTAVAIL; 1259 eprintsoline(so, error); 1260 goto done; 1261 } 1262 } else { 1263 sin6_t *rname6 = (sin6_t *)rname; 1264 sin6_t *aname6 = (sin6_t *)aname; 1265 1266 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1267 &rname6->sin6_addr)) { 1268 freemsg(mp); 1269 error = EADDRNOTAVAIL; 1270 eprintsoline(so, error); 1271 goto done; 1272 } 1273 } 1274 break; 1275 } 1276 case AF_UNIX: 1277 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1278 freemsg(mp); 1279 error = EADDRINUSE; 1280 eprintsoline(so, error); 1281 eprintso(so, 1282 ("addrlen %d, addr 0x%x, vp %p\n", 1283 addrlen, *((int *)addr), 1284 (void *)sti->sti_ux_bound_vp)); 1285 goto done; 1286 } 1287 sti->sti_laddr_valid = 1; 1288 break; 1289 default: 1290 /* 1291 * NOTE: This assumes that addresses can be 1292 * byte-compared for equivalence. 1293 */ 1294 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1295 freemsg(mp); 1296 error = EADDRINUSE; 1297 eprintsoline(so, error); 1298 goto done; 1299 } 1300 /* 1301 * Don't mark sti_laddr_valid, as we cannot be 1302 * sure that the returned address is the real 1303 * bound address when talking to an unknown 1304 * transport. 1305 */ 1306 break; 1307 } 1308 } else { 1309 /* 1310 * Save for returned address for getsockname. 1311 * Needed for unspecific bind unless transport supports 1312 * the TI_GETMYNAME ioctl. 1313 * Do this for AF_INET{,6} even though they do, as 1314 * caching info here is much better performance than 1315 * a TPI/STREAMS trip to the transport for getsockname. 1316 * Any which can't for some reason _must_ _not_ set 1317 * sti_laddr_valid here for the caching version of 1318 * getsockname to not break; 1319 */ 1320 switch (so->so_family) { 1321 case AF_UNIX: 1322 /* 1323 * Record the address bound with the transport 1324 * for use by socketpair. 1325 */ 1326 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1327 sti->sti_laddr_valid = 1; 1328 break; 1329 case AF_INET: 1330 case AF_INET6: 1331 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1332 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1333 sti->sti_laddr_valid = 1; 1334 break; 1335 default: 1336 /* 1337 * Don't mark sti_laddr_valid, as we cannot be 1338 * sure that the returned address is the real 1339 * bound address when talking to an unknown 1340 * transport. 
1341 */ 1342 break; 1343 } 1344 } 1345 1346 if (nl7c != NULL) { 1347 /* Register listen()er sonode pointer with NL7C */ 1348 nl7c_listener_addr(nl7c, so); 1349 } 1350 1351 freemsg(mp); 1352 1353 done: 1354 if (error) { 1355 /* reset state & backlog to values held on entry */ 1356 if (clear_acceptconn_on_err == B_TRUE) 1357 so->so_state &= ~SS_ACCEPTCONN; 1358 if (restore_backlog_on_err == B_TRUE) 1359 so->so_backlog = save_so_backlog; 1360 1361 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1362 int err; 1363 1364 err = sotpi_unbind(so, 0); 1365 /* LINTED - statement has no consequent: if */ 1366 if (err) { 1367 eprintsoline(so, error); 1368 } else { 1369 ASSERT(!(so->so_state & SS_ISBOUND)); 1370 } 1371 } 1372 } 1373 if (!(flags & _SOBIND_LOCK_HELD)) { 1374 so_unlock_single(so, SOLOCKED); 1375 mutex_exit(&so->so_lock); 1376 } else { 1377 ASSERT(MUTEX_HELD(&so->so_lock)); 1378 ASSERT(so->so_flag & SOLOCKED); 1379 } 1380 return (error); 1381 } 1382 1383 /* bind the socket */ 1384 static int 1385 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1386 int flags, struct cred *cr) 1387 { 1388 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1389 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1390 1391 flags &= ~_SOBIND_SOCKETPAIR; 1392 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1393 } 1394 1395 /* 1396 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1397 * address, or when listen needs to unbind and bind. 1398 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1399 * so that a sobind can pick them up. 1400 */ 1401 static int 1402 sotpi_unbind(struct sonode *so, int flags) 1403 { 1404 struct T_unbind_req unbind_req; 1405 int error = 0; 1406 mblk_t *mp; 1407 sotpi_info_t *sti = SOTOTPI(so); 1408 1409 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1410 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1411 1412 ASSERT(MUTEX_HELD(&so->so_lock)); 1413 ASSERT(so->so_flag & SOLOCKED); 1414 1415 if (!(so->so_state & SS_ISBOUND)) { 1416 error = EINVAL; 1417 eprintsoline(so, error); 1418 goto done; 1419 } 1420 1421 mutex_exit(&so->so_lock); 1422 1423 /* 1424 * Flush the read and write side (except stream head read queue) 1425 * and send down T_UNBIND_REQ. 1426 */ 1427 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1428 1429 unbind_req.PRIM_type = T_UNBIND_REQ; 1430 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1431 0, _ALLOC_SLEEP, CRED()); 1432 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1433 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1434 mutex_enter(&so->so_lock); 1435 if (error) { 1436 eprintsoline(so, error); 1437 goto done; 1438 } 1439 1440 error = sowaitokack(so, T_UNBIND_REQ); 1441 if (error) { 1442 eprintsoline(so, error); 1443 goto done; 1444 } 1445 1446 /* 1447 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1448 * strsock_proto while the lock was dropped above, the unbind 1449 * is allowed to complete. 1450 */ 1451 if (!(flags & _SOUNBIND_REBIND)) { 1452 /* 1453 * Clear out bound address. 
1454 */ 1455 vnode_t *vp; 1456 1457 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1458 sti->sti_ux_bound_vp = NULL; 1459 vn_rele_stream(vp); 1460 } 1461 /* Clear out address */ 1462 sti->sti_laddr_len = 0; 1463 } 1464 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1465 sti->sti_laddr_valid = 0; 1466 1467 done: 1468 1469 /* If the caller held the lock don't release it here */ 1470 ASSERT(MUTEX_HELD(&so->so_lock)); 1471 ASSERT(so->so_flag & SOLOCKED); 1472 1473 return (error); 1474 } 1475 1476 /* 1477 * listen on the socket. 1478 * For TPI conforming transports this has to first unbind with the transport 1479 * and then bind again using the new backlog. 1480 */ 1481 /* ARGSUSED */ 1482 int 1483 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1484 { 1485 int error = 0; 1486 sotpi_info_t *sti = SOTOTPI(so); 1487 1488 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1489 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1490 1491 if (sti->sti_serv_type == T_CLTS) 1492 return (EOPNOTSUPP); 1493 1494 /* 1495 * If the socket is ready to accept connections already, then 1496 * return without doing anything. This avoids a problem where 1497 * a second listen() call fails if a connection is pending and 1498 * leaves the socket unbound. Only when we are not unbinding 1499 * with the transport can we safely increase the backlog. 1500 */ 1501 if (so->so_state & SS_ACCEPTCONN && 1502 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1503 /*CONSTCOND*/ 1504 !solisten_tpi_tcp)) 1505 return (0); 1506 1507 if (so->so_state & SS_ISCONNECTED) 1508 return (EINVAL); 1509 1510 mutex_enter(&so->so_lock); 1511 so_lock_single(so); /* Set SOLOCKED */ 1512 1513 /* 1514 * If the listen doesn't change the backlog we do nothing. 1515 * This avoids an EPROTO error from the transport. 1516 */ 1517 if ((so->so_state & SS_ACCEPTCONN) && 1518 so->so_backlog == backlog) 1519 goto done; 1520 1521 if (!(so->so_state & SS_ISBOUND)) { 1522 /* 1523 * Must have been explicitly bound in the UNIX domain. 1524 */ 1525 if (so->so_family == AF_UNIX) { 1526 error = EINVAL; 1527 goto done; 1528 } 1529 error = sotpi_bindlisten(so, NULL, 0, backlog, 1530 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1531 } else if (backlog > 0) { 1532 /* 1533 * AF_INET{,6} hack to avoid losing the port. 1534 * Assumes that all AF_INET{,6} transports can handle a 1535 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1536 * has already bound thus it is possible to avoid the unbind. 1537 */ 1538 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1539 /*CONSTCOND*/ 1540 !solisten_tpi_tcp)) { 1541 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1542 if (error) 1543 goto done; 1544 } 1545 error = sotpi_bindlisten(so, NULL, 0, backlog, 1546 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1547 } else { 1548 so->so_state |= SS_ACCEPTCONN; 1549 so->so_backlog = backlog; 1550 } 1551 if (error) 1552 goto done; 1553 ASSERT(so->so_state & SS_ACCEPTCONN); 1554 done: 1555 so_unlock_single(so, SOLOCKED); 1556 mutex_exit(&so->so_lock); 1557 return (error); 1558 } 1559 1560 /* 1561 * Disconnect either a specified seqno or all (-1). 1562 * The former is used on listening sockets only. 1563 * 1564 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1565 * the current use of sodisconnect(seqno == -1) is only for shutdown 1566 * so there is no point (and potentially incorrect) to unbind. 
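 *
 * Hedged usage sketch of the two forms, for orientation only (flag
 * arguments elided):
 *
 *	(void) sodisconnect(so, conn_ind->SEQ_number, ...);
 *		refuse one pending connection on a listener
 *	(void) sodisconnect(so, -1, ...);
 *		drop everything; the shutdown path uses this form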
1567 */ 1568 static int 1569 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1570 { 1571 struct T_discon_req discon_req; 1572 int error = 0; 1573 mblk_t *mp; 1574 1575 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1576 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1577 1578 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1579 mutex_enter(&so->so_lock); 1580 so_lock_single(so); /* Set SOLOCKED */ 1581 } else { 1582 ASSERT(MUTEX_HELD(&so->so_lock)); 1583 ASSERT(so->so_flag & SOLOCKED); 1584 } 1585 1586 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1587 error = EINVAL; 1588 eprintsoline(so, error); 1589 goto done; 1590 } 1591 1592 mutex_exit(&so->so_lock); 1593 /* 1594 * Flush the write side (unless this is a listener) 1595 * and then send down a T_DISCON_REQ. 1596 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1597 * and other messages.) 1598 */ 1599 if (!(so->so_state & SS_ACCEPTCONN)) 1600 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1601 1602 discon_req.PRIM_type = T_DISCON_REQ; 1603 discon_req.SEQ_number = seqno; 1604 mp = soallocproto1(&discon_req, sizeof (discon_req), 1605 0, _ALLOC_SLEEP, CRED()); 1606 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1607 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1608 mutex_enter(&so->so_lock); 1609 if (error) { 1610 eprintsoline(so, error); 1611 goto done; 1612 } 1613 1614 error = sowaitokack(so, T_DISCON_REQ); 1615 if (error) { 1616 eprintsoline(so, error); 1617 goto done; 1618 } 1619 /* 1620 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1621 * strsock_proto while the lock was dropped above, the disconnect 1622 * is allowed to complete. However, it is not possible to 1623 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1624 */ 1625 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1626 SOTOTPI(so)->sti_laddr_valid = 0; 1627 SOTOTPI(so)->sti_faddr_valid = 0; 1628 done: 1629 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1630 so_unlock_single(so, SOLOCKED); 1631 mutex_exit(&so->so_lock); 1632 } else { 1633 /* If the caller held the lock don't release it here */ 1634 ASSERT(MUTEX_HELD(&so->so_lock)); 1635 ASSERT(so->so_flag & SOLOCKED); 1636 } 1637 return (error); 1638 } 1639 1640 /* ARGSUSED */ 1641 int 1642 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1643 struct sonode **nsop) 1644 { 1645 struct T_conn_ind *conn_ind; 1646 struct T_conn_res *conn_res; 1647 int error = 0; 1648 mblk_t *mp, *ack_mp; 1649 struct sonode *nso; 1650 vnode_t *nvp; 1651 void *src; 1652 t_uscalar_t srclen; 1653 void *opt; 1654 t_uscalar_t optlen; 1655 t_scalar_t PRIM_type; 1656 t_scalar_t SEQ_number; 1657 size_t sinlen; 1658 sotpi_info_t *sti = SOTOTPI(so); 1659 sotpi_info_t *nsti; 1660 1661 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1662 (void *)so, fflag, (void *)nsop, 1663 pr_state(so->so_state, so->so_mode))); 1664 1665 /* 1666 * Defer single-threading the accepting socket until 1667 * the T_CONN_IND has been received and parsed and the 1668 * new sonode has been opened. 1669 */ 1670 1671 /* Check that we are not already connected */ 1672 if ((so->so_state & SS_ACCEPTCONN) == 0) 1673 goto conn_bad; 1674 again: 1675 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1676 goto e_bad; 1677 1678 ASSERT(mp != NULL); 1679 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1680 1681 /* 1682 * Save SEQ_number for error paths. 
1683 */ 1684 SEQ_number = conn_ind->SEQ_number; 1685 1686 srclen = conn_ind->SRC_length; 1687 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1688 if (src == NULL) { 1689 error = EPROTO; 1690 freemsg(mp); 1691 eprintsoline(so, error); 1692 goto disconnect_unlocked; 1693 } 1694 optlen = conn_ind->OPT_length; 1695 switch (so->so_family) { 1696 case AF_INET: 1697 case AF_INET6: 1698 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1699 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1700 &opt, conn_ind->OPT_length); 1701 } else { 1702 /* 1703 * The transport (in this case TCP) hasn't sent up 1704 * a pointer to an instance for the accept fast-path. 1705 * Disable fast-path completely because the call to 1706 * sotpi_create() below would otherwise create an 1707 * incomplete TCP instance, which would lead to 1708 * problems when sockfs sends a normal T_CONN_RES 1709 * message down the new stream. 1710 */ 1711 if (sti->sti_direct) { 1712 int rval; 1713 /* 1714 * For consistency we inform tcp to disable 1715 * direct interface on the listener, though 1716 * we can certainly live without doing this 1717 * because no data will ever travel upstream 1718 * on the listening socket. 1719 */ 1720 sti->sti_direct = 0; 1721 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1722 0, 0, K_TO_K, cr, &rval); 1723 } 1724 opt = NULL; 1725 optlen = 0; 1726 } 1727 break; 1728 case AF_UNIX: 1729 default: 1730 if (optlen != 0) { 1731 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1732 __TPI_ALIGN_SIZE); 1733 if (opt == NULL) { 1734 error = EPROTO; 1735 freemsg(mp); 1736 eprintsoline(so, error); 1737 goto disconnect_unlocked; 1738 } 1739 } 1740 if (so->so_family == AF_UNIX) { 1741 if (!sti->sti_faddr_noxlate) { 1742 src = NULL; 1743 srclen = 0; 1744 } 1745 /* Extract src address from options */ 1746 if (optlen != 0) 1747 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1748 } 1749 break; 1750 } 1751 1752 /* 1753 * Create the new socket. 1754 */ 1755 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1756 if (nso == NULL) { 1757 ASSERT(error != 0); 1758 /* 1759 * Accept can not fail with ENOBUFS. sotpi_create 1760 * sleeps waiting for memory until a signal is caught 1761 * so return EINTR. 1762 */ 1763 freemsg(mp); 1764 if (error == ENOBUFS) 1765 error = EINTR; 1766 goto e_disc_unl; 1767 } 1768 nvp = SOTOV(nso); 1769 nsti = SOTOTPI(nso); 1770 1771 #ifdef DEBUG 1772 /* 1773 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1774 * it's inherited early to allow debugging of the accept code itself. 1775 */ 1776 nso->so_options |= so->so_options & SO_DEBUG; 1777 #endif /* DEBUG */ 1778 1779 /* 1780 * Save the SRC address from the T_CONN_IND 1781 * for getpeername to work on AF_UNIX and on transports that do not 1782 * support TI_GETPEERNAME. 1783 * 1784 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1785 * copyin_name(). 1786 */ 1787 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1788 error = EINVAL; 1789 freemsg(mp); 1790 eprintsoline(so, error); 1791 goto disconnect_vp_unlocked; 1792 } 1793 nsti->sti_faddr_len = (socklen_t)srclen; 1794 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1795 bcopy(src, nsti->sti_faddr_sa, srclen); 1796 nsti->sti_faddr_valid = 1; 1797 1798 /* 1799 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 
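	 *
	 * A hedged, user-level illustration of where this peer credential
	 * can later surface (getpeerucred(3C) on the accepted socket;
	 * sketch only, error handling omitted):
	 *
	 *	ucred_t *uc = NULL;
	 *	if (getpeerucred(accepted_fd, &uc) == 0) {
	 *		pid_t peer_pid = ucred_getpid(uc);
	 *		ucred_free(uc);
	 *	}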
1800 */ 1801 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1802 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1803 cred_t *cr; 1804 pid_t cpid; 1805 1806 cr = msg_getcred(mp, &cpid); 1807 if (cr != NULL) { 1808 crhold(cr); 1809 nso->so_peercred = cr; 1810 nso->so_cpid = cpid; 1811 } 1812 freemsg(mp); 1813 1814 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1815 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1816 if (mp == NULL) { 1817 /* 1818 * Accept can not fail with ENOBUFS. 1819 * A signal was caught so return EINTR. 1820 */ 1821 error = EINTR; 1822 eprintsoline(so, error); 1823 goto disconnect_vp_unlocked; 1824 } 1825 conn_res = (struct T_conn_res *)mp->b_rptr; 1826 } else { 1827 /* 1828 * For efficency reasons we use msg_extractcred; no crhold 1829 * needed since db_credp is cleared (i.e., we move the cred 1830 * from the message to so_peercred. 1831 */ 1832 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1833 1834 mp->b_rptr = DB_BASE(mp); 1835 conn_res = (struct T_conn_res *)mp->b_rptr; 1836 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1837 1838 mblk_setcred(mp, cr, curproc->p_pid); 1839 } 1840 1841 /* 1842 * New socket must be bound at least in sockfs and, except for AF_INET, 1843 * (or AF_INET6) it also has to be bound in the transport provider. 1844 * We set the local address in the sonode from the T_OK_ACK of the 1845 * T_CONN_RES. For this reason the address we bind to here isn't 1846 * important. 1847 */ 1848 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1849 /*CONSTCOND*/ 1850 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1851 /* 1852 * Optimization for AF_INET{,6} transports 1853 * that can handle a T_CONN_RES without being bound. 1854 */ 1855 mutex_enter(&nso->so_lock); 1856 so_automatic_bind(nso); 1857 mutex_exit(&nso->so_lock); 1858 } else { 1859 /* Perform NULL bind with the transport provider. */ 1860 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1861 cr)) != 0) { 1862 ASSERT(error != ENOBUFS); 1863 freemsg(mp); 1864 eprintsoline(nso, error); 1865 goto disconnect_vp_unlocked; 1866 } 1867 } 1868 1869 /* 1870 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1871 * so that any data arriving on the new socket will cause the 1872 * appropriate signals to be delivered for the new socket. 1873 * 1874 * No other thread (except strsock_proto and strsock_misc) 1875 * can access the new socket thus we relax the locking. 1876 */ 1877 nso->so_pgrp = so->so_pgrp; 1878 nso->so_state |= so->so_state & SS_ASYNC; 1879 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1880 1881 if (nso->so_pgrp != 0) { 1882 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1883 eprintsoline(nso, error); 1884 error = 0; 1885 nso->so_pgrp = 0; 1886 } 1887 } 1888 1889 /* 1890 * Make note of the socket level options. TCP and IP level options 1891 * are already inherited. We could do all this after accept is 1892 * successful but doing it here simplifies code and no harm done 1893 * for error case. 1894 */ 1895 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1896 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1897 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1898 nso->so_sndbuf = so->so_sndbuf; 1899 nso->so_rcvbuf = so->so_rcvbuf; 1900 if (nso->so_options & SO_LINGER) 1901 nso->so_linger = so->so_linger; 1902 1903 /* 1904 * Note that the following sti_direct code path should be 1905 * removed once we are confident that the direct sockets 1906 * do not result in any degradation. 
1907 */ 1908 if (sti->sti_direct) { 1909 1910 ASSERT(opt != NULL); 1911 1912 conn_res->OPT_length = optlen; 1913 conn_res->OPT_offset = MBLKL(mp); 1914 bcopy(&opt, mp->b_wptr, optlen); 1915 mp->b_wptr += optlen; 1916 conn_res->PRIM_type = T_CONN_RES; 1917 conn_res->ACCEPTOR_id = 0; 1918 PRIM_type = T_CONN_RES; 1919 1920 /* Send down the T_CONN_RES on acceptor STREAM */ 1921 error = kstrputmsg(SOTOV(nso), mp, NULL, 1922 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1923 if (error) { 1924 mutex_enter(&so->so_lock); 1925 so_lock_single(so); 1926 eprintsoline(so, error); 1927 goto disconnect_vp; 1928 } 1929 mutex_enter(&nso->so_lock); 1930 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1931 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1932 if (error) { 1933 mutex_exit(&nso->so_lock); 1934 mutex_enter(&so->so_lock); 1935 so_lock_single(so); 1936 eprintsoline(so, error); 1937 goto disconnect_vp; 1938 } 1939 if (nso->so_family == AF_INET) { 1940 sin_t *sin; 1941 1942 sin = (sin_t *)(ack_mp->b_rptr + 1943 sizeof (struct T_ok_ack)); 1944 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 1945 nsti->sti_laddr_len = sizeof (sin_t); 1946 } else { 1947 sin6_t *sin6; 1948 1949 sin6 = (sin6_t *)(ack_mp->b_rptr + 1950 sizeof (struct T_ok_ack)); 1951 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 1952 nsti->sti_laddr_len = sizeof (sin6_t); 1953 } 1954 freemsg(ack_mp); 1955 1956 nso->so_state |= SS_ISCONNECTED; 1957 nso->so_proto_handle = (sock_lower_handle_t)opt; 1958 nsti->sti_laddr_valid = 1; 1959 1960 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 1961 /* 1962 * A NL7C marked listen()er so the new socket 1963 * inherits the listen()er's NL7C state, except 1964 * for NL7C_POLLIN. 1965 * 1966 * Only call NL7C to process the new socket if 1967 * the listen socket allows blocking i/o. 1968 */ 1969 nsti->sti_nl7c_flags = 1970 sti->sti_nl7c_flags & (~NL7C_POLLIN); 1971 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1972 /* 1973 * Nonblocking accept() just make it 1974 * persist to defer processing to the 1975 * read-side syscall (e.g. read). 1976 */ 1977 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 1978 } else if (nl7c_process(nso, B_FALSE)) { 1979 /* 1980 * NL7C has completed processing on the 1981 * socket, close the socket and back to 1982 * the top to await the next T_CONN_IND. 1983 */ 1984 mutex_exit(&nso->so_lock); 1985 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1986 cr, NULL); 1987 VN_RELE(nvp); 1988 goto again; 1989 } 1990 /* Pass the new socket out */ 1991 } 1992 1993 mutex_exit(&nso->so_lock); 1994 1995 /* 1996 * It's possible, through the use of autopush for example, 1997 * that the acceptor stream may not support sti_direct 1998 * semantics. If the new socket does not support sti_direct 1999 * we issue a _SIOCSOCKFALLBACK to inform the transport 2000 * as we would in the I_PUSH case. 2001 */ 2002 if (nsti->sti_direct == 0) { 2003 int rval; 2004 2005 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2006 0, 0, K_TO_K, cr, &rval)) != 0) { 2007 mutex_enter(&so->so_lock); 2008 so_lock_single(so); 2009 eprintsoline(so, error); 2010 goto disconnect_vp; 2011 } 2012 } 2013 2014 /* 2015 * Pass out new socket. 2016 */ 2017 if (nsop != NULL) 2018 *nsop = nso; 2019 2020 return (0); 2021 } 2022 2023 /* 2024 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2025 * which don't support the FireEngine accept fast-path. It is also 2026 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2027 * again. 
Neither sockfs nor TCP attempt to find out if some other 2028 * random module has been inserted in between (in which case we 2029 * should follow TLI accept behaviour). We blindly assume the worst 2030 * case and revert back to old behaviour i.e. TCP will not send us 2031 * any option (eager) and the accept should happen on the listener 2032 * queue. Any queued T_conn_ind have already got their options removed 2033 * by so_sock2_stream() when "sockmod" was I_POP'd. 2034 */ 2035 /* 2036 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2037 */ 2038 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2039 #ifdef _ILP32 2040 queue_t *q; 2041 2042 /* 2043 * Find read queue in driver 2044 * Can safely do this since we "own" nso/nvp. 2045 */ 2046 q = strvp2wq(nvp)->q_next; 2047 while (SAMESTR(q)) 2048 q = q->q_next; 2049 q = RD(q); 2050 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2051 #else 2052 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2053 #endif /* _ILP32 */ 2054 conn_res->PRIM_type = O_T_CONN_RES; 2055 PRIM_type = O_T_CONN_RES; 2056 } else { 2057 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2058 conn_res->PRIM_type = T_CONN_RES; 2059 PRIM_type = T_CONN_RES; 2060 } 2061 conn_res->SEQ_number = SEQ_number; 2062 conn_res->OPT_length = 0; 2063 conn_res->OPT_offset = 0; 2064 2065 mutex_enter(&so->so_lock); 2066 so_lock_single(so); /* Set SOLOCKED */ 2067 mutex_exit(&so->so_lock); 2068 2069 error = kstrputmsg(SOTOV(so), mp, NULL, 2070 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2071 mutex_enter(&so->so_lock); 2072 if (error) { 2073 eprintsoline(so, error); 2074 goto disconnect_vp; 2075 } 2076 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2077 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2078 if (error) { 2079 eprintsoline(so, error); 2080 goto disconnect_vp; 2081 } 2082 mutex_exit(&so->so_lock); 2083 /* 2084 * If there is a sin/sin6 appended onto the T_OK_ACK use 2085 * that to set the local address. If this is not present 2086 * then we zero out the address and don't set the 2087 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2088 * the pathname from the listening socket. 2089 * In the case where this is TCP or an AF_UNIX socket the 2090 * client side may have queued data or a T_ORDREL in the 2091 * transport. Having now sent the T_CONN_RES we may receive 2092 * those queued messages at any time. Hold the acceptor 2093 * so_lock until its state and laddr are finalized. 2094 */ 2095 mutex_enter(&nso->so_lock); 2096 sinlen = (nso->so_family == AF_INET) ? 
sizeof (sin_t) : sizeof (sin6_t); 2097 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 2098 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2099 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2100 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2101 nsti->sti_laddr_len = sinlen; 2102 nsti->sti_laddr_valid = 1; 2103 } else if (nso->so_family == AF_UNIX) { 2104 ASSERT(so->so_family == AF_UNIX); 2105 nsti->sti_laddr_len = sti->sti_laddr_len; 2106 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2107 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2108 nsti->sti_laddr_len); 2109 nsti->sti_laddr_valid = 1; 2110 } else { 2111 nsti->sti_laddr_len = sti->sti_laddr_len; 2112 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2113 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2114 nsti->sti_laddr_sa->sa_family = nso->so_family; 2115 } 2116 nso->so_state |= SS_ISCONNECTED; 2117 mutex_exit(&nso->so_lock); 2118 2119 freemsg(ack_mp); 2120 2121 mutex_enter(&so->so_lock); 2122 so_unlock_single(so, SOLOCKED); 2123 mutex_exit(&so->so_lock); 2124 2125 /* 2126 * Pass out new socket. 2127 */ 2128 if (nsop != NULL) 2129 *nsop = nso; 2130 2131 return (0); 2132 2133 2134 eproto_disc_unl: 2135 error = EPROTO; 2136 e_disc_unl: 2137 eprintsoline(so, error); 2138 goto disconnect_unlocked; 2139 2140 pr_disc_vp_unl: 2141 eprintsoline(so, error); 2142 disconnect_vp_unlocked: 2143 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2144 VN_RELE(nvp); 2145 disconnect_unlocked: 2146 (void) sodisconnect(so, SEQ_number, 0); 2147 return (error); 2148 2149 pr_disc_vp: 2150 eprintsoline(so, error); 2151 disconnect_vp: 2152 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2153 so_unlock_single(so, SOLOCKED); 2154 mutex_exit(&so->so_lock); 2155 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2156 VN_RELE(nvp); 2157 return (error); 2158 2159 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2160 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2161 ? EOPNOTSUPP : EINVAL; 2162 e_bad: 2163 eprintsoline(so, error); 2164 return (error); 2165 } 2166 2167 /* 2168 * Connect a socket. 2169 * 2170 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2171 * unconnect (by specifying a null address). 2172 */ 2173 int 2174 sotpi_connect(struct sonode *so, 2175 struct sockaddr *name, 2176 socklen_t namelen, 2177 int fflag, 2178 int flags, 2179 struct cred *cr) 2180 { 2181 struct T_conn_req conn_req; 2182 int error = 0; 2183 mblk_t *mp; 2184 void *src; 2185 socklen_t srclen; 2186 void *addr; 2187 socklen_t addrlen; 2188 boolean_t need_unlock; 2189 sotpi_info_t *sti = SOTOTPI(so); 2190 2191 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2192 (void *)so, (void *)name, namelen, fflag, flags, 2193 pr_state(so->so_state, so->so_mode))); 2194 2195 /* 2196 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2197 * avoid sleeping for memory with SOLOCKED held. 2198 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2199 * + sizeof (struct T_opthdr). 2200 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2201 * exceed sti_faddr_maxlen). 2202 */ 2203 mp = soallocproto(sizeof (struct T_conn_req) + 2204 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 2205 cr); 2206 if (mp == NULL) { 2207 /* 2208 * Connect can not fail with ENOBUFS. A signal was 2209 * caught so return EINTR.
2210 */ 2211 error = EINTR; 2212 eprintsoline(so, error); 2213 return (error); 2214 } 2215 2216 mutex_enter(&so->so_lock); 2217 /* 2218 * Make sure there is a preallocated T_unbind_req message 2219 * before any binding. This message is allocated when the 2220 * socket is created. Since another thread can consume 2221 * so_unbind_mp by the time we return from so_lock_single(), 2222 * we should check the availability of so_unbind_mp after 2223 * we return from so_lock_single(). 2224 */ 2225 2226 so_lock_single(so); /* Set SOLOCKED */ 2227 need_unlock = B_TRUE; 2228 2229 if (sti->sti_unbind_mp == NULL) { 2230 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2231 /* NOTE: holding so_lock while sleeping */ 2232 sti->sti_unbind_mp = 2233 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2234 if (sti->sti_unbind_mp == NULL) { 2235 error = EINTR; 2236 goto done; 2237 } 2238 } 2239 2240 /* 2241 * Can't have done a listen before connecting. 2242 */ 2243 if (so->so_state & SS_ACCEPTCONN) { 2244 error = EOPNOTSUPP; 2245 goto done; 2246 } 2247 2248 /* 2249 * Must be bound with the transport 2250 */ 2251 if (!(so->so_state & SS_ISBOUND)) { 2252 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2253 /*CONSTCOND*/ 2254 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2255 /* 2256 * Optimization for AF_INET{,6} transports 2257 * that can handle a T_CONN_REQ without being bound. 2258 */ 2259 so_automatic_bind(so); 2260 } else { 2261 error = sotpi_bind(so, NULL, 0, 2262 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2263 if (error) 2264 goto done; 2265 } 2266 ASSERT(so->so_state & SS_ISBOUND); 2267 flags |= _SOCONNECT_DID_BIND; 2268 } 2269 2270 /* 2271 * Handle a connect to a name parameter of type AF_UNSPEC like a 2272 * connect to a null address. This is the portable method to 2273 * unconnect a socket. 2274 */ 2275 if ((namelen >= sizeof (sa_family_t)) && 2276 (name->sa_family == AF_UNSPEC)) { 2277 name = NULL; 2278 namelen = 0; 2279 } 2280 2281 /* 2282 * Check that we are not already connected. 2283 * A connection-oriented socket cannot be reconnected. 2284 * A connected connection-less socket can be 2285 * - connected to a different address by a subsequent connect 2286 * - "unconnected" by a connect to the NULL address 2287 */ 2288 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2289 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2290 if (so->so_mode & SM_CONNREQUIRED) { 2291 /* Connection-oriented socket */ 2292 error = so->so_state & SS_ISCONNECTED ? 2293 EISCONN : EALREADY; 2294 goto done; 2295 } 2296 /* Connection-less socket */ 2297 if (name == NULL) { 2298 /* 2299 * Remove the connected state and clear SO_DGRAM_ERRIND 2300 * since it was set when the socket was connected. 2301 * If this is UDP also send down a T_DISCON_REQ. 2302 */ 2303 int val; 2304 2305 if ((so->so_family == AF_INET || 2306 so->so_family == AF_INET6) && 2307 (so->so_type == SOCK_DGRAM || 2308 so->so_type == SOCK_RAW) && 2309 /*CONSTCOND*/ 2310 !soconnect_tpi_udp) { 2311 /* XXX What about implicitly unbinding here? 
*/ 2312 error = sodisconnect(so, -1, 2313 _SODISCONNECT_LOCK_HELD); 2314 } else { 2315 so->so_state &= 2316 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2317 sti->sti_faddr_valid = 0; 2318 sti->sti_faddr_len = 0; 2319 } 2320 2321 /* Remove SOLOCKED since setsockopt will grab it */ 2322 so_unlock_single(so, SOLOCKED); 2323 mutex_exit(&so->so_lock); 2324 2325 val = 0; 2326 (void) sotpi_setsockopt(so, SOL_SOCKET, 2327 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2328 cr); 2329 2330 mutex_enter(&so->so_lock); 2331 so_lock_single(so); /* Set SOLOCKED */ 2332 goto done; 2333 } 2334 } 2335 ASSERT(so->so_state & SS_ISBOUND); 2336 2337 if (name == NULL || namelen == 0) { 2338 error = EINVAL; 2339 goto done; 2340 } 2341 /* 2342 * Mark the socket if sti_faddr_sa represents the transport level 2343 * address. 2344 */ 2345 if (flags & _SOCONNECT_NOXLATE) { 2346 struct sockaddr_ux *soaddr_ux; 2347 2348 ASSERT(so->so_family == AF_UNIX); 2349 if (namelen != sizeof (struct sockaddr_ux)) { 2350 error = EINVAL; 2351 goto done; 2352 } 2353 soaddr_ux = (struct sockaddr_ux *)name; 2354 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2355 namelen = sizeof (soaddr_ux->sou_addr); 2356 sti->sti_faddr_noxlate = 1; 2357 } 2358 2359 /* 2360 * Length and family checks. 2361 */ 2362 error = so_addr_verify(so, name, namelen); 2363 if (error) 2364 goto bad; 2365 2366 /* 2367 * Save foreign address. Needed for AF_UNIX as well as 2368 * transport providers that do not support TI_GETPEERNAME. 2369 * Also used for cached foreign address for TCP and UDP. 2370 */ 2371 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2372 error = EINVAL; 2373 goto done; 2374 } 2375 sti->sti_faddr_len = (socklen_t)namelen; 2376 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2377 bcopy(name, sti->sti_faddr_sa, namelen); 2378 sti->sti_faddr_valid = 1; 2379 2380 if (so->so_family == AF_UNIX) { 2381 if (sti->sti_faddr_noxlate) { 2382 /* 2383 * Already have a transport internal address. Do not 2384 * pass any (transport internal) source address. 2385 */ 2386 addr = sti->sti_faddr_sa; 2387 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2388 src = NULL; 2389 srclen = 0; 2390 } else { 2391 /* 2392 * Pass the sockaddr_un source address as an option 2393 * and translate the remote address. 2394 * Holding so_lock thus sti_laddr_sa can not change. 2395 */ 2396 src = sti->sti_laddr_sa; 2397 srclen = (t_uscalar_t)sti->sti_laddr_len; 2398 dprintso(so, 1, 2399 ("sotpi_connect UNIX: srclen %d, src %p\n", 2400 srclen, src)); 2401 error = so_ux_addr_xlate(so, 2402 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2403 (flags & _SOCONNECT_XPG4_2), 2404 &addr, &addrlen); 2405 if (error) 2406 goto bad; 2407 } 2408 } else { 2409 addr = sti->sti_faddr_sa; 2410 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2411 src = NULL; 2412 srclen = 0; 2413 } 2414 /* 2415 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2416 * option which asks the transport provider to send T_UDERR_IND 2417 * messages. These T_UDERR_IND messages are used to return connected 2418 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2419 * 2420 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2421 * we send down a T_CONN_REQ. This is needed to let the 2422 * transport assign a local address that is consistent with 2423 * the remote address. Applications depend on a getsockname() 2424 * after a connect() to retrieve the "source" IP address for 2425 * the connected socket. Invalidate the cached local address 2426 * to force getsockname() to enquire of the transport. 
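* For example, after connect() on a UDP socket bound to an unspecified address the transport chooses the source IP and port as part of handling the T_CONN_REQ, so the next getsockname() must query the transport rather than return the cached (possibly INADDR_ANY) local address.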
2427 */ 2428 if (!(so->so_mode & SM_CONNREQUIRED)) { 2429 /* 2430 * Datagram socket. 2431 */ 2432 int32_t val; 2433 2434 so_unlock_single(so, SOLOCKED); 2435 mutex_exit(&so->so_lock); 2436 2437 val = 1; 2438 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2439 &val, (t_uscalar_t)sizeof (val), cr); 2440 2441 mutex_enter(&so->so_lock); 2442 so_lock_single(so); /* Set SOLOCKED */ 2443 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2444 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2445 soconnect_tpi_udp) { 2446 soisconnected(so); 2447 goto done; 2448 } 2449 /* 2450 * Send down T_CONN_REQ etc. 2451 * Clear fflag to avoid returning EWOULDBLOCK. 2452 */ 2453 fflag = 0; 2454 ASSERT(so->so_family != AF_UNIX); 2455 sti->sti_laddr_valid = 0; 2456 } else if (sti->sti_laddr_len != 0) { 2457 /* 2458 * If the local address or port was "any" then it may be 2459 * changed by the transport as a result of the 2460 * connect. Invalidate the cached version if we have one. 2461 */ 2462 switch (so->so_family) { 2463 case AF_INET: 2464 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2465 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2466 INADDR_ANY || 2467 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2468 sti->sti_laddr_valid = 0; 2469 break; 2470 2471 case AF_INET6: 2472 ASSERT(sti->sti_laddr_len == 2473 (socklen_t)sizeof (sin6_t)); 2474 if (IN6_IS_ADDR_UNSPECIFIED( 2475 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2476 IN6_IS_ADDR_V4MAPPED_ANY( 2477 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2478 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2479 sti->sti_laddr_valid = 0; 2480 break; 2481 2482 default: 2483 break; 2484 } 2485 } 2486 2487 /* 2488 * Check for failure of an earlier call 2489 */ 2490 if (so->so_error != 0) 2491 goto so_bad; 2492 2493 /* 2494 * Send down T_CONN_REQ. Message was allocated above. 2495 */ 2496 conn_req.PRIM_type = T_CONN_REQ; 2497 conn_req.DEST_length = addrlen; 2498 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2499 if (srclen == 0) { 2500 conn_req.OPT_length = 0; 2501 conn_req.OPT_offset = 0; 2502 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2503 soappendmsg(mp, addr, addrlen); 2504 } else { 2505 /* 2506 * There is a AF_UNIX sockaddr_un to include as a source 2507 * address option. 2508 */ 2509 struct T_opthdr toh; 2510 2511 toh.level = SOL_SOCKET; 2512 toh.name = SO_SRCADDR; 2513 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2514 toh.status = 0; 2515 conn_req.OPT_length = 2516 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2517 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2518 _TPI_ALIGN_TOPT(addrlen)); 2519 2520 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2521 soappendmsg(mp, addr, addrlen); 2522 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2523 soappendmsg(mp, &toh, sizeof (toh)); 2524 soappendmsg(mp, src, srclen); 2525 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2526 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2527 } 2528 /* 2529 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2530 * in order to have the right state when the T_CONN_CON shows up. 
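* The T_CONN_REQ assembled above is laid out as [T_conn_req][destination address padded to _TPI_ALIGN_TOPT] and, when an AF_UNIX source address must be conveyed, a trailing [T_opthdr SOL_SOCKET/SO_SRCADDR][sockaddr_un, padded] option; this is why OPT_offset points past the aligned destination address.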
2531 */ 2532 soisconnecting(so); 2533 mutex_exit(&so->so_lock); 2534 2535 if (AU_AUDITING()) 2536 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2537 2538 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2539 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2540 mp = NULL; 2541 mutex_enter(&so->so_lock); 2542 if (error != 0) 2543 goto bad; 2544 2545 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2546 goto bad; 2547 2548 /* Allow other threads to access the socket */ 2549 so_unlock_single(so, SOLOCKED); 2550 need_unlock = B_FALSE; 2551 2552 /* 2553 * Wait until we get a T_CONN_CON or an error 2554 */ 2555 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2556 so_lock_single(so); /* Set SOLOCKED */ 2557 need_unlock = B_TRUE; 2558 } 2559 2560 done: 2561 freemsg(mp); 2562 switch (error) { 2563 case EINPROGRESS: 2564 case EALREADY: 2565 case EISCONN: 2566 case EINTR: 2567 /* Non-fatal errors */ 2568 sti->sti_laddr_valid = 0; 2569 /* FALLTHRU */ 2570 case 0: 2571 break; 2572 default: 2573 ASSERT(need_unlock); 2574 /* 2575 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2576 * and invalidate local-address cache 2577 */ 2578 so->so_state &= ~SS_ISCONNECTING; 2579 sti->sti_laddr_valid = 0; 2580 /* A discon_ind might have already unbound us */ 2581 if ((flags & _SOCONNECT_DID_BIND) && 2582 (so->so_state & SS_ISBOUND)) { 2583 int err; 2584 2585 err = sotpi_unbind(so, 0); 2586 /* LINTED - statement has no conseq */ 2587 if (err) { 2588 eprintsoline(so, err); 2589 } 2590 } 2591 break; 2592 } 2593 if (need_unlock) 2594 so_unlock_single(so, SOLOCKED); 2595 mutex_exit(&so->so_lock); 2596 return (error); 2597 2598 so_bad: error = sogeterr(so, B_TRUE); 2599 bad: eprintsoline(so, error); 2600 goto done; 2601 } 2602 2603 /* ARGSUSED */ 2604 int 2605 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2606 { 2607 struct T_ordrel_req ordrel_req; 2608 mblk_t *mp; 2609 uint_t old_state, state_change; 2610 int error = 0; 2611 sotpi_info_t *sti = SOTOTPI(so); 2612 2613 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2614 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2615 2616 mutex_enter(&so->so_lock); 2617 so_lock_single(so); /* Set SOLOCKED */ 2618 2619 /* 2620 * SunOS 4.X has no check for datagram sockets. 2621 * 5.X checks that it is connected (ENOTCONN) 2622 * X/Open requires that we check the connected state. 2623 */ 2624 if (!(so->so_state & SS_ISCONNECTED)) { 2625 if (!xnet_skip_checks) { 2626 error = ENOTCONN; 2627 if (xnet_check_print) { 2628 printf("sockfs: X/Open shutdown check " 2629 "caused ENOTCONN\n"); 2630 } 2631 } 2632 goto done; 2633 } 2634 /* 2635 * Record the current state and then perform any state changes. 2636 * Then use the difference between the old and new states to 2637 * determine which messages need to be sent. 2638 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2639 * duplicate calls to shutdown(). 2640 */ 2641 old_state = so->so_state; 2642 2643 switch (how) { 2644 case 0: 2645 socantrcvmore(so); 2646 break; 2647 case 1: 2648 socantsendmore(so); 2649 break; 2650 case 2: 2651 socantsendmore(so); 2652 socantrcvmore(so); 2653 break; 2654 default: 2655 error = EINVAL; 2656 goto done; 2657 } 2658 2659 /* 2660 * Assumes that the SS_CANT* flags are never cleared in the above code. 
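* For example, shutdown(SHUT_WR) followed by shutdown(SHUT_RDWR) leaves only SS_CANTRCVMORE in state_change on the second call, so the T_ORDREL_REQ below is not sent a second time.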
2661 */ 2662 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2663 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2664 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2665 2666 switch (state_change) { 2667 case 0: 2668 dprintso(so, 1, 2669 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2670 so->so_state)); 2671 goto done; 2672 2673 case SS_CANTRCVMORE: 2674 mutex_exit(&so->so_lock); 2675 strseteof(SOTOV(so), 1); 2676 /* 2677 * strseteof takes care of read side wakeups, 2678 * pollwakeups, and signals. 2679 */ 2680 /* 2681 * Get the read lock before flushing data to avoid problems 2682 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2683 */ 2684 mutex_enter(&so->so_lock); 2685 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2686 mutex_exit(&so->so_lock); 2687 2688 /* Flush read side queue */ 2689 strflushrq(SOTOV(so), FLUSHALL); 2690 2691 mutex_enter(&so->so_lock); 2692 so_unlock_read(so); /* Clear SOREADLOCKED */ 2693 break; 2694 2695 case SS_CANTSENDMORE: 2696 mutex_exit(&so->so_lock); 2697 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2698 mutex_enter(&so->so_lock); 2699 break; 2700 2701 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2702 mutex_exit(&so->so_lock); 2703 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2704 strseteof(SOTOV(so), 1); 2705 /* 2706 * strseteof takes care of read side wakeups, 2707 * pollwakeups, and signals. 2708 */ 2709 /* 2710 * Get the read lock before flushing data to avoid problems 2711 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2712 */ 2713 mutex_enter(&so->so_lock); 2714 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2715 mutex_exit(&so->so_lock); 2716 2717 /* Flush read side queue */ 2718 strflushrq(SOTOV(so), FLUSHALL); 2719 2720 mutex_enter(&so->so_lock); 2721 so_unlock_read(so); /* Clear SOREADLOCKED */ 2722 break; 2723 } 2724 2725 ASSERT(MUTEX_HELD(&so->so_lock)); 2726 2727 /* 2728 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2729 * was set due to this call and the new state has both of them set: 2730 * Send the AF_UNIX close indication 2731 * For T_COTS send a discon_ind 2732 * 2733 * If cantsend was set due to this call: 2734 * For T_COTSORD send an ordrel_ind 2735 * 2736 * Note that for T_CLTS there is no message sent here. 2737 */ 2738 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2739 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2740 /* 2741 * For SunOS 4.X compatibility we tell the other end 2742 * that we are unable to receive at this point. 2743 */ 2744 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2745 so_unix_close(so); 2746 2747 if (sti->sti_serv_type == T_COTS) 2748 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2749 } 2750 if ((state_change & SS_CANTSENDMORE) && 2751 (sti->sti_serv_type == T_COTS_ORD)) { 2752 /* Send an orderly release */ 2753 ordrel_req.PRIM_type = T_ORDREL_REQ; 2754 2755 mutex_exit(&so->so_lock); 2756 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2757 0, _ALLOC_SLEEP, cr); 2758 /* 2759 * Send down the T_ORDREL_REQ even if there is flow control. 2760 * This prevents shutdown from blocking. 2761 * Note that there is no T_OK_ACK for ordrel_req. 
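* (MSG_IGNFLOW in the kstrputmsg() call below is what lets the message bypass flow control.)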
2762 */ 2763 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2764 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2765 mutex_enter(&so->so_lock); 2766 if (error) { 2767 eprintsoline(so, error); 2768 goto done; 2769 } 2770 } 2771 2772 done: 2773 so_unlock_single(so, SOLOCKED); 2774 mutex_exit(&so->so_lock); 2775 return (error); 2776 } 2777 2778 /* 2779 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2780 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2781 * that we have closed. 2782 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2783 * T_UNITDATA_REQ containing the same option. 2784 * 2785 * For SOCK_DGRAM half-connections (somebody connected to this end 2786 * but this end is not connected) we don't know where to send any 2787 * SO_UNIX_CLOSE. 2788 * 2789 * We have to ignore stream head errors just in case there has been 2790 * a shutdown(output). 2791 * Ignore any flow control to try to get the message more quickly to the peer. 2792 * While locally ignoring flow control solves the problem when there 2793 * is only the loopback transport on the stream it would not provide 2794 * the correct AF_UNIX socket semantics when one or more modules have 2795 * been pushed. 2796 */ 2797 void 2798 so_unix_close(struct sonode *so) 2799 { 2800 int error; 2801 struct T_opthdr toh; 2802 mblk_t *mp; 2803 sotpi_info_t *sti = SOTOTPI(so); 2804 2805 ASSERT(MUTEX_HELD(&so->so_lock)); 2806 2807 ASSERT(so->so_family == AF_UNIX); 2808 2809 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2810 (SS_ISCONNECTED|SS_ISBOUND)) 2811 return; 2812 2813 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2814 (void *)so, pr_state(so->so_state, so->so_mode))); 2815 2816 toh.level = SOL_SOCKET; 2817 toh.name = SO_UNIX_CLOSE; 2818 2819 /* zero length + header */ 2820 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2821 toh.status = 0; 2822 2823 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2824 struct T_optdata_req tdr; 2825 2826 tdr.PRIM_type = T_OPTDATA_REQ; 2827 tdr.DATA_flag = 0; 2828 2829 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2830 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2831 2832 /* NOTE: holding so_lock while sleeping */ 2833 mp = soallocproto2(&tdr, sizeof (tdr), 2834 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2835 } else { 2836 struct T_unitdata_req tudr; 2837 void *addr; 2838 socklen_t addrlen; 2839 void *src; 2840 socklen_t srclen; 2841 struct T_opthdr toh2; 2842 t_scalar_t size; 2843 2844 /* Connected DGRAM socket */ 2845 2846 /* 2847 * For AF_UNIX the destination address is translated to 2848 * an internal name and the source address is passed as 2849 * an option. 2850 */ 2851 /* 2852 * Length and family checks. 2853 */ 2854 error = so_addr_verify(so, sti->sti_faddr_sa, 2855 (t_uscalar_t)sti->sti_faddr_len); 2856 if (error) { 2857 eprintsoline(so, error); 2858 return; 2859 } 2860 if (sti->sti_faddr_noxlate) { 2861 /* 2862 * Already have a transport internal address. Do not 2863 * pass any (transport internal) source address. 2864 */ 2865 addr = sti->sti_faddr_sa; 2866 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2867 src = NULL; 2868 srclen = 0; 2869 } else { 2870 /* 2871 * Pass the sockaddr_un source address as an option 2872 * and translate the remote address. 2873 * Holding so_lock thus sti_laddr_sa can not change.
2874 */ 2875 src = sti->sti_laddr_sa; 2876 srclen = (socklen_t)sti->sti_laddr_len; 2877 dprintso(so, 1, 2878 ("so_ux_close: srclen %d, src %p\n", 2879 srclen, src)); 2880 error = so_ux_addr_xlate(so, 2881 sti->sti_faddr_sa, 2882 (socklen_t)sti->sti_faddr_len, 0, 2883 &addr, &addrlen); 2884 if (error) { 2885 eprintsoline(so, error); 2886 return; 2887 } 2888 } 2889 tudr.PRIM_type = T_UNITDATA_REQ; 2890 tudr.DEST_length = addrlen; 2891 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2892 if (srclen == 0) { 2893 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2894 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2895 _TPI_ALIGN_TOPT(addrlen)); 2896 2897 size = tudr.OPT_offset + tudr.OPT_length; 2898 /* NOTE: holding so_lock while sleeping */ 2899 mp = soallocproto2(&tudr, sizeof (tudr), 2900 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2901 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2902 soappendmsg(mp, &toh, sizeof (toh)); 2903 } else { 2904 /* 2905 * There is a AF_UNIX sockaddr_un to include as a 2906 * source address option. 2907 */ 2908 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2909 _TPI_ALIGN_TOPT(srclen)); 2910 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2911 _TPI_ALIGN_TOPT(addrlen)); 2912 2913 toh2.level = SOL_SOCKET; 2914 toh2.name = SO_SRCADDR; 2915 toh2.len = (t_uscalar_t)(srclen + 2916 sizeof (struct T_opthdr)); 2917 toh2.status = 0; 2918 2919 size = tudr.OPT_offset + tudr.OPT_length; 2920 2921 /* NOTE: holding so_lock while sleeping */ 2922 mp = soallocproto2(&tudr, sizeof (tudr), 2923 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2924 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2925 soappendmsg(mp, &toh, sizeof (toh)); 2926 soappendmsg(mp, &toh2, sizeof (toh2)); 2927 soappendmsg(mp, src, srclen); 2928 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2929 } 2930 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2931 } 2932 mutex_exit(&so->so_lock); 2933 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2934 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2935 mutex_enter(&so->so_lock); 2936 } 2937 2938 /* 2939 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2940 * In addition, the caller typically verifies that there is some 2941 * potential state to clear by checking 2942 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2943 * before calling this routine. 2944 * Note that such a check can be made without holding so_lock since 2945 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2946 * decrements sti_oobsigcnt. 2947 * 2948 * When data is read *after* the point that all pending 2949 * oob data has been consumed the oob indication is cleared. 2950 * 2951 * This logic keeps select/poll returning POLLRDBAND and 2952 * SIOCATMARK returning true until we have read past 2953 * the mark. 2954 */ 2955 static void 2956 sorecv_update_oobstate(struct sonode *so) 2957 { 2958 sotpi_info_t *sti = SOTOTPI(so); 2959 2960 mutex_enter(&so->so_lock); 2961 ASSERT(so_verify_oobstate(so)); 2962 dprintso(so, 1, 2963 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2964 sti->sti_oobsigcnt, 2965 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 2966 if (sti->sti_oobsigcnt == 0) { 2967 /* No more pending oob indications */ 2968 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2969 freemsg(so->so_oobmsg); 2970 so->so_oobmsg = NULL; 2971 } 2972 ASSERT(so_verify_oobstate(so)); 2973 mutex_exit(&so->so_lock); 2974 } 2975 2976 /* 2977 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 
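* Sketch of the logic below: M_DATA mblks are uiomove()d into the caller's buffer and freed once fully consumed; any non-data mblk is unlinked and handed back through *rmp for the caller to process, and the kstrgetmsg() rval/error that NL7C saved is only surfaced once the last saved mblk has been consumed.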
2978 */ 2979 static int 2980 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2981 { 2982 sotpi_info_t *sti = SOTOTPI(so); 2983 int error = 0; 2984 mblk_t *tmp = NULL; 2985 mblk_t *pmp = NULL; 2986 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 2987 2988 ASSERT(nmp != NULL); 2989 2990 while (nmp != NULL && uiop->uio_resid > 0) { 2991 ssize_t n; 2992 2993 if (DB_TYPE(nmp) == M_DATA) { 2994 /* 2995 * We have some data, uiomove up to resid bytes. 2996 */ 2997 n = MIN(MBLKL(nmp), uiop->uio_resid); 2998 if (n > 0) 2999 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 3000 nmp->b_rptr += n; 3001 if (nmp->b_rptr == nmp->b_wptr) { 3002 pmp = nmp; 3003 nmp = nmp->b_cont; 3004 } 3005 if (error) 3006 break; 3007 } else { 3008 /* 3009 * We only handle data, save for caller to handle. 3010 */ 3011 if (pmp != NULL) { 3012 pmp->b_cont = nmp->b_cont; 3013 } 3014 nmp->b_cont = NULL; 3015 if (*rmp == NULL) { 3016 *rmp = nmp; 3017 } else { 3018 tmp->b_cont = nmp; 3019 } 3020 nmp = nmp->b_cont; 3021 tmp = nmp; 3022 } 3023 } 3024 if (pmp != NULL) { 3025 /* Free any mblk_t(s) which we have consumed */ 3026 pmp->b_cont = NULL; 3027 freemsg(sti->sti_nl7c_rcv_mp); 3028 } 3029 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3030 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3031 if (error == 0) { 3032 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3033 3034 error = p->r_v.r_v2; 3035 p->r_v.r_v2 = 0; 3036 } 3037 rp->r_vals = sti->sti_nl7c_rcv_rval; 3038 sti->sti_nl7c_rcv_rval = 0; 3039 } else { 3040 /* More mblk_t(s) to process so no rval to return */ 3041 rp->r_vals = 0; 3042 } 3043 return (error); 3044 } 3045 /* 3046 * Receive the next message on the queue. 3047 * If msg_controllen is non-zero when called the caller is interested in 3048 * any received control info (options). 3049 * If msg_namelen is non-zero when called the caller is interested in 3050 * any received source address. 3051 * The routine returns with msg_control and msg_name pointing to 3052 * kmem_alloc'ed memory which the caller has to free. 3053 */ 3054 /* ARGSUSED */ 3055 int 3056 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3057 struct cred *cr) 3058 { 3059 union T_primitives *tpr; 3060 mblk_t *mp; 3061 uchar_t pri; 3062 int pflag, opflag; 3063 void *control; 3064 t_uscalar_t controllen; 3065 t_uscalar_t namelen; 3066 int so_state = so->so_state; /* Snapshot */ 3067 ssize_t saved_resid; 3068 rval_t rval; 3069 int flags; 3070 clock_t timout; 3071 int error = 0; 3072 sotpi_info_t *sti = SOTOTPI(so); 3073 3074 flags = msg->msg_flags; 3075 msg->msg_flags = 0; 3076 3077 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3078 (void *)so, (void *)msg, flags, 3079 pr_state(so->so_state, so->so_mode), so->so_error)); 3080 3081 if (so->so_version == SOV_STREAM) { 3082 so_update_attrs(so, SOACC); 3083 /* The imaginary "sockmod" has been popped - act as a stream */ 3084 return (strread(SOTOV(so), uiop, cr)); 3085 } 3086 3087 /* 3088 * If we are not connected because we have never been connected 3089 * we return ENOTCONN. If we have been connected (but are no longer 3090 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3091 * the EOF. 3092 * 3093 * An alternative would be to post an ENOTCONN error in stream head 3094 * (read+write) and clear it when we're connected. However, that error 3095 * would cause incorrect poll/select behavior! 
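* (a persistent stream head error would make poll/select report the descriptor as ready with an error and fail subsequent operations, instead of simply showing it as not ready while unconnected)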
3096 */ 3097 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3098 (so->so_mode & SM_CONNREQUIRED)) { 3099 return (ENOTCONN); 3100 } 3101 3102 /* 3103 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3104 * after checking that the read queue is empty) and returns zero. 3105 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3106 * is zero. 3107 */ 3108 3109 if (flags & MSG_OOB) { 3110 /* Check that the transport supports OOB */ 3111 if (!(so->so_mode & SM_EXDATA)) 3112 return (EOPNOTSUPP); 3113 so_update_attrs(so, SOACC); 3114 return (sorecvoob(so, msg, uiop, flags, 3115 (so->so_options & SO_OOBINLINE))); 3116 } 3117 3118 so_update_attrs(so, SOACC); 3119 3120 /* 3121 * Set msg_controllen and msg_namelen to zero here to make it 3122 * simpler in the cases that no control or name is returned. 3123 */ 3124 controllen = msg->msg_controllen; 3125 namelen = msg->msg_namelen; 3126 msg->msg_controllen = 0; 3127 msg->msg_namelen = 0; 3128 3129 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3130 namelen, controllen)); 3131 3132 mutex_enter(&so->so_lock); 3133 /* 3134 * If this is an NL7C enabled socket and it is not waiting for write data, 3135 * do NL7C processing. 3136 */ 3137 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3138 NL7C_ENABLED) { 3139 if (sti->sti_nl7c_uri) { 3140 /* Close uri processing for a previous request */ 3141 nl7c_close(so); 3142 } 3143 if ((so_state & SS_CANTRCVMORE) && 3144 sti->sti_nl7c_rcv_mp == NULL) { 3145 /* Nothing to process, EOF */ 3146 mutex_exit(&so->so_lock); 3147 return (0); 3148 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3149 /* Persistent NL7C socket, try to process request */ 3150 boolean_t ret; 3151 3152 ret = nl7c_process(so, 3153 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3154 rval.r_vals = sti->sti_nl7c_rcv_rval; 3155 error = rval.r_v.r_v2; 3156 if (error) { 3157 /* Error of some sort, return it */ 3158 mutex_exit(&so->so_lock); 3159 return (error); 3160 } 3161 if (sti->sti_nl7c_flags && 3162 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3163 /* 3164 * Still an NL7C socket and no data 3165 * to pass up to the caller. 3166 */ 3167 mutex_exit(&so->so_lock); 3168 if (ret) { 3169 /* EOF */ 3170 return (0); 3171 } else { 3172 /* Need more data */ 3173 return (EAGAIN); 3174 } 3175 } 3176 } else { 3177 /* 3178 * Not persistent so no further NL7C processing. 3179 */ 3180 sti->sti_nl7c_flags = 0; 3181 } 3182 } 3183 /* 3184 * Only one reader is allowed at any given time. This is needed 3185 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3186 * 3187 * This is slightly different from BSD behavior in that it fails with 3188 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3189 * is single-threaded using sblock(), which is dropped while waiting 3190 * for data to appear. The difference shows up e.g. if one 3191 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3192 * does use nonblocking io and different threads are reading each 3193 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3194 * in this case as long as the read queue doesn't get empty. 3195 * In this implementation the thread using nonblocking io can 3196 * get an EWOULDBLOCK error due to the blocking thread executing 3197 * e.g. in the uiomove in kstrgetmsg. 3198 * This difference is not believed to be significant. 3199 */ 3200 /* Set SOREADLOCKED */ 3201 error = so_lock_read_intr(so, 3202 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ?
FNONBLOCK : 0)); 3202 mutex_exit(&so->so_lock); 3203 if (error) 3204 return (error); 3205 3206 /* 3207 * Tell kstrgetmsg to not inspect the stream head errors until all 3208 * queued data has been consumed. 3209 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3210 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3211 * 3212 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3213 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3214 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3215 */ 3216 pflag = MSG_ANY | MSG_DELAYERROR; 3217 if (flags & MSG_PEEK) { 3218 pflag |= MSG_IPEEK; 3219 flags &= ~MSG_WAITALL; 3220 } 3221 if (so->so_mode & SM_ATOMIC) 3222 pflag |= MSG_DISCARDTAIL; 3223 3224 if (flags & MSG_DONTWAIT) 3225 timout = 0; 3226 else 3227 timout = -1; 3228 opflag = pflag; 3229 retry: 3230 saved_resid = uiop->uio_resid; 3231 pri = 0; 3232 mp = NULL; 3233 if (sti->sti_nl7c_rcv_mp != NULL) { 3234 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3235 error = nl7c_sorecv(so, &mp, uiop, &rval); 3236 } else { 3237 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3238 timout, &rval); 3239 } 3240 if (error != 0) { 3241 /* kstrgetmsg returns ETIME when timeout expires */ 3242 if (error == ETIME) 3243 error = EWOULDBLOCK; 3244 goto out; 3245 } 3246 /* 3247 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3248 * For non-datagrams MOREDATA is used to set MSG_EOR. 3249 */ 3250 ASSERT(!(rval.r_val1 & MORECTL)); 3251 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3252 msg->msg_flags |= MSG_TRUNC; 3253 3254 if (mp == NULL) { 3255 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3256 /* 3257 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3258 * The draft Posix socket spec states that the mark should 3259 * not be cleared when peeking. We follow the latter. 3260 */ 3261 if ((so->so_state & 3262 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3263 (uiop->uio_resid != saved_resid) && 3264 !(flags & MSG_PEEK)) { 3265 sorecv_update_oobstate(so); 3266 } 3267 3268 mutex_enter(&so->so_lock); 3269 /* Set MSG_EOR based on MOREDATA */ 3270 if (!(rval.r_val1 & MOREDATA)) { 3271 if (so->so_state & SS_SAVEDEOR) { 3272 msg->msg_flags |= MSG_EOR; 3273 so->so_state &= ~SS_SAVEDEOR; 3274 } 3275 } 3276 /* 3277 * If some data was received (i.e. not EOF) and the 3278 * read/recv* has not been satisfied wait for some more. 3279 */ 3280 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3281 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3282 mutex_exit(&so->so_lock); 3283 pflag = opflag | MSG_NOMARK; 3284 goto retry; 3285 } 3286 goto out_locked; 3287 } 3288 3289 /* strsock_proto has already verified length and alignment */ 3290 tpr = (union T_primitives *)mp->b_rptr; 3291 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3292 3293 switch (tpr->type) { 3294 case T_DATA_IND: { 3295 if ((so->so_state & 3296 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3297 (uiop->uio_resid != saved_resid) && 3298 !(flags & MSG_PEEK)) { 3299 sorecv_update_oobstate(so); 3300 } 3301 3302 /* 3303 * Set msg_flags to MSG_EOR based on 3304 * MORE_flag and MOREDATA. 3305 */ 3306 mutex_enter(&so->so_lock); 3307 so->so_state &= ~SS_SAVEDEOR; 3308 if (!(tpr->data_ind.MORE_flag & 1)) { 3309 if (!(rval.r_val1 & MOREDATA)) 3310 msg->msg_flags |= MSG_EOR; 3311 else 3312 so->so_state |= SS_SAVEDEOR; 3313 } 3314 freemsg(mp); 3315 /* 3316 * If some data was received (i.e. 
not EOF) and the 3317 * read/recv* has not been satisfied wait for some more. 3318 */ 3319 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3320 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3321 mutex_exit(&so->so_lock); 3322 pflag = opflag | MSG_NOMARK; 3323 goto retry; 3324 } 3325 goto out_locked; 3326 } 3327 case T_UNITDATA_IND: { 3328 void *addr; 3329 t_uscalar_t addrlen; 3330 void *abuf; 3331 t_uscalar_t optlen; 3332 void *opt; 3333 3334 if ((so->so_state & 3335 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3336 (uiop->uio_resid != saved_resid) && 3337 !(flags & MSG_PEEK)) { 3338 sorecv_update_oobstate(so); 3339 } 3340 3341 if (namelen != 0) { 3342 /* Caller wants source address */ 3343 addrlen = tpr->unitdata_ind.SRC_length; 3344 addr = sogetoff(mp, 3345 tpr->unitdata_ind.SRC_offset, 3346 addrlen, 1); 3347 if (addr == NULL) { 3348 freemsg(mp); 3349 error = EPROTO; 3350 eprintsoline(so, error); 3351 goto out; 3352 } 3353 if (so->so_family == AF_UNIX) { 3354 /* 3355 * Can not use the transport level address. 3356 * If there is a SO_SRCADDR option carrying 3357 * the socket level address it will be 3358 * extracted below. 3359 */ 3360 addr = NULL; 3361 addrlen = 0; 3362 } 3363 } 3364 optlen = tpr->unitdata_ind.OPT_length; 3365 if (optlen != 0) { 3366 t_uscalar_t ncontrollen; 3367 3368 /* 3369 * Extract any source address option. 3370 * Determine how large cmsg buffer is needed. 3371 */ 3372 opt = sogetoff(mp, 3373 tpr->unitdata_ind.OPT_offset, 3374 optlen, __TPI_ALIGN_SIZE); 3375 3376 if (opt == NULL) { 3377 freemsg(mp); 3378 error = EPROTO; 3379 eprintsoline(so, error); 3380 goto out; 3381 } 3382 if (so->so_family == AF_UNIX) 3383 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3384 ncontrollen = so_cmsglen(mp, opt, optlen, 3385 !(flags & MSG_XPG4_2)); 3386 if (controllen != 0) 3387 controllen = ncontrollen; 3388 else if (ncontrollen != 0) 3389 msg->msg_flags |= MSG_CTRUNC; 3390 } else { 3391 controllen = 0; 3392 } 3393 3394 if (namelen != 0) { 3395 /* 3396 * Return address to caller. 3397 * Caller handles truncation if length 3398 * exceeds msg_namelen. 3399 * NOTE: AF_UNIX NUL termination is ensured by 3400 * the sender's copyin_name(). 3401 */ 3402 abuf = kmem_alloc(addrlen, KM_SLEEP); 3403 3404 bcopy(addr, abuf, addrlen); 3405 msg->msg_name = abuf; 3406 msg->msg_namelen = addrlen; 3407 } 3408 3409 if (controllen != 0) { 3410 /* 3411 * Return control msg to caller. 3412 * Caller handles truncation if length 3413 * exceeds msg_controllen. 3414 */ 3415 control = kmem_zalloc(controllen, KM_SLEEP); 3416 3417 error = so_opt2cmsg(mp, opt, optlen, 3418 !(flags & MSG_XPG4_2), 3419 control, controllen); 3420 if (error) { 3421 freemsg(mp); 3422 if (msg->msg_namelen != 0) 3423 kmem_free(msg->msg_name, 3424 msg->msg_namelen); 3425 kmem_free(control, controllen); 3426 eprintsoline(so, error); 3427 goto out; 3428 } 3429 msg->msg_control = control; 3430 msg->msg_controllen = controllen; 3431 } 3432 3433 freemsg(mp); 3434 goto out; 3435 } 3436 case T_OPTDATA_IND: { 3437 struct T_optdata_req *tdr; 3438 void *opt; 3439 t_uscalar_t optlen; 3440 3441 if ((so->so_state & 3442 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3443 (uiop->uio_resid != saved_resid) && 3444 !(flags & MSG_PEEK)) { 3445 sorecv_update_oobstate(so); 3446 } 3447 3448 tdr = (struct T_optdata_req *)mp->b_rptr; 3449 optlen = tdr->OPT_length; 3450 if (optlen != 0) { 3451 t_uscalar_t ncontrollen; 3452 /* 3453 * Determine how large cmsg buffer is needed. 
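* so_cmsglen() computes how much space the TPI options would need once converted to cmsgs; if the caller supplied no control buffer but options are present, MSG_CTRUNC is flagged instead of allocating one.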
3454 */ 3455 opt = sogetoff(mp, 3456 tpr->optdata_ind.OPT_offset, 3457 optlen, __TPI_ALIGN_SIZE); 3458 3459 if (opt == NULL) { 3460 freemsg(mp); 3461 error = EPROTO; 3462 eprintsoline(so, error); 3463 goto out; 3464 } 3465 3466 ncontrollen = so_cmsglen(mp, opt, optlen, 3467 !(flags & MSG_XPG4_2)); 3468 if (controllen != 0) 3469 controllen = ncontrollen; 3470 else if (ncontrollen != 0) 3471 msg->msg_flags |= MSG_CTRUNC; 3472 } else { 3473 controllen = 0; 3474 } 3475 3476 if (controllen != 0) { 3477 /* 3478 * Return control msg to caller. 3479 * Caller handles truncation if length 3480 * exceeds msg_controllen. 3481 */ 3482 control = kmem_zalloc(controllen, KM_SLEEP); 3483 3484 error = so_opt2cmsg(mp, opt, optlen, 3485 !(flags & MSG_XPG4_2), 3486 control, controllen); 3487 if (error) { 3488 freemsg(mp); 3489 kmem_free(control, controllen); 3490 eprintsoline(so, error); 3491 goto out; 3492 } 3493 msg->msg_control = control; 3494 msg->msg_controllen = controllen; 3495 } 3496 3497 /* 3498 * Set msg_flags to MSG_EOR based on 3499 * DATA_flag and MOREDATA. 3500 */ 3501 mutex_enter(&so->so_lock); 3502 so->so_state &= ~SS_SAVEDEOR; 3503 if (!(tpr->data_ind.MORE_flag & 1)) { 3504 if (!(rval.r_val1 & MOREDATA)) 3505 msg->msg_flags |= MSG_EOR; 3506 else 3507 so->so_state |= SS_SAVEDEOR; 3508 } 3509 freemsg(mp); 3510 /* 3511 * If some data was received (i.e. not EOF) and the 3512 * read/recv* has not been satisfied wait for some more. 3513 * Not possible to wait if control info was received. 3514 */ 3515 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3516 controllen == 0 && 3517 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3518 mutex_exit(&so->so_lock); 3519 pflag = opflag | MSG_NOMARK; 3520 goto retry; 3521 } 3522 goto out_locked; 3523 } 3524 case T_EXDATA_IND: { 3525 dprintso(so, 1, 3526 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3527 "state %s\n", 3528 sti->sti_oobsigcnt, sti->sti_oobcnt, 3529 saved_resid - uiop->uio_resid, 3530 pr_state(so->so_state, so->so_mode))); 3531 /* 3532 * kstrgetmsg handles MSGMARK so there is nothing to 3533 * inspect in the T_EXDATA_IND. 3534 * strsock_proto makes the stream head queue the T_EXDATA_IND 3535 * as a separate message with no M_DATA component. Furthermore, 3536 * the stream head does not consolidate M_DATA messages onto 3537 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3538 * remains a message by itself. This is needed since MSGMARK 3539 * marks both the whole message as well as the last byte 3540 * of the message. 3541 */ 3542 freemsg(mp); 3543 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3544 if (flags & MSG_PEEK) { 3545 /* 3546 * Even though we are peeking we consume the 3547 * T_EXDATA_IND thereby moving the mark information 3548 * to SS_RCVATMARK. Then the oob code below will 3549 * retry the peeking kstrgetmsg. 3550 * Note that the stream head read queue is 3551 * never flushed without holding SOREADLOCKED 3552 * thus the T_EXDATA_IND can not disappear 3553 * underneath us. 
3554 */ 3555 dprintso(so, 1, 3556 ("sotpi_recvmsg: consume EXDATA_IND " 3557 "counts %d/%d state %s\n", 3558 sti->sti_oobsigcnt, 3559 sti->sti_oobcnt, 3560 pr_state(so->so_state, so->so_mode))); 3561 3562 pflag = MSG_ANY | MSG_DELAYERROR; 3563 if (so->so_mode & SM_ATOMIC) 3564 pflag |= MSG_DISCARDTAIL; 3565 3566 pri = 0; 3567 mp = NULL; 3568 3569 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3570 &pri, &pflag, (clock_t)-1, &rval); 3571 ASSERT(uiop->uio_resid == saved_resid); 3572 3573 if (error) { 3574 #ifdef SOCK_DEBUG 3575 if (error != EWOULDBLOCK && error != EINTR) { 3576 eprintsoline(so, error); 3577 } 3578 #endif /* SOCK_DEBUG */ 3579 goto out; 3580 } 3581 ASSERT(mp); 3582 tpr = (union T_primitives *)mp->b_rptr; 3583 ASSERT(tpr->type == T_EXDATA_IND); 3584 freemsg(mp); 3585 } /* end "if (flags & MSG_PEEK)" */ 3586 3587 /* 3588 * Decrement the number of queued and pending oob. 3589 * 3590 * SS_RCVATMARK is cleared when we read past a mark. 3591 * SS_HAVEOOBDATA is cleared when we've read past the 3592 * last mark. 3593 * SS_OOBPEND is cleared if we've read past the last 3594 * mark and no (new) SIGURG has been posted. 3595 */ 3596 mutex_enter(&so->so_lock); 3597 ASSERT(so_verify_oobstate(so)); 3598 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3599 ASSERT(sti->sti_oobsigcnt > 0); 3600 sti->sti_oobsigcnt--; 3601 ASSERT(sti->sti_oobcnt > 0); 3602 sti->sti_oobcnt--; 3603 /* 3604 * Since the T_EXDATA_IND has been removed from the stream 3605 * head, but we have not read data past the mark, 3606 * sockfs needs to track that the socket is still at the mark. 3607 * 3608 * Since no data was received call kstrgetmsg again to wait 3609 * for data. 3610 */ 3611 so->so_state |= SS_RCVATMARK; 3612 mutex_exit(&so->so_lock); 3613 dprintso(so, 1, 3614 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3615 sti->sti_oobsigcnt, sti->sti_oobcnt, 3616 pr_state(so->so_state, so->so_mode))); 3617 pflag = opflag; 3618 goto retry; 3619 } 3620 default: 3621 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3622 (void *)so, tpr->type, (void *)mp); 3623 ASSERT(0); 3624 freemsg(mp); 3625 error = EPROTO; 3626 eprintsoline(so, error); 3627 goto out; 3628 } 3629 /* NOTREACHED */ 3630 out: 3631 mutex_enter(&so->so_lock); 3632 out_locked: 3633 so_unlock_read(so); /* Clear SOREADLOCKED */ 3634 mutex_exit(&so->so_lock); 3635 return (error); 3636 } 3637 3638 /* 3639 * Sending data with options on a datagram socket. 3640 * Assumes caller has verified that SS_ISBOUND etc. are set. 3641 */ 3642 static int 3643 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3644 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3645 { 3646 struct T_unitdata_req tudr; 3647 mblk_t *mp; 3648 int error; 3649 void *addr; 3650 socklen_t addrlen; 3651 void *src; 3652 socklen_t srclen; 3653 ssize_t len; 3654 int size; 3655 struct T_opthdr toh; 3656 struct fdbuf *fdbuf; 3657 t_uscalar_t optlen; 3658 void *fds; 3659 int fdlen; 3660 sotpi_info_t *sti = SOTOTPI(so); 3661 3662 ASSERT(name && namelen); 3663 ASSERT(control && controllen); 3664 3665 len = uiop->uio_resid; 3666 if (len > (ssize_t)sti->sti_tidu_size) { 3667 return (EMSGSIZE); 3668 } 3669 3670 /* 3671 * For AF_UNIX the destination address is translated to an internal 3672 * name and the source address is passed as an option. 3673 * Also, file descriptors are passed as file pointers in an 3674 * option. 3675 */ 3676 3677 /* 3678 * Length and family checks. 
3679 */ 3680 error = so_addr_verify(so, name, namelen); 3681 if (error) { 3682 eprintsoline(so, error); 3683 return (error); 3684 } 3685 if (so->so_family == AF_UNIX) { 3686 if (sti->sti_faddr_noxlate) { 3687 /* 3688 * Already have a transport internal address. Do not 3689 * pass any (transport internal) source address. 3690 */ 3691 addr = name; 3692 addrlen = namelen; 3693 src = NULL; 3694 srclen = 0; 3695 } else { 3696 /* 3697 * Pass the sockaddr_un source address as an option 3698 * and translate the remote address. 3699 * 3700 * Note that this code does not prevent sti_laddr_sa 3701 * from changing while it is being used. Thus 3702 * if an unbind+bind occurs concurrently with this 3703 * send the peer might see a partially new and a 3704 * partially old "from" address. 3705 */ 3706 src = sti->sti_laddr_sa; 3707 srclen = (t_uscalar_t)sti->sti_laddr_len; 3708 dprintso(so, 1, 3709 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3710 srclen, src)); 3711 error = so_ux_addr_xlate(so, name, namelen, 3712 (flags & MSG_XPG4_2), 3713 &addr, &addrlen); 3714 if (error) { 3715 eprintsoline(so, error); 3716 return (error); 3717 } 3718 } 3719 } else { 3720 addr = name; 3721 addrlen = namelen; 3722 src = NULL; 3723 srclen = 0; 3724 } 3725 optlen = so_optlen(control, controllen, 3726 !(flags & MSG_XPG4_2)); 3727 tudr.PRIM_type = T_UNITDATA_REQ; 3728 tudr.DEST_length = addrlen; 3729 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3730 if (srclen != 0) 3731 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3732 _TPI_ALIGN_TOPT(srclen)); 3733 else 3734 tudr.OPT_length = optlen; 3735 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3736 _TPI_ALIGN_TOPT(addrlen)); 3737 3738 size = tudr.OPT_offset + tudr.OPT_length; 3739 3740 /* 3741 * File descriptors only when SM_FDPASSING set. 3742 */ 3743 error = so_getfdopt(control, controllen, 3744 !(flags & MSG_XPG4_2), &fds, &fdlen); 3745 if (error) 3746 return (error); 3747 if (fdlen != -1) { 3748 if (!(so->so_mode & SM_FDPASSING)) 3749 return (EOPNOTSUPP); 3750 3751 error = fdbuf_create(fds, fdlen, &fdbuf); 3752 if (error) 3753 return (error); 3754 mp = fdbuf_allocmsg(size, fdbuf); 3755 } else { 3756 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3757 if (mp == NULL) { 3758 /* 3759 * Caught a signal waiting for memory. 3760 * Let send* return EINTR. 3761 */ 3762 return (EINTR); 3763 } 3764 } 3765 soappendmsg(mp, &tudr, sizeof (tudr)); 3766 soappendmsg(mp, addr, addrlen); 3767 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3768 3769 if (fdlen != -1) { 3770 ASSERT(fdbuf != NULL); 3771 toh.level = SOL_SOCKET; 3772 toh.name = SO_FILEP; 3773 toh.len = fdbuf->fd_size + 3774 (t_uscalar_t)sizeof (struct T_opthdr); 3775 toh.status = 0; 3776 soappendmsg(mp, &toh, sizeof (toh)); 3777 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3778 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3779 } 3780 if (srclen != 0) { 3781 /* 3782 * There is a AF_UNIX sockaddr_un to include as a source 3783 * address option. 
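* The finished message is therefore [T_unitdata_req][destination, padded to _TPI_ALIGN_TOPT][optional T_opthdr SO_FILEP + fdbuf][T_opthdr SO_SRCADDR + sockaddr_un, padded], followed by the caller's control messages converted by so_cmsg2opt().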
3784 */ 3785 toh.level = SOL_SOCKET; 3786 toh.name = SO_SRCADDR; 3787 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3788 toh.status = 0; 3789 soappendmsg(mp, &toh, sizeof (toh)); 3790 soappendmsg(mp, src, srclen); 3791 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3792 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3793 } 3794 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3795 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3796 /* At most 3 bytes left in the message */ 3797 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3798 ASSERT(MBLKL(mp) <= (ssize_t)size); 3799 3800 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3801 if (AU_AUDITING()) 3802 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3803 3804 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3805 #ifdef SOCK_DEBUG 3806 if (error) { 3807 eprintsoline(so, error); 3808 } 3809 #endif /* SOCK_DEBUG */ 3810 return (error); 3811 } 3812 3813 /* 3814 * Sending data with options on a connected stream socket. 3815 * Assumes caller has verified that SS_ISCONNECTED is set. 3816 */ 3817 static int 3818 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3819 t_uscalar_t controllen, int flags) 3820 { 3821 struct T_optdata_req tdr; 3822 mblk_t *mp; 3823 int error; 3824 ssize_t iosize; 3825 int size; 3826 struct fdbuf *fdbuf; 3827 t_uscalar_t optlen; 3828 void *fds; 3829 int fdlen; 3830 struct T_opthdr toh; 3831 sotpi_info_t *sti = SOTOTPI(so); 3832 3833 dprintso(so, 1, 3834 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3835 3836 /* 3837 * Has to be bound and connected. However, since no locks are 3838 * held the state could have changed after sotpi_sendmsg checked it 3839 * thus it is not possible to ASSERT on the state. 3840 */ 3841 3842 /* Options on connection-oriented only when SM_OPTDATA set. */ 3843 if (!(so->so_mode & SM_OPTDATA)) 3844 return (EOPNOTSUPP); 3845 3846 do { 3847 /* 3848 * Set the MORE flag if uio_resid does not fit in this 3849 * message or if the caller passed in "more". 3850 * Error for transports with zero tidu_size. 3851 */ 3852 tdr.PRIM_type = T_OPTDATA_REQ; 3853 iosize = sti->sti_tidu_size; 3854 if (iosize <= 0) 3855 return (EMSGSIZE); 3856 if (uiop->uio_resid > iosize) { 3857 tdr.DATA_flag = 1; 3858 } else { 3859 if (more) 3860 tdr.DATA_flag = 1; 3861 else 3862 tdr.DATA_flag = 0; 3863 iosize = uiop->uio_resid; 3864 } 3865 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3866 tdr.DATA_flag, iosize)); 3867 3868 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3869 tdr.OPT_length = optlen; 3870 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3871 3872 size = (int)sizeof (tdr) + optlen; 3873 /* 3874 * File descriptors only when SM_FDPASSING set. 3875 */ 3876 error = so_getfdopt(control, controllen, 3877 !(flags & MSG_XPG4_2), &fds, &fdlen); 3878 if (error) 3879 return (error); 3880 if (fdlen != -1) { 3881 if (!(so->so_mode & SM_FDPASSING)) 3882 return (EOPNOTSUPP); 3883 3884 error = fdbuf_create(fds, fdlen, &fdbuf); 3885 if (error) 3886 return (error); 3887 mp = fdbuf_allocmsg(size, fdbuf); 3888 } else { 3889 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3890 if (mp == NULL) { 3891 /* 3892 * Caught a signal waiting for memory. 3893 * Let send* return EINTR. 
3894 */ 3895 return (EINTR); 3896 } 3897 } 3898 soappendmsg(mp, &tdr, sizeof (tdr)); 3899 3900 if (fdlen != -1) { 3901 ASSERT(fdbuf != NULL); 3902 toh.level = SOL_SOCKET; 3903 toh.name = SO_FILEP; 3904 toh.len = fdbuf->fd_size + 3905 (t_uscalar_t)sizeof (struct T_opthdr); 3906 toh.status = 0; 3907 soappendmsg(mp, &toh, sizeof (toh)); 3908 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3909 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3910 } 3911 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3912 /* At most 3 bytes left in the message */ 3913 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3914 ASSERT(MBLKL(mp) <= (ssize_t)size); 3915 3916 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3917 3918 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3919 0, MSG_BAND, 0); 3920 if (error) { 3921 eprintsoline(so, error); 3922 return (error); 3923 } 3924 control = NULL; 3925 if (uiop->uio_resid > 0) { 3926 /* 3927 * Recheck for fatal errors. Fail write even though 3928 * some data have been written. This is consistent 3929 * with strwrite semantics and BSD sockets semantics. 3930 */ 3931 if (so->so_state & SS_CANTSENDMORE) { 3932 eprintsoline(so, error); 3933 return (EPIPE); 3934 } 3935 if (so->so_error != 0) { 3936 mutex_enter(&so->so_lock); 3937 error = sogeterr(so, B_TRUE); 3938 mutex_exit(&so->so_lock); 3939 if (error != 0) { 3940 eprintsoline(so, error); 3941 return (error); 3942 } 3943 } 3944 } 3945 } while (uiop->uio_resid > 0); 3946 return (0); 3947 } 3948 3949 /* 3950 * Sending data on a datagram socket. 3951 * Assumes caller has verified that SS_ISBOUND etc. are set. 3952 * 3953 * For AF_UNIX the destination address is translated to an internal 3954 * name and the source address is passed as an option. 3955 */ 3956 int 3957 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3958 struct uio *uiop, int flags) 3959 { 3960 struct T_unitdata_req tudr; 3961 mblk_t *mp; 3962 int error; 3963 void *addr; 3964 socklen_t addrlen; 3965 void *src; 3966 socklen_t srclen; 3967 ssize_t len; 3968 sotpi_info_t *sti = SOTOTPI(so); 3969 3970 ASSERT(name != NULL && namelen != 0); 3971 3972 len = uiop->uio_resid; 3973 if (len > sti->sti_tidu_size) { 3974 error = EMSGSIZE; 3975 goto done; 3976 } 3977 3978 /* Length and family checks */ 3979 error = so_addr_verify(so, name, namelen); 3980 if (error != 0) 3981 goto done; 3982 3983 if (sti->sti_direct) 3984 return (sodgram_direct(so, name, namelen, uiop, flags)); 3985 3986 if (so->so_family == AF_UNIX) { 3987 if (sti->sti_faddr_noxlate) { 3988 /* 3989 * Already have a transport internal address. Do not 3990 * pass any (transport internal) source address. 3991 */ 3992 addr = name; 3993 addrlen = namelen; 3994 src = NULL; 3995 srclen = 0; 3996 } else { 3997 /* 3998 * Pass the sockaddr_un source address as an option 3999 * and translate the remote address. 4000 * 4001 * Note that this code does not prevent sti_laddr_sa 4002 * from changing while it is being used. Thus 4003 * if an unbind+bind occurs concurrently with this 4004 * send the peer might see a partially new and a 4005 * partially old "from" address. 
4006 */ 4007 src = sti->sti_laddr_sa; 4008 srclen = (socklen_t)sti->sti_laddr_len; 4009 dprintso(so, 1, 4010 ("sosend_dgram UNIX: srclen %d, src %p\n", 4011 srclen, src)); 4012 error = so_ux_addr_xlate(so, name, namelen, 4013 (flags & MSG_XPG4_2), 4014 &addr, &addrlen); 4015 if (error) { 4016 eprintsoline(so, error); 4017 goto done; 4018 } 4019 } 4020 } else { 4021 addr = name; 4022 addrlen = namelen; 4023 src = NULL; 4024 srclen = 0; 4025 } 4026 tudr.PRIM_type = T_UNITDATA_REQ; 4027 tudr.DEST_length = addrlen; 4028 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4029 if (srclen == 0) { 4030 tudr.OPT_length = 0; 4031 tudr.OPT_offset = 0; 4032 4033 mp = soallocproto2(&tudr, sizeof (tudr), 4034 addr, addrlen, 0, _ALLOC_INTR, CRED()); 4035 if (mp == NULL) { 4036 /* 4037 * Caught a signal waiting for memory. 4038 * Let send* return EINTR. 4039 */ 4040 error = EINTR; 4041 goto done; 4042 } 4043 } else { 4044 /* 4045 * There is a AF_UNIX sockaddr_un to include as a source 4046 * address option. 4047 */ 4048 struct T_opthdr toh; 4049 ssize_t size; 4050 4051 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4052 _TPI_ALIGN_TOPT(srclen)); 4053 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4054 _TPI_ALIGN_TOPT(addrlen)); 4055 4056 toh.level = SOL_SOCKET; 4057 toh.name = SO_SRCADDR; 4058 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4059 toh.status = 0; 4060 4061 size = tudr.OPT_offset + tudr.OPT_length; 4062 mp = soallocproto2(&tudr, sizeof (tudr), 4063 addr, addrlen, size, _ALLOC_INTR, CRED()); 4064 if (mp == NULL) { 4065 /* 4066 * Caught a signal waiting for memory. 4067 * Let send* return EINTR. 4068 */ 4069 error = EINTR; 4070 goto done; 4071 } 4072 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4073 soappendmsg(mp, &toh, sizeof (toh)); 4074 soappendmsg(mp, src, srclen); 4075 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4076 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4077 } 4078 4079 if (AU_AUDITING()) 4080 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4081 4082 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4083 done: 4084 #ifdef SOCK_DEBUG 4085 if (error) { 4086 eprintsoline(so, error); 4087 } 4088 #endif /* SOCK_DEBUG */ 4089 return (error); 4090 } 4091 4092 /* 4093 * Sending data on a connected stream socket. 4094 * Assumes caller has verified that SS_ISCONNECTED is set. 4095 */ 4096 int 4097 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4098 int sflag) 4099 { 4100 struct T_data_req tdr; 4101 mblk_t *mp; 4102 int error; 4103 ssize_t iosize; 4104 sotpi_info_t *sti = SOTOTPI(so); 4105 4106 dprintso(so, 1, 4107 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4108 (void *)so, uiop->uio_resid, prim, sflag)); 4109 4110 /* 4111 * Has to be bound and connected. However, since no locks are 4112 * held the state could have changed after sotpi_sendmsg checked it 4113 * thus it is not possible to ASSERT on the state. 4114 */ 4115 4116 do { 4117 /* 4118 * Set the MORE flag if uio_resid does not fit in this 4119 * message or if the caller passed in "more". 4120 * Error for transports with zero tidu_size. 
4121 */ 4122 tdr.PRIM_type = prim; 4123 iosize = sti->sti_tidu_size; 4124 if (iosize <= 0) 4125 return (EMSGSIZE); 4126 if (uiop->uio_resid > iosize) { 4127 tdr.MORE_flag = 1; 4128 } else { 4129 if (more) 4130 tdr.MORE_flag = 1; 4131 else 4132 tdr.MORE_flag = 0; 4133 iosize = uiop->uio_resid; 4134 } 4135 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4136 prim, tdr.MORE_flag, iosize)); 4137 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4138 if (mp == NULL) { 4139 /* 4140 * Caught a signal waiting for memory. 4141 * Let send* return EINTR. 4142 */ 4143 return (EINTR); 4144 } 4145 4146 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4147 0, sflag | MSG_BAND, 0); 4148 if (error) { 4149 eprintsoline(so, error); 4150 return (error); 4151 } 4152 if (uiop->uio_resid > 0) { 4153 /* 4154 * Recheck for fatal errors. Fail write even though 4155 * some data have been written. This is consistent 4156 * with strwrite semantics and BSD sockets semantics. 4157 */ 4158 if (so->so_state & SS_CANTSENDMORE) { 4159 eprintsoline(so, error); 4160 return (EPIPE); 4161 } 4162 if (so->so_error != 0) { 4163 mutex_enter(&so->so_lock); 4164 error = sogeterr(so, B_TRUE); 4165 mutex_exit(&so->so_lock); 4166 if (error != 0) { 4167 eprintsoline(so, error); 4168 return (error); 4169 } 4170 } 4171 } 4172 } while (uiop->uio_resid > 0); 4173 return (0); 4174 } 4175 4176 /* 4177 * Check the state for errors and call the appropriate send function. 4178 * 4179 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4180 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4181 * after sending the message. 4182 */ 4183 static int 4184 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4185 struct cred *cr) 4186 { 4187 int so_state; 4188 int so_mode; 4189 int error; 4190 struct sockaddr *name; 4191 t_uscalar_t namelen; 4192 int dontroute; 4193 int flags; 4194 sotpi_info_t *sti = SOTOTPI(so); 4195 4196 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4197 (void *)so, (void *)msg, msg->msg_flags, 4198 pr_state(so->so_state, so->so_mode), so->so_error)); 4199 4200 if (so->so_version == SOV_STREAM) { 4201 /* The imaginary "sockmod" has been popped - act as a stream */ 4202 so_update_attrs(so, SOMOD); 4203 return (strwrite(SOTOV(so), uiop, cr)); 4204 } 4205 4206 mutex_enter(&so->so_lock); 4207 so_state = so->so_state; 4208 4209 if (so_state & SS_CANTSENDMORE) { 4210 mutex_exit(&so->so_lock); 4211 return (EPIPE); 4212 } 4213 4214 if (so->so_error != 0) { 4215 error = sogeterr(so, B_TRUE); 4216 if (error != 0) { 4217 mutex_exit(&so->so_lock); 4218 return (error); 4219 } 4220 } 4221 4222 name = (struct sockaddr *)msg->msg_name; 4223 namelen = msg->msg_namelen; 4224 4225 so_mode = so->so_mode; 4226 4227 if (name == NULL) { 4228 if (!(so_state & SS_ISCONNECTED)) { 4229 mutex_exit(&so->so_lock); 4230 if (so_mode & SM_CONNREQUIRED) 4231 return (ENOTCONN); 4232 else 4233 return (EDESTADDRREQ); 4234 } 4235 if (so_mode & SM_CONNREQUIRED) { 4236 name = NULL; 4237 namelen = 0; 4238 } else { 4239 /* 4240 * Note that this code does not prevent sti_faddr_sa 4241 * from changing while it is being used. Thus 4242 * if an "unconnect"+connect occurs concurrently with 4243 * this send the datagram might be delivered to a 4244 * garbaled address. 
4245 */ 4246 ASSERT(sti->sti_faddr_sa); 4247 name = sti->sti_faddr_sa; 4248 namelen = (t_uscalar_t)sti->sti_faddr_len; 4249 } 4250 } else { 4251 if (!(so_state & SS_ISCONNECTED) && 4252 (so_mode & SM_CONNREQUIRED)) { 4253 /* Required but not connected */ 4254 mutex_exit(&so->so_lock); 4255 return (ENOTCONN); 4256 } 4257 /* 4258 * Ignore the address on connection-oriented sockets. 4259 * Just like BSD this code does not generate an error for 4260 * TCP (a CONNREQUIRED socket) when sending to an address 4261 * passed in with sendto/sendmsg. Instead the data is 4262 * delivered on the connection as if no address had been 4263 * supplied. 4264 */ 4265 if ((so_state & SS_ISCONNECTED) && 4266 !(so_mode & SM_CONNREQUIRED)) { 4267 mutex_exit(&so->so_lock); 4268 return (EISCONN); 4269 } 4270 if (!(so_state & SS_ISBOUND)) { 4271 so_lock_single(so); /* Set SOLOCKED */ 4272 error = sotpi_bind(so, NULL, 0, 4273 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4274 so_unlock_single(so, SOLOCKED); 4275 if (error) { 4276 mutex_exit(&so->so_lock); 4277 eprintsoline(so, error); 4278 return (error); 4279 } 4280 } 4281 /* 4282 * Handle delayed datagram errors. These are only queued 4283 * when the application sets SO_DGRAM_ERRIND. 4284 * Return the error if we are sending to the address 4285 * that was returned in the last T_UDERROR_IND. 4286 * If sending to some other address discard the delayed 4287 * error indication. 4288 */ 4289 if (sti->sti_delayed_error) { 4290 struct T_uderror_ind *tudi; 4291 void *addr; 4292 t_uscalar_t addrlen; 4293 boolean_t match = B_FALSE; 4294 4295 ASSERT(sti->sti_eaddr_mp); 4296 error = sti->sti_delayed_error; 4297 sti->sti_delayed_error = 0; 4298 tudi = 4299 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4300 addrlen = tudi->DEST_length; 4301 addr = sogetoff(sti->sti_eaddr_mp, 4302 tudi->DEST_offset, addrlen, 1); 4303 ASSERT(addr); /* Checked by strsock_proto */ 4304 switch (so->so_family) { 4305 case AF_INET: { 4306 /* Compare just IP address and port */ 4307 sin_t *sin1 = (sin_t *)name; 4308 sin_t *sin2 = (sin_t *)addr; 4309 4310 if (addrlen == sizeof (sin_t) && 4311 namelen == addrlen && 4312 sin1->sin_port == sin2->sin_port && 4313 sin1->sin_addr.s_addr == 4314 sin2->sin_addr.s_addr) 4315 match = B_TRUE; 4316 break; 4317 } 4318 case AF_INET6: { 4319 /* Compare just IP address and port. 
Not flow */ 4320 sin6_t *sin1 = (sin6_t *)name; 4321 sin6_t *sin2 = (sin6_t *)addr; 4322 4323 if (addrlen == sizeof (sin6_t) && 4324 namelen == addrlen && 4325 sin1->sin6_port == sin2->sin6_port && 4326 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4327 &sin2->sin6_addr)) 4328 match = B_TRUE; 4329 break; 4330 } 4331 case AF_UNIX: 4332 default: 4333 if (namelen == addrlen && 4334 bcmp(name, addr, namelen) == 0) 4335 match = B_TRUE; 4336 } 4337 if (match) { 4338 freemsg(sti->sti_eaddr_mp); 4339 sti->sti_eaddr_mp = NULL; 4340 mutex_exit(&so->so_lock); 4341 #ifdef DEBUG 4342 dprintso(so, 0, 4343 ("sockfs delayed error %d for %s\n", 4344 error, 4345 pr_addr(so->so_family, name, namelen))); 4346 #endif /* DEBUG */ 4347 return (error); 4348 } 4349 freemsg(sti->sti_eaddr_mp); 4350 sti->sti_eaddr_mp = NULL; 4351 } 4352 } 4353 mutex_exit(&so->so_lock); 4354 4355 flags = msg->msg_flags; 4356 dontroute = 0; 4357 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4358 uint32_t val; 4359 4360 val = 1; 4361 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4362 &val, (t_uscalar_t)sizeof (val), cr); 4363 if (error) 4364 return (error); 4365 dontroute = 1; 4366 } 4367 4368 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4369 error = EOPNOTSUPP; 4370 goto done; 4371 } 4372 if (msg->msg_controllen != 0) { 4373 if (!(so_mode & SM_CONNREQUIRED)) { 4374 so_update_attrs(so, SOMOD); 4375 error = sosend_dgramcmsg(so, name, namelen, uiop, 4376 msg->msg_control, msg->msg_controllen, flags); 4377 } else { 4378 if (flags & MSG_OOB) { 4379 /* Can't generate T_EXDATA_REQ with options */ 4380 error = EOPNOTSUPP; 4381 goto done; 4382 } 4383 so_update_attrs(so, SOMOD); 4384 error = sosend_svccmsg(so, uiop, 4385 !(flags & MSG_EOR), 4386 msg->msg_control, msg->msg_controllen, 4387 flags); 4388 } 4389 goto done; 4390 } 4391 4392 so_update_attrs(so, SOMOD); 4393 if (!(so_mode & SM_CONNREQUIRED)) { 4394 /* 4395 * If there is no SO_DONTROUTE to turn off return immediately 4396 * from send_dgram. This can allow tail-call optimizations. 4397 */ 4398 if (!dontroute) { 4399 return (sosend_dgram(so, name, namelen, uiop, flags)); 4400 } 4401 error = sosend_dgram(so, name, namelen, uiop, flags); 4402 } else { 4403 t_scalar_t prim; 4404 int sflag; 4405 4406 /* Ignore msg_name in the connected state */ 4407 if (flags & MSG_OOB) { 4408 prim = T_EXDATA_REQ; 4409 /* 4410 * Send down T_EXDATA_REQ even if there is flow 4411 * control for data. 4412 */ 4413 sflag = MSG_IGNFLOW; 4414 } else { 4415 if (so_mode & SM_BYTESTREAM) { 4416 /* Byte stream transport - use write */ 4417 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4418 4419 /* Send M_DATA messages */ 4420 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4421 (error = nl7c_data(so, uiop)) >= 0) { 4422 /* NL7C consumed the data */ 4423 return (error); 4424 } 4425 /* 4426 * If there is no SO_DONTROUTE to turn off, 4427 * sti_direct is on, and there is no flow 4428 * control, we can take the fast path. 4429 */ 4430 if (!dontroute && sti->sti_direct != 0 && 4431 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4432 return (sostream_direct(so, uiop, 4433 NULL, cr)); 4434 } 4435 error = strwrite(SOTOV(so), uiop, cr); 4436 goto done; 4437 } 4438 prim = T_DATA_REQ; 4439 sflag = 0; 4440 } 4441 /* 4442 * If there is no SO_DONTROUTE to turn off return immediately 4443 * from sosend_svc. This can allow tail-call optimizations. 
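 *
 * User-level view, for illustration (names hypothetical):
 *
 *	sendto(s, buf, len, MSG_DONTROUTE, (struct sockaddr *)&to, tolen);
 *
 * on a socket that does not already have SO_DONTROUTE set behaves
 * here roughly like
 *
 *	int on = 1, off = 0;
 *
 *	(void) setsockopt(s, SOL_SOCKET, SO_DONTROUTE, &on, sizeof (on));
 *	(void) sendto(s, buf, len, 0, (struct sockaddr *)&to, tolen);
 *	(void) setsockopt(s, SOL_SOCKET, SO_DONTROUTE, &off, sizeof (off));
 *
 * which is also why the tail-call shortcut is skipped whenever
 * dontroute had to be turned on.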
4444 */ 4445 if (!dontroute) 4446 return (sosend_svc(so, uiop, prim, 4447 !(flags & MSG_EOR), sflag)); 4448 error = sosend_svc(so, uiop, prim, 4449 !(flags & MSG_EOR), sflag); 4450 } 4451 ASSERT(dontroute); 4452 done: 4453 if (dontroute) { 4454 uint32_t val; 4455 4456 val = 0; 4457 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4458 &val, (t_uscalar_t)sizeof (val), cr); 4459 } 4460 return (error); 4461 } 4462 4463 /* 4464 * kstrwritemp() has very similar semantics as that of strwrite(). 4465 * The main difference is it obtains mblks from the caller and also 4466 * does not do any copy as done in strwrite() from user buffers to 4467 * kernel buffers. 4468 * 4469 * Currently, this routine is used by sendfile to send data allocated 4470 * within the kernel without any copying. This interface does not use the 4471 * synchronous stream interface as synch. stream interface implies 4472 * copying. 4473 */ 4474 int 4475 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4476 { 4477 struct stdata *stp; 4478 struct queue *wqp; 4479 mblk_t *newmp; 4480 char waitflag; 4481 int tempmode; 4482 int error = 0; 4483 int done = 0; 4484 struct sonode *so; 4485 boolean_t direct; 4486 4487 ASSERT(vp->v_stream); 4488 stp = vp->v_stream; 4489 4490 so = VTOSO(vp); 4491 direct = _SOTOTPI(so)->sti_direct; 4492 4493 /* 4494 * This is the sockfs direct fast path. canputnext() need 4495 * not be accurate so we don't grab the sd_lock here. If 4496 * we get flow-controlled, we grab sd_lock just before the 4497 * do..while loop below to emulate what strwrite() does. 4498 */ 4499 wqp = stp->sd_wrq; 4500 if (canputnext(wqp) && direct && 4501 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4502 return (sostream_direct(so, NULL, mp, CRED())); 4503 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4504 /* Fast check of flags before acquiring the lock */ 4505 mutex_enter(&stp->sd_lock); 4506 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4507 mutex_exit(&stp->sd_lock); 4508 if (error != 0) { 4509 if (!(stp->sd_flag & STPLEX) && 4510 (stp->sd_wput_opt & SW_SIGPIPE)) { 4511 error = EPIPE; 4512 } 4513 return (error); 4514 } 4515 } 4516 4517 waitflag = WRITEWAIT; 4518 if (stp->sd_flag & OLDNDELAY) 4519 tempmode = fmode & ~FNDELAY; 4520 else 4521 tempmode = fmode; 4522 4523 mutex_enter(&stp->sd_lock); 4524 do { 4525 if (canputnext(wqp)) { 4526 mutex_exit(&stp->sd_lock); 4527 if (stp->sd_wputdatafunc != NULL) { 4528 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4529 NULL, NULL, NULL); 4530 if (newmp == NULL) { 4531 /* The caller will free mp */ 4532 return (ECOMM); 4533 } 4534 mp = newmp; 4535 } 4536 putnext(wqp, mp); 4537 return (0); 4538 } 4539 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4540 &done); 4541 } while (error == 0 && !done); 4542 4543 mutex_exit(&stp->sd_lock); 4544 /* 4545 * EAGAIN tells the application to try again. ENOMEM 4546 * is returned only if the memory allocation size 4547 * exceeds the physical limits of the system. ENOMEM 4548 * can't be true here. 
4549 */ 4550 if (error == ENOMEM) 4551 error = EAGAIN; 4552 return (error); 4553 } 4554 4555 /* ARGSUSED */ 4556 static int 4557 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4558 struct cred *cr, mblk_t **mpp) 4559 { 4560 int error; 4561 4562 if (so->so_family != AF_INET && so->so_family != AF_INET6) 4563 return (EAFNOSUPPORT); 4564 4565 if (so->so_state & SS_CANTSENDMORE) 4566 return (EPIPE); 4567 4568 if (so->so_type != SOCK_STREAM) 4569 return (EOPNOTSUPP); 4570 4571 if ((so->so_state & SS_ISCONNECTED) == 0) 4572 return (ENOTCONN); 4573 4574 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4575 if (error == 0) 4576 *mpp = NULL; 4577 return (error); 4578 } 4579 4580 /* 4581 * Sending data on a datagram socket. 4582 * Assumes caller has verified that SS_ISBOUND etc. are set. 4583 */ 4584 /* ARGSUSED */ 4585 static int 4586 sodgram_direct(struct sonode *so, struct sockaddr *name, 4587 socklen_t namelen, struct uio *uiop, int flags) 4588 { 4589 struct T_unitdata_req tudr; 4590 mblk_t *mp = NULL; 4591 int error = 0; 4592 void *addr; 4593 socklen_t addrlen; 4594 ssize_t len; 4595 struct stdata *stp = SOTOV(so)->v_stream; 4596 int so_state; 4597 queue_t *udp_wq; 4598 boolean_t connected; 4599 mblk_t *mpdata = NULL; 4600 sotpi_info_t *sti = SOTOTPI(so); 4601 uint32_t auditing = AU_AUDITING(); 4602 4603 ASSERT(name != NULL && namelen != 0); 4604 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4605 ASSERT(!(so->so_mode & SM_EXDATA)); 4606 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4607 ASSERT(SOTOV(so)->v_type == VSOCK); 4608 4609 /* Caller checked for proper length */ 4610 len = uiop->uio_resid; 4611 ASSERT(len <= sti->sti_tidu_size); 4612 4613 /* Length and family checks have been done by caller */ 4614 ASSERT(name->sa_family == so->so_family); 4615 ASSERT(so->so_family == AF_INET || 4616 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4617 ASSERT(so->so_family == AF_INET6 || 4618 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4619 4620 addr = name; 4621 addrlen = namelen; 4622 4623 if (stp->sd_sidp != NULL && 4624 (error = straccess(stp, JCWRITE)) != 0) 4625 goto done; 4626 4627 so_state = so->so_state; 4628 4629 connected = so_state & SS_ISCONNECTED; 4630 if (!connected) { 4631 tudr.PRIM_type = T_UNITDATA_REQ; 4632 tudr.DEST_length = addrlen; 4633 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4634 tudr.OPT_length = 0; 4635 tudr.OPT_offset = 0; 4636 4637 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4638 _ALLOC_INTR, CRED()); 4639 if (mp == NULL) { 4640 /* 4641 * Caught a signal waiting for memory. 4642 * Let send* return EINTR. 4643 */ 4644 error = EINTR; 4645 goto done; 4646 } 4647 } 4648 4649 /* 4650 * For UDP we don't break up the copyin into smaller pieces 4651 * as in the TCP case. That means if ENOMEM is returned by 4652 * mcopyinuio() then the uio vector has not been modified at 4653 * all and we fallback to either strwrite() or kstrputmsg() 4654 * below. Note also that we never generate priority messages 4655 * from here. 
4656 */ 4657 udp_wq = stp->sd_wrq->q_next; 4658 if (canput(udp_wq) && 4659 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4660 ASSERT(DB_TYPE(mpdata) == M_DATA); 4661 ASSERT(uiop->uio_resid == 0); 4662 if (!connected) 4663 linkb(mp, mpdata); 4664 else 4665 mp = mpdata; 4666 if (auditing) 4667 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4668 4669 udp_wput(udp_wq, mp); 4670 return (0); 4671 } 4672 4673 ASSERT(mpdata == NULL); 4674 if (error != 0 && error != ENOMEM) { 4675 freemsg(mp); 4676 return (error); 4677 } 4678 4679 /* 4680 * For connected, let strwrite() handle the blocking case. 4681 * Otherwise we fall thru and use kstrputmsg(). 4682 */ 4683 if (connected) 4684 return (strwrite(SOTOV(so), uiop, CRED())); 4685 4686 if (auditing) 4687 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4688 4689 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4690 done: 4691 #ifdef SOCK_DEBUG 4692 if (error != 0) { 4693 eprintsoline(so, error); 4694 } 4695 #endif /* SOCK_DEBUG */ 4696 return (error); 4697 } 4698 4699 int 4700 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4701 { 4702 struct stdata *stp = SOTOV(so)->v_stream; 4703 ssize_t iosize, rmax, maxblk; 4704 queue_t *tcp_wq = stp->sd_wrq->q_next; 4705 mblk_t *newmp; 4706 int error = 0, wflag = 0; 4707 4708 ASSERT(so->so_mode & SM_BYTESTREAM); 4709 ASSERT(SOTOV(so)->v_type == VSOCK); 4710 4711 if (stp->sd_sidp != NULL && 4712 (error = straccess(stp, JCWRITE)) != 0) 4713 return (error); 4714 4715 if (uiop == NULL) { 4716 /* 4717 * kstrwritemp() should have checked sd_flag and 4718 * flow-control before coming here. If we end up 4719 * here it means that we can simply pass down the 4720 * data to tcp. 4721 */ 4722 ASSERT(mp != NULL); 4723 if (stp->sd_wputdatafunc != NULL) { 4724 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4725 NULL, NULL, NULL); 4726 if (newmp == NULL) { 4727 /* The caller will free mp */ 4728 return (ECOMM); 4729 } 4730 mp = newmp; 4731 } 4732 tcp_wput(tcp_wq, mp); 4733 return (0); 4734 } 4735 4736 /* Fallback to strwrite() to do proper error handling */ 4737 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4738 return (strwrite(SOTOV(so), uiop, cr)); 4739 4740 rmax = stp->sd_qn_maxpsz; 4741 ASSERT(rmax >= 0 || rmax == INFPSZ); 4742 if (rmax == 0 || uiop->uio_resid <= 0) 4743 return (0); 4744 4745 if (rmax == INFPSZ) 4746 rmax = uiop->uio_resid; 4747 4748 maxblk = stp->sd_maxblk; 4749 4750 for (;;) { 4751 iosize = MIN(uiop->uio_resid, rmax); 4752 4753 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4754 if (mp == NULL) { 4755 /* 4756 * Fallback to strwrite() for ENOMEM; if this 4757 * is our first time in this routine and the uio 4758 * vector has not been modified, we will end up 4759 * calling strwrite() without any flag set. 4760 */ 4761 if (error == ENOMEM) 4762 goto slow_send; 4763 else 4764 return (error); 4765 } 4766 ASSERT(uiop->uio_resid >= 0); 4767 /* 4768 * If mp is non-NULL and ENOMEM is set, it means that 4769 * mcopyinuio() was able to break down some of the user 4770 * data into one or more mblks. Send the partial data 4771 * to tcp and let the rest be handled in strwrite(). 
4772 */ 4773 ASSERT(error == 0 || error == ENOMEM); 4774 if (stp->sd_wputdatafunc != NULL) { 4775 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4776 NULL, NULL, NULL); 4777 if (newmp == NULL) { 4778 /* The caller will free mp */ 4779 return (ECOMM); 4780 } 4781 mp = newmp; 4782 } 4783 tcp_wput(tcp_wq, mp); 4784 4785 wflag |= NOINTR; 4786 4787 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4788 ASSERT(error == 0); 4789 break; 4790 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4791 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4792 slow_send: 4793 /* 4794 * We were able to send down partial data using 4795 * the direct call interface, but are now relying 4796 * on strwrite() to handle the non-fastpath cases. 4797 * If the socket is blocking we will sleep in 4798 * strwaitq() until write is permitted, otherwise, 4799 * we will need to return the amount of bytes 4800 * written so far back to the app. This is the 4801 * reason why we pass NOINTR flag to strwrite() 4802 * for non-blocking socket, because we don't want 4803 * to return EAGAIN when portion of the user data 4804 * has actually been sent down. 4805 */ 4806 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4807 } 4808 } 4809 return (0); 4810 } 4811 4812 /* 4813 * Update sti_faddr by asking the transport (unless AF_UNIX). 4814 */ 4815 /* ARGSUSED */ 4816 int 4817 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4818 boolean_t accept, struct cred *cr) 4819 { 4820 struct strbuf strbuf; 4821 int error = 0, res; 4822 void *addr; 4823 t_uscalar_t addrlen; 4824 k_sigset_t smask; 4825 sotpi_info_t *sti = SOTOTPI(so); 4826 4827 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4828 (void *)so, pr_state(so->so_state, so->so_mode))); 4829 4830 ASSERT(*namelen > 0); 4831 mutex_enter(&so->so_lock); 4832 so_lock_single(so); /* Set SOLOCKED */ 4833 4834 if (accept) { 4835 bcopy(sti->sti_faddr_sa, name, 4836 MIN(*namelen, sti->sti_faddr_len)); 4837 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4838 goto done; 4839 } 4840 4841 if (!(so->so_state & SS_ISCONNECTED)) { 4842 error = ENOTCONN; 4843 goto done; 4844 } 4845 /* Added this check for X/Open */ 4846 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4847 error = EINVAL; 4848 if (xnet_check_print) { 4849 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4850 } 4851 goto done; 4852 } 4853 4854 if (sti->sti_faddr_valid) { 4855 bcopy(sti->sti_faddr_sa, name, 4856 MIN(*namelen, sti->sti_faddr_len)); 4857 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4858 goto done; 4859 } 4860 4861 #ifdef DEBUG 4862 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4863 pr_addr(so->so_family, sti->sti_faddr_sa, 4864 (t_uscalar_t)sti->sti_faddr_len))); 4865 #endif /* DEBUG */ 4866 4867 if (so->so_family == AF_UNIX) { 4868 /* Transport has different name space - return local info */ 4869 if (sti->sti_faddr_noxlate) 4870 *namelen = 0; 4871 error = 0; 4872 goto done; 4873 } 4874 4875 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4876 4877 ASSERT(sti->sti_faddr_sa); 4878 /* Allocate local buffer to use with ioctl */ 4879 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4880 mutex_exit(&so->so_lock); 4881 addr = kmem_alloc(addrlen, KM_SLEEP); 4882 4883 /* 4884 * Issue TI_GETPEERNAME with signals masked. 4885 * Put the result in sti_faddr_sa so that getpeername works after 4886 * a shutdown(output). 4887 * If the ioctl fails (e.g. 
due to a ECONNRESET) the error is reposted 4888 * back to the socket. 4889 */ 4890 strbuf.buf = addr; 4891 strbuf.maxlen = addrlen; 4892 strbuf.len = 0; 4893 4894 sigintr(&smask, 0); 4895 res = 0; 4896 ASSERT(cr); 4897 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4898 0, K_TO_K, cr, &res); 4899 sigunintr(&smask); 4900 4901 mutex_enter(&so->so_lock); 4902 /* 4903 * If there is an error record the error in so_error put don't fail 4904 * the getpeername. Instead fallback on the recorded 4905 * sti->sti_faddr_sa. 4906 */ 4907 if (error) { 4908 /* 4909 * Various stream head errors can be returned to the ioctl. 4910 * However, it is impossible to determine which ones of 4911 * these are really socket level errors that were incorrectly 4912 * consumed by the ioctl. Thus this code silently ignores the 4913 * error - to code explicitly does not reinstate the error 4914 * using soseterror(). 4915 * Experiments have shows that at least this set of 4916 * errors are reported and should not be reinstated on the 4917 * socket: 4918 * EINVAL E.g. if an I_LINK was in effect when 4919 * getpeername was called. 4920 * EPIPE The ioctl error semantics prefer the write 4921 * side error over the read side error. 4922 * ENOTCONN The transport just got disconnected but 4923 * sockfs had not yet seen the T_DISCON_IND 4924 * when issuing the ioctl. 4925 */ 4926 error = 0; 4927 } else if (res == 0 && strbuf.len > 0 && 4928 (so->so_state & SS_ISCONNECTED)) { 4929 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 4930 sti->sti_faddr_len = (socklen_t)strbuf.len; 4931 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 4932 sti->sti_faddr_valid = 1; 4933 4934 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 4935 *namelen = sti->sti_faddr_len; 4936 } 4937 kmem_free(addr, addrlen); 4938 #ifdef DEBUG 4939 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4940 pr_addr(so->so_family, sti->sti_faddr_sa, 4941 (t_uscalar_t)sti->sti_faddr_len))); 4942 #endif /* DEBUG */ 4943 done: 4944 so_unlock_single(so, SOLOCKED); 4945 mutex_exit(&so->so_lock); 4946 return (error); 4947 } 4948 4949 /* 4950 * Update sti_laddr by asking the transport (unless AF_UNIX). 4951 */ 4952 int 4953 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4954 struct cred *cr) 4955 { 4956 struct strbuf strbuf; 4957 int error = 0, res; 4958 void *addr; 4959 t_uscalar_t addrlen; 4960 k_sigset_t smask; 4961 sotpi_info_t *sti = SOTOTPI(so); 4962 4963 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4964 (void *)so, pr_state(so->so_state, so->so_mode))); 4965 4966 ASSERT(*namelen > 0); 4967 mutex_enter(&so->so_lock); 4968 so_lock_single(so); /* Set SOLOCKED */ 4969 4970 #ifdef DEBUG 4971 4972 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4973 pr_addr(so->so_family, sti->sti_laddr_sa, 4974 (t_uscalar_t)sti->sti_laddr_len))); 4975 #endif /* DEBUG */ 4976 if (sti->sti_laddr_valid) { 4977 bcopy(sti->sti_laddr_sa, name, 4978 MIN(*namelen, sti->sti_laddr_len)); 4979 *namelen = sti->sti_laddr_len; 4980 goto done; 4981 } 4982 4983 if (so->so_family == AF_UNIX) { 4984 /* 4985 * Transport has different name space - return local info. If we 4986 * have enough space, let consumers know the family. 4987 */ 4988 if (*namelen >= sizeof (sa_family_t)) { 4989 name->sa_family = AF_UNIX; 4990 *namelen = sizeof (sa_family_t); 4991 } else { 4992 *namelen = 0; 4993 } 4994 error = 0; 4995 goto done; 4996 } 4997 if (!(so->so_state & SS_ISBOUND)) { 4998 /* If not bound, then nothing to return. 
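 * The caller's buffer and length are deliberately left untouched in
 * this case; an unbound socket is not treated as an error by
 * getsockname.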
*/ 4999 error = 0; 5000 goto done; 5001 } 5002 5003 /* Allocate local buffer to use with ioctl */ 5004 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 5005 mutex_exit(&so->so_lock); 5006 addr = kmem_alloc(addrlen, KM_SLEEP); 5007 5008 /* 5009 * Issue TI_GETMYNAME with signals masked. 5010 * Put the result in sti_laddr_sa so that getsockname works after 5011 * a shutdown(output). 5012 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5013 * back to the socket. 5014 */ 5015 strbuf.buf = addr; 5016 strbuf.maxlen = addrlen; 5017 strbuf.len = 0; 5018 5019 sigintr(&smask, 0); 5020 res = 0; 5021 ASSERT(cr); 5022 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5023 0, K_TO_K, cr, &res); 5024 sigunintr(&smask); 5025 5026 mutex_enter(&so->so_lock); 5027 /* 5028 * If there is an error record the error in so_error put don't fail 5029 * the getsockname. Instead fallback on the recorded 5030 * sti->sti_laddr_sa. 5031 */ 5032 if (error) { 5033 /* 5034 * Various stream head errors can be returned to the ioctl. 5035 * However, it is impossible to determine which ones of 5036 * these are really socket level errors that were incorrectly 5037 * consumed by the ioctl. Thus this code silently ignores the 5038 * error - to code explicitly does not reinstate the error 5039 * using soseterror(). 5040 * Experiments have shows that at least this set of 5041 * errors are reported and should not be reinstated on the 5042 * socket: 5043 * EINVAL E.g. if an I_LINK was in effect when 5044 * getsockname was called. 5045 * EPIPE The ioctl error semantics prefer the write 5046 * side error over the read side error. 5047 */ 5048 error = 0; 5049 } else if (res == 0 && strbuf.len > 0 && 5050 (so->so_state & SS_ISBOUND)) { 5051 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5052 sti->sti_laddr_len = (socklen_t)strbuf.len; 5053 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5054 sti->sti_laddr_valid = 1; 5055 5056 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5057 *namelen = sti->sti_laddr_len; 5058 } 5059 kmem_free(addr, addrlen); 5060 #ifdef DEBUG 5061 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5062 pr_addr(so->so_family, sti->sti_laddr_sa, 5063 (t_uscalar_t)sti->sti_laddr_len))); 5064 #endif /* DEBUG */ 5065 done: 5066 so_unlock_single(so, SOLOCKED); 5067 mutex_exit(&so->so_lock); 5068 return (error); 5069 } 5070 5071 /* 5072 * Get socket options. For SOL_SOCKET options some options are handled 5073 * by the sockfs while others use the value recorded in the sonode as a 5074 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5075 * 5076 * On the return most *optlenp bytes are copied to optval. 
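 *
 * Illustrative user-level view (names hypothetical):
 *
 *	int type;
 *	socklen_t tlen = sizeof (type);
 *
 *	(void) getsockopt(s, SOL_SOCKET, SO_TYPE, &type, &tlen);
 *
 * Options such as SO_TYPE, SO_ERROR, SO_ACCEPTCONN and SO_DOMAIN are
 * answered below straight from the sonode and never generate a
 * T_SVR4_OPTMGMT_REQ to the transport.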
5077 */ 5078 /* ARGSUSED */ 5079 int 5080 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5081 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5082 { 5083 struct T_optmgmt_req optmgmt_req; 5084 struct T_optmgmt_ack *optmgmt_ack; 5085 struct opthdr oh; 5086 struct opthdr *opt_res; 5087 mblk_t *mp = NULL; 5088 int error = 0; 5089 void *option = NULL; /* Set if fallback value */ 5090 t_uscalar_t maxlen = *optlenp; 5091 t_uscalar_t len; 5092 uint32_t value; 5093 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5094 struct timeval32 tmo_val32; 5095 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5096 5097 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5098 (void *)so, level, option_name, optval, (void *)optlenp, 5099 pr_state(so->so_state, so->so_mode))); 5100 5101 mutex_enter(&so->so_lock); 5102 so_lock_single(so); /* Set SOLOCKED */ 5103 5104 /* 5105 * Check for SOL_SOCKET options. 5106 * Certain SOL_SOCKET options are returned directly whereas 5107 * others only provide a default (fallback) value should 5108 * the T_SVR4_OPTMGMT_REQ fail. 5109 */ 5110 if (level == SOL_SOCKET) { 5111 /* Check parameters */ 5112 switch (option_name) { 5113 case SO_TYPE: 5114 case SO_ERROR: 5115 case SO_DEBUG: 5116 case SO_ACCEPTCONN: 5117 case SO_REUSEADDR: 5118 case SO_KEEPALIVE: 5119 case SO_DONTROUTE: 5120 case SO_BROADCAST: 5121 case SO_USELOOPBACK: 5122 case SO_OOBINLINE: 5123 case SO_SNDBUF: 5124 case SO_RCVBUF: 5125 #ifdef notyet 5126 case SO_SNDLOWAT: 5127 case SO_RCVLOWAT: 5128 #endif /* notyet */ 5129 case SO_DOMAIN: 5130 case SO_DGRAM_ERRIND: 5131 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5132 error = EINVAL; 5133 eprintsoline(so, error); 5134 goto done2; 5135 } 5136 break; 5137 case SO_RCVTIMEO: 5138 case SO_SNDTIMEO: 5139 if (get_udatamodel() == DATAMODEL_NONE || 5140 get_udatamodel() == DATAMODEL_NATIVE) { 5141 if (maxlen < sizeof (struct timeval)) { 5142 error = EINVAL; 5143 eprintsoline(so, error); 5144 goto done2; 5145 } 5146 } else { 5147 if (maxlen < sizeof (struct timeval32)) { 5148 error = EINVAL; 5149 eprintsoline(so, error); 5150 goto done2; 5151 } 5152 5153 } 5154 break; 5155 case SO_LINGER: 5156 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5157 error = EINVAL; 5158 eprintsoline(so, error); 5159 goto done2; 5160 } 5161 break; 5162 case SO_SND_BUFINFO: 5163 if (maxlen < (t_uscalar_t) 5164 sizeof (struct so_snd_bufinfo)) { 5165 error = EINVAL; 5166 eprintsoline(so, error); 5167 goto done2; 5168 } 5169 break; 5170 } 5171 5172 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5173 5174 switch (option_name) { 5175 case SO_TYPE: 5176 value = so->so_type; 5177 option = &value; 5178 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5179 5180 case SO_ERROR: 5181 value = sogeterr(so, B_TRUE); 5182 option = &value; 5183 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5184 5185 case SO_ACCEPTCONN: 5186 if (so->so_state & SS_ACCEPTCONN) 5187 value = SO_ACCEPTCONN; 5188 else 5189 value = 0; 5190 #ifdef DEBUG 5191 if (value) { 5192 dprintso(so, 1, 5193 ("sotpi_getsockopt: 0x%x is set\n", 5194 option_name)); 5195 } else { 5196 dprintso(so, 1, 5197 ("sotpi_getsockopt: 0x%x not set\n", 5198 option_name)); 5199 } 5200 #endif /* DEBUG */ 5201 option = &value; 5202 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5203 5204 case SO_DEBUG: 5205 case SO_REUSEADDR: 5206 case SO_KEEPALIVE: 5207 case SO_DONTROUTE: 5208 case SO_BROADCAST: 5209 case SO_USELOOPBACK: 5210 case SO_OOBINLINE: 5211 case 
SO_DGRAM_ERRIND: 5212 value = (so->so_options & option_name); 5213 #ifdef DEBUG 5214 if (value) { 5215 dprintso(so, 1, 5216 ("sotpi_getsockopt: 0x%x is set\n", 5217 option_name)); 5218 } else { 5219 dprintso(so, 1, 5220 ("sotpi_getsockopt: 0x%x not set\n", 5221 option_name)); 5222 } 5223 #endif /* DEBUG */ 5224 option = &value; 5225 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5226 5227 /* 5228 * The following options are only returned by sockfs when the 5229 * T_SVR4_OPTMGMT_REQ fails. 5230 */ 5231 case SO_LINGER: 5232 option = &so->so_linger; 5233 len = (t_uscalar_t)sizeof (struct linger); 5234 break; 5235 case SO_SNDBUF: { 5236 ssize_t lvalue; 5237 5238 /* 5239 * If the option has not been set then get a default 5240 * value from the read queue. This value is 5241 * returned if the transport fails 5242 * the T_SVR4_OPTMGMT_REQ. 5243 */ 5244 lvalue = so->so_sndbuf; 5245 if (lvalue == 0) { 5246 mutex_exit(&so->so_lock); 5247 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5248 QHIWAT, 0, &lvalue); 5249 mutex_enter(&so->so_lock); 5250 dprintso(so, 1, 5251 ("got SO_SNDBUF %ld from q\n", lvalue)); 5252 } 5253 value = (int)lvalue; 5254 option = &value; 5255 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5256 break; 5257 } 5258 case SO_RCVBUF: { 5259 ssize_t lvalue; 5260 5261 /* 5262 * If the option has not been set then get a default 5263 * value from the read queue. This value is 5264 * returned if the transport fails 5265 * the T_SVR4_OPTMGMT_REQ. 5266 * 5267 * XXX If SO_RCVBUF has been set and this is an 5268 * XPG 4.2 application then do not ask the transport 5269 * since the transport might adjust the value and not 5270 * return exactly what was set by the application. 5271 * For non-XPG 4.2 application we return the value 5272 * that the transport is actually using. 5273 */ 5274 lvalue = so->so_rcvbuf; 5275 if (lvalue == 0) { 5276 mutex_exit(&so->so_lock); 5277 (void) strqget(RD(strvp2wq(SOTOV(so))), 5278 QHIWAT, 0, &lvalue); 5279 mutex_enter(&so->so_lock); 5280 dprintso(so, 1, 5281 ("got SO_RCVBUF %ld from q\n", lvalue)); 5282 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5283 value = (int)lvalue; 5284 option = &value; 5285 goto copyout; /* skip asking transport */ 5286 } 5287 value = (int)lvalue; 5288 option = &value; 5289 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5290 break; 5291 } 5292 case SO_DOMAIN: 5293 value = so->so_family; 5294 option = &value; 5295 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5296 5297 #ifdef notyet 5298 /* 5299 * We do not implement the semantics of these options 5300 * thus we shouldn't implement the options either. 
5301 */ 5302 case SO_SNDLOWAT: 5303 value = so->so_sndlowat; 5304 option = &value; 5305 break; 5306 case SO_RCVLOWAT: 5307 value = so->so_rcvlowat; 5308 option = &value; 5309 break; 5310 #endif /* notyet */ 5311 case SO_SNDTIMEO: 5312 case SO_RCVTIMEO: { 5313 clock_t val; 5314 5315 if (option_name == SO_RCVTIMEO) 5316 val = drv_hztousec(so->so_rcvtimeo); 5317 else 5318 val = drv_hztousec(so->so_sndtimeo); 5319 tmo_val.tv_sec = val / (1000 * 1000); 5320 tmo_val.tv_usec = val % (1000 * 1000); 5321 if (get_udatamodel() == DATAMODEL_NONE || 5322 get_udatamodel() == DATAMODEL_NATIVE) { 5323 option = &tmo_val; 5324 len = sizeof (struct timeval); 5325 } else { 5326 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5327 option = &tmo_val32; 5328 len = sizeof (struct timeval32); 5329 } 5330 break; 5331 } 5332 case SO_SND_BUFINFO: { 5333 snd_bufinfo.sbi_wroff = 5334 (so->so_proto_props).sopp_wroff; 5335 snd_bufinfo.sbi_maxblk = 5336 (so->so_proto_props).sopp_maxblk; 5337 snd_bufinfo.sbi_maxpsz = 5338 (so->so_proto_props).sopp_maxpsz; 5339 snd_bufinfo.sbi_tail = 5340 (so->so_proto_props).sopp_tail; 5341 option = &snd_bufinfo; 5342 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5343 break; 5344 } 5345 } 5346 } 5347 5348 mutex_exit(&so->so_lock); 5349 5350 /* Send request */ 5351 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5352 optmgmt_req.MGMT_flags = T_CHECK; 5353 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5354 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5355 5356 oh.level = level; 5357 oh.name = option_name; 5358 oh.len = maxlen; 5359 5360 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5361 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5362 /* Let option management work in the presence of data flow control */ 5363 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5364 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5365 mp = NULL; 5366 mutex_enter(&so->so_lock); 5367 if (error) { 5368 eprintsoline(so, error); 5369 goto done2; 5370 } 5371 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5372 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5373 if (error) { 5374 if (option != NULL) { 5375 /* We have a fallback value */ 5376 error = 0; 5377 goto copyout; 5378 } 5379 eprintsoline(so, error); 5380 goto done2; 5381 } 5382 ASSERT(mp); 5383 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5384 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5385 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5386 if (opt_res == NULL) { 5387 if (option != NULL) { 5388 /* We have a fallback value */ 5389 error = 0; 5390 goto copyout; 5391 } 5392 error = EPROTO; 5393 eprintsoline(so, error); 5394 goto done; 5395 } 5396 option = &opt_res[1]; 5397 5398 /* check to ensure that the option is within bounds */ 5399 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5400 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5401 if (option != NULL) { 5402 /* We have a fallback value */ 5403 error = 0; 5404 goto copyout; 5405 } 5406 error = EPROTO; 5407 eprintsoline(so, error); 5408 goto done; 5409 } 5410 5411 len = opt_res->len; 5412 5413 copyout: { 5414 t_uscalar_t size = MIN(len, maxlen); 5415 bcopy(option, optval, size); 5416 bcopy(&size, optlenp, sizeof (size)); 5417 } 5418 done: 5419 freemsg(mp); 5420 done2: 5421 so_unlock_single(so, SOLOCKED); 5422 mutex_exit(&so->so_lock); 5423 5424 return (error); 5425 } 5426 5427 /* 5428 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 
5429 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5430 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5431 * setsockopt has to work even if the transport does not support the option. 5432 */ 5433 /* ARGSUSED */ 5434 int 5435 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5436 const void *optval, t_uscalar_t optlen, struct cred *cr) 5437 { 5438 struct T_optmgmt_req optmgmt_req; 5439 struct opthdr oh; 5440 mblk_t *mp; 5441 int error = 0; 5442 boolean_t handled = B_FALSE; 5443 5444 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5445 (void *)so, level, option_name, optval, optlen, 5446 pr_state(so->so_state, so->so_mode))); 5447 5448 /* X/Open requires this check */ 5449 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5450 if (xnet_check_print) 5451 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5452 return (EINVAL); 5453 } 5454 5455 mutex_enter(&so->so_lock); 5456 so_lock_single(so); /* Set SOLOCKED */ 5457 mutex_exit(&so->so_lock); 5458 5459 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5460 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5461 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5462 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5463 5464 oh.level = level; 5465 oh.name = option_name; 5466 oh.len = optlen; 5467 5468 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5469 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5470 /* Let option management work in the presence of data flow control */ 5471 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5472 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5473 mp = NULL; 5474 mutex_enter(&so->so_lock); 5475 if (error) { 5476 eprintsoline(so, error); 5477 goto done2; 5478 } 5479 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5480 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5481 if (error) { 5482 eprintsoline(so, error); 5483 goto done; 5484 } 5485 ASSERT(mp); 5486 /* No need to verify T_optmgmt_ack */ 5487 freemsg(mp); 5488 done: 5489 /* 5490 * Check for SOL_SOCKET options and record their values. 5491 * If we know about a SOL_SOCKET parameter and the transport 5492 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5493 * EPROTO) we let the setsockopt succeed. 
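 *
 * The user-visible effect (illustration; names hypothetical) is that
 *
 *	int rcv = 65536;
 *
 *	(void) setsockopt(s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof (rcv));
 *
 * succeeds even when the transport rejects the option, and a later
 * getsockopt(SO_RCVBUF) is then satisfied from the value recorded
 * here in the sonode.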
5494 */ 5495 if (level == SOL_SOCKET) { 5496 /* Check parameters */ 5497 switch (option_name) { 5498 case SO_DEBUG: 5499 case SO_REUSEADDR: 5500 case SO_KEEPALIVE: 5501 case SO_DONTROUTE: 5502 case SO_BROADCAST: 5503 case SO_USELOOPBACK: 5504 case SO_OOBINLINE: 5505 case SO_SNDBUF: 5506 case SO_RCVBUF: 5507 #ifdef notyet 5508 case SO_SNDLOWAT: 5509 case SO_RCVLOWAT: 5510 #endif /* notyet */ 5511 case SO_DGRAM_ERRIND: 5512 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5513 error = EINVAL; 5514 eprintsoline(so, error); 5515 goto done2; 5516 } 5517 ASSERT(optval); 5518 handled = B_TRUE; 5519 break; 5520 case SO_SNDTIMEO: 5521 case SO_RCVTIMEO: 5522 if (get_udatamodel() == DATAMODEL_NONE || 5523 get_udatamodel() == DATAMODEL_NATIVE) { 5524 if (optlen != sizeof (struct timeval)) { 5525 error = EINVAL; 5526 eprintsoline(so, error); 5527 goto done2; 5528 } 5529 } else { 5530 if (optlen != sizeof (struct timeval32)) { 5531 error = EINVAL; 5532 eprintsoline(so, error); 5533 goto done2; 5534 } 5535 } 5536 ASSERT(optval); 5537 handled = B_TRUE; 5538 break; 5539 case SO_LINGER: 5540 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5541 error = EINVAL; 5542 eprintsoline(so, error); 5543 goto done2; 5544 } 5545 ASSERT(optval); 5546 handled = B_TRUE; 5547 break; 5548 } 5549 5550 #define intvalue (*(int32_t *)optval) 5551 5552 switch (option_name) { 5553 case SO_TYPE: 5554 case SO_ERROR: 5555 case SO_ACCEPTCONN: 5556 /* Can't be set */ 5557 error = ENOPROTOOPT; 5558 goto done2; 5559 case SO_LINGER: { 5560 struct linger *l = (struct linger *)optval; 5561 5562 so->so_linger.l_linger = l->l_linger; 5563 if (l->l_onoff) { 5564 so->so_linger.l_onoff = SO_LINGER; 5565 so->so_options |= SO_LINGER; 5566 } else { 5567 so->so_linger.l_onoff = 0; 5568 so->so_options &= ~SO_LINGER; 5569 } 5570 break; 5571 } 5572 5573 case SO_DEBUG: 5574 #ifdef SOCK_TEST 5575 if (intvalue & 2) 5576 sock_test_timelimit = 10 * hz; 5577 else 5578 sock_test_timelimit = 0; 5579 5580 if (intvalue & 4) 5581 do_useracc = 0; 5582 else 5583 do_useracc = 1; 5584 #endif /* SOCK_TEST */ 5585 /* FALLTHRU */ 5586 case SO_REUSEADDR: 5587 case SO_KEEPALIVE: 5588 case SO_DONTROUTE: 5589 case SO_BROADCAST: 5590 case SO_USELOOPBACK: 5591 case SO_OOBINLINE: 5592 case SO_DGRAM_ERRIND: 5593 if (intvalue != 0) { 5594 dprintso(so, 1, 5595 ("socket_setsockopt: setting 0x%x\n", 5596 option_name)); 5597 so->so_options |= option_name; 5598 } else { 5599 dprintso(so, 1, 5600 ("socket_setsockopt: clearing 0x%x\n", 5601 option_name)); 5602 so->so_options &= ~option_name; 5603 } 5604 break; 5605 /* 5606 * The following options are only returned by us when the 5607 * transport layer fails. 5608 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5609 * since the transport might adjust the value and not 5610 * return exactly what was set by the application. 5611 */ 5612 case SO_SNDBUF: 5613 so->so_sndbuf = intvalue; 5614 break; 5615 case SO_RCVBUF: 5616 so->so_rcvbuf = intvalue; 5617 break; 5618 case SO_RCVPSH: 5619 so->so_rcv_timer_interval = intvalue; 5620 break; 5621 #ifdef notyet 5622 /* 5623 * We do not implement the semantics of these options 5624 * thus we shouldn't implement the options either. 
5625 */ 5626 case SO_SNDLOWAT: 5627 so->so_sndlowat = intvalue; 5628 break; 5629 case SO_RCVLOWAT: 5630 so->so_rcvlowat = intvalue; 5631 break; 5632 #endif /* notyet */ 5633 case SO_SNDTIMEO: 5634 case SO_RCVTIMEO: { 5635 struct timeval tl; 5636 clock_t val; 5637 5638 if (get_udatamodel() == DATAMODEL_NONE || 5639 get_udatamodel() == DATAMODEL_NATIVE) 5640 bcopy(&tl, (struct timeval *)optval, 5641 sizeof (struct timeval)); 5642 else 5643 TIMEVAL32_TO_TIMEVAL(&tl, 5644 (struct timeval32 *)optval); 5645 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5646 if (option_name == SO_RCVTIMEO) 5647 so->so_rcvtimeo = drv_usectohz(val); 5648 else 5649 so->so_sndtimeo = drv_usectohz(val); 5650 break; 5651 } 5652 } 5653 #undef intvalue 5654 5655 if (error) { 5656 if ((error == ENOPROTOOPT || error == EPROTO || 5657 error == EINVAL) && handled) { 5658 dprintso(so, 1, 5659 ("setsockopt: ignoring error %d for 0x%x\n", 5660 error, option_name)); 5661 error = 0; 5662 } 5663 } 5664 } 5665 done2: 5666 so_unlock_single(so, SOLOCKED); 5667 mutex_exit(&so->so_lock); 5668 return (error); 5669 } 5670 5671 /* 5672 * sotpi_close() is called when the last open reference goes away. 5673 */ 5674 /* ARGSUSED */ 5675 int 5676 sotpi_close(struct sonode *so, int flag, struct cred *cr) 5677 { 5678 struct vnode *vp = SOTOV(so); 5679 dev_t dev; 5680 int error = 0; 5681 sotpi_info_t *sti = SOTOTPI(so); 5682 5683 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 5684 (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 5685 5686 dev = sti->sti_dev; 5687 5688 ASSERT(STREAMSTAB(getmajor(dev))); 5689 5690 mutex_enter(&so->so_lock); 5691 so_lock_single(so); /* Set SOLOCKED */ 5692 5693 ASSERT(so_verify_oobstate(so)); 5694 5695 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 5696 sti->sti_nl7c_flags = 0; 5697 nl7c_close(so); 5698 } 5699 5700 if (vp->v_stream != NULL) { 5701 vnode_t *ux_vp; 5702 5703 if (so->so_family == AF_UNIX) { 5704 /* Could avoid this when CANTSENDMORE for !dgram */ 5705 so_unix_close(so); 5706 } 5707 5708 mutex_exit(&so->so_lock); 5709 /* 5710 * Disassemble the linkage from the AF_UNIX underlying file 5711 * system vnode to this socket (by atomically clearing 5712 * v_stream in vn_rele_stream) before strclose clears sd_vnode 5713 * and frees the stream head. 5714 */ 5715 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 5716 ASSERT(ux_vp->v_stream); 5717 sti->sti_ux_bound_vp = NULL; 5718 vn_rele_stream(ux_vp); 5719 } 5720 error = strclose(vp, flag, cr); 5721 vp->v_stream = NULL; 5722 mutex_enter(&so->so_lock); 5723 } 5724 5725 /* 5726 * Flush the T_DISCON_IND on sti_discon_ind_mp. 5727 */ 5728 so_flush_discon_ind(so); 5729 5730 so_unlock_single(so, SOLOCKED); 5731 mutex_exit(&so->so_lock); 5732 5733 /* 5734 * Needed for STREAMs. 5735 * Decrement the device driver's reference count for streams 5736 * opened via the clone dip. The driver was held in clone_open(). 5737 * The absence of clone_close() forces this asymmetry. 5738 */ 5739 if (so->so_flag & SOCLONE) 5740 ddi_rele_driver(getmajor(dev)); 5741 5742 return (error); 5743 } 5744 5745 static int 5746 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 5747 struct cred *cr, int32_t *rvalp) 5748 { 5749 struct vnode *vp = SOTOV(so); 5750 sotpi_info_t *sti = SOTOTPI(so); 5751 int error = 0; 5752 5753 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 5754 cmd, arg, pr_state(so->so_state, so->so_mode))); 5755 5756 switch (cmd) { 5757 case SIOCSQPTR: 5758 /* 5759 * SIOCSQPTR is valid only when helper stream is created 5760 * by the protocol. 
5761 */ 5762 case _I_INSERT: 5763 case _I_REMOVE: 5764 /* 5765 * Since there's no compelling reason to support these ioctls 5766 * on sockets, and doing so would increase the complexity 5767 * markedly, prevent it. 5768 */ 5769 return (EOPNOTSUPP); 5770 5771 case I_FIND: 5772 case I_LIST: 5773 case I_LOOK: 5774 case I_POP: 5775 case I_PUSH: 5776 /* 5777 * To prevent races and inconsistencies between the actual 5778 * state of the stream and the state according to the sonode, 5779 * we serialize all operations which modify or operate on the 5780 * list of modules on the socket's stream. 5781 */ 5782 mutex_enter(&sti->sti_plumb_lock); 5783 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 5784 mutex_exit(&sti->sti_plumb_lock); 5785 return (error); 5786 5787 default: 5788 if (so->so_version != SOV_STREAM) 5789 break; 5790 5791 /* 5792 * The imaginary "sockmod" has been popped; act as a stream. 5793 */ 5794 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5795 } 5796 5797 ASSERT(so->so_version != SOV_STREAM); 5798 5799 /* 5800 * Process socket-specific ioctls. 5801 */ 5802 switch (cmd) { 5803 case FIONBIO: { 5804 int32_t value; 5805 5806 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5807 (mode & (int)FKIOCTL))) 5808 return (EFAULT); 5809 5810 mutex_enter(&so->so_lock); 5811 if (value) { 5812 so->so_state |= SS_NDELAY; 5813 } else { 5814 so->so_state &= ~SS_NDELAY; 5815 } 5816 mutex_exit(&so->so_lock); 5817 return (0); 5818 } 5819 5820 case FIOASYNC: { 5821 int32_t value; 5822 5823 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5824 (mode & (int)FKIOCTL))) 5825 return (EFAULT); 5826 5827 mutex_enter(&so->so_lock); 5828 /* 5829 * SS_ASYNC flag not already set correctly? 5830 * (!value != !(so->so_state & SS_ASYNC)) 5831 * but some engineers find that too hard to read. 5832 */ 5833 if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 5834 value != 0 && (so->so_state & SS_ASYNC) == 0) 5835 error = so_flip_async(so, vp, mode, cr); 5836 mutex_exit(&so->so_lock); 5837 return (error); 5838 } 5839 5840 case SIOCSPGRP: 5841 case FIOSETOWN: { 5842 pid_t pgrp; 5843 5844 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 5845 (mode & (int)FKIOCTL))) 5846 return (EFAULT); 5847 5848 mutex_enter(&so->so_lock); 5849 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 5850 /* Any change? */ 5851 if (pgrp != so->so_pgrp) 5852 error = so_set_siggrp(so, vp, pgrp, mode, cr); 5853 mutex_exit(&so->so_lock); 5854 return (error); 5855 } 5856 case SIOCGPGRP: 5857 case FIOGETOWN: 5858 if (so_copyout(&so->so_pgrp, (void *)arg, 5859 sizeof (pid_t), (mode & (int)FKIOCTL))) 5860 return (EFAULT); 5861 return (0); 5862 5863 case SIOCATMARK: { 5864 int retval; 5865 uint_t so_state; 5866 5867 /* 5868 * strwaitmark has a finite timeout after which it 5869 * returns -1 if the mark state is undetermined. 5870 * In order to avoid any race between the mark state 5871 * in sockfs and the mark state in the stream head this 5872 * routine loops until the mark state can be determined 5873 * (or the urgent data indication has been removed by some 5874 * other thread). 5875 */ 5876 do { 5877 mutex_enter(&so->so_lock); 5878 so_state = so->so_state; 5879 mutex_exit(&so->so_lock); 5880 if (so_state & SS_RCVATMARK) { 5881 retval = 1; 5882 } else if (!(so_state & SS_OOBPEND)) { 5883 /* 5884 * No SIGURG has been generated -- there is no 5885 * pending or present urgent data. Thus can't 5886 * possibly be at the mark. 
5887 */ 5888 retval = 0; 5889 } else { 5890 /* 5891 * Have the stream head wait until there is 5892 * either some messages on the read queue, or 5893 * STRATMARK or STRNOTATMARK gets set. The 5894 * STRNOTATMARK flag is used so that the 5895 * transport can send up a MSGNOTMARKNEXT 5896 * M_DATA to indicate that it is not 5897 * at the mark and additional data is not about 5898 * to be send upstream. 5899 * 5900 * If the mark state is undetermined this will 5901 * return -1 and we will loop rechecking the 5902 * socket state. 5903 */ 5904 retval = strwaitmark(vp); 5905 } 5906 } while (retval == -1); 5907 5908 if (so_copyout(&retval, (void *)arg, sizeof (int), 5909 (mode & (int)FKIOCTL))) 5910 return (EFAULT); 5911 return (0); 5912 } 5913 5914 case I_FDINSERT: 5915 case I_SENDFD: 5916 case I_RECVFD: 5917 case I_ATMARK: 5918 case _SIOCSOCKFALLBACK: 5919 /* 5920 * These ioctls do not apply to sockets. I_FDINSERT can be 5921 * used to send M_PROTO messages without modifying the socket 5922 * state. I_SENDFD/RECVFD should not be used for socket file 5923 * descriptor passing since they assume a twisted stream. 5924 * SIOCATMARK must be used instead of I_ATMARK. 5925 * 5926 * _SIOCSOCKFALLBACK from an application should never be 5927 * processed. It is only generated by socktpi_open() or 5928 * in response to I_POP or I_PUSH. 5929 */ 5930 #ifdef DEBUG 5931 zcmn_err(getzoneid(), CE_WARN, 5932 "Unsupported STREAMS ioctl 0x%x on socket. " 5933 "Pid = %d\n", cmd, curproc->p_pid); 5934 #endif /* DEBUG */ 5935 return (EOPNOTSUPP); 5936 5937 case _I_GETPEERCRED: 5938 if ((mode & FKIOCTL) == 0) 5939 return (EINVAL); 5940 5941 mutex_enter(&so->so_lock); 5942 if ((so->so_mode & SM_CONNREQUIRED) == 0) { 5943 error = ENOTSUP; 5944 } else if ((so->so_state & SS_ISCONNECTED) == 0) { 5945 error = ENOTCONN; 5946 } else if (so->so_peercred != NULL) { 5947 k_peercred_t *kp = (k_peercred_t *)arg; 5948 kp->pc_cr = so->so_peercred; 5949 kp->pc_cpid = so->so_cpid; 5950 crhold(so->so_peercred); 5951 } else { 5952 error = EINVAL; 5953 } 5954 mutex_exit(&so->so_lock); 5955 return (error); 5956 5957 default: 5958 /* 5959 * Do the higher-order bits of the ioctl cmd indicate 5960 * that it is an I_* streams ioctl? 5961 */ 5962 if ((cmd & 0xffffff00U) == STR && 5963 so->so_version == SOV_SOCKBSD) { 5964 #ifdef DEBUG 5965 zcmn_err(getzoneid(), CE_WARN, 5966 "Unsupported STREAMS ioctl 0x%x on socket. " 5967 "Pid = %d\n", cmd, curproc->p_pid); 5968 #endif /* DEBUG */ 5969 return (EOPNOTSUPP); 5970 } 5971 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5972 } 5973 } 5974 5975 /* 5976 * Handle plumbing-related ioctls. 5977 */ 5978 static int 5979 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 5980 struct cred *cr, int32_t *rvalp) 5981 { 5982 static const char sockmod_name[] = "sockmod"; 5983 struct sonode *so = VTOSO(vp); 5984 char mname[FMNAMESZ + 1]; 5985 int error; 5986 sotpi_info_t *sti = SOTOTPI(so); 5987 5988 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 5989 5990 if (so->so_version == SOV_SOCKBSD) 5991 return (EOPNOTSUPP); 5992 5993 if (so->so_version == SOV_STREAM) { 5994 /* 5995 * The imaginary "sockmod" has been popped - act as a stream. 5996 * If this is a push of sockmod then change back to a socket. 5997 */ 5998 if (cmd == I_PUSH) { 5999 error = ((mode & FKIOCTL) ? 
copystr : copyinstr)( 6000 (void *)arg, mname, sizeof (mname), NULL); 6001 6002 if (error == 0 && strcmp(mname, sockmod_name) == 0) { 6003 dprintso(so, 0, ("socktpi_ioctl: going to " 6004 "socket version\n")); 6005 so_stream2sock(so); 6006 return (0); 6007 } 6008 } 6009 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6010 } 6011 6012 switch (cmd) { 6013 case I_PUSH: 6014 if (sti->sti_direct) { 6015 mutex_enter(&so->so_lock); 6016 so_lock_single(so); 6017 mutex_exit(&so->so_lock); 6018 6019 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 6020 cr, rvalp); 6021 6022 mutex_enter(&so->so_lock); 6023 if (error == 0) 6024 sti->sti_direct = 0; 6025 so_unlock_single(so, SOLOCKED); 6026 mutex_exit(&so->so_lock); 6027 6028 if (error != 0) 6029 return (error); 6030 } 6031 6032 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6033 if (error == 0) 6034 sti->sti_pushcnt++; 6035 return (error); 6036 6037 case I_POP: 6038 if (sti->sti_pushcnt == 0) { 6039 /* Emulate sockmod being popped */ 6040 dprintso(so, 0, 6041 ("socktpi_ioctl: going to STREAMS version\n")); 6042 return (so_sock2stream(so)); 6043 } 6044 6045 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6046 if (error == 0) 6047 sti->sti_pushcnt--; 6048 return (error); 6049 6050 case I_LIST: { 6051 struct str_mlist *kmlistp, *umlistp; 6052 struct str_list kstrlist; 6053 ssize_t kstrlistsize; 6054 int i, nmods; 6055 6056 STRUCT_DECL(str_list, ustrlist); 6057 STRUCT_INIT(ustrlist, mode); 6058 6059 if (arg == NULL) { 6060 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6061 if (error == 0) 6062 (*rvalp)++; /* Add one for sockmod */ 6063 return (error); 6064 } 6065 6066 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 6067 STRUCT_SIZE(ustrlist), mode & FKIOCTL); 6068 if (error != 0) 6069 return (error); 6070 6071 nmods = STRUCT_FGET(ustrlist, sl_nmods); 6072 if (nmods <= 0) 6073 return (EINVAL); 6074 /* 6075 * Ceiling nmods at nstrpush to prevent someone from 6076 * maliciously consuming lots of kernel memory. 6077 */ 6078 nmods = MIN(nmods, nstrpush); 6079 6080 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 6081 kstrlist.sl_nmods = nmods; 6082 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 6083 6084 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 6085 cr, rvalp); 6086 if (error != 0) 6087 goto done; 6088 6089 /* 6090 * Considering the module list as a 0-based array of sl_nmods 6091 * modules, sockmod should conceptually exist at slot 6092 * sti_pushcnt. Insert sockmod at this location by sliding all 6093 * of the module names after so_pushcnt over by one. We know 6094 * that there will be room to do this since we allocated 6095 * sl_modlist with an additional slot. 6096 */ 6097 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--) 6098 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1]; 6099 6100 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name); 6101 kstrlist.sl_nmods++; 6102 6103 /* 6104 * Copy all of the entries out to ustrlist. 
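 *
 * For example (an illustrative, hypothetical stack): if the
 * application pushed "bufmod" on a TCP socket (sti_pushcnt == 1),
 * the list reported for the underlying stream is
 *	{ "bufmod", "tcp" }
 * and the list copied out to the application becomes
 *	{ "bufmod", "sockmod", "tcp" }
 * with sockmod spliced in at slot sti_pushcnt even though it was
 * never physically pushed on the stream.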
6105 */
6106 kmlistp = kstrlist.sl_modlist;
6107 umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
6108 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
6109 error = so_copyout(kmlistp++, umlistp++,
6110 sizeof (struct str_mlist), mode & FKIOCTL);
6111 if (error != 0)
6112 goto done;
6113 }
6114
6115 error = so_copyout(&i, (void *)arg, sizeof (int32_t),
6116 mode & FKIOCTL);
6117 if (error == 0)
6118 *rvalp = 0;
6119 done:
6120 kmem_free(kstrlist.sl_modlist, kstrlistsize);
6121 return (error);
6122 }
6123 case I_LOOK:
6124 if (sti->sti_pushcnt == 0) {
6125 return (so_copyout(sockmod_name, (void *)arg,
6126 sizeof (sockmod_name), mode & FKIOCTL));
6127 }
6128 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
6129
6130 case I_FIND:
6131 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
6132 if (error && error != EINVAL)
6133 return (error);
6134
6135 /* If not found and the string was sockmod, return 1 */
6136 if (*rvalp == 0 || error == EINVAL) {
6137 error = ((mode & FKIOCTL) ? copystr : copyinstr)(
6138 (void *)arg, mname, sizeof (mname), NULL);
6139 if (error == ENAMETOOLONG)
6140 error = EINVAL;
6141
6142 if (error == 0 && strcmp(mname, sockmod_name) == 0)
6143 *rvalp = 1;
6144 }
6145 return (error);
6146
6147 default:
6148 panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
6149 break;
6150 }
6151
6152 return (0);
6153 }
6154
6155 /*
6156 * Wrapper around the STREAMS poll routine that implements socket poll
6157 * semantics.
6158 * Sockfs never calls pollwakeup itself - the stream head takes care
6159 * of all pollwakeups. Since sockfs never holds so_lock when calling the
6160 * stream head, there can never be a deadlock due to holding so_lock across
6161 * pollwakeup and acquiring so_lock in this routine.
6162 *
6163 * However, since the performance of VOP_POLL is critical we avoid
6164 * acquiring so_lock here. This is based on two assumptions:
6165 * - The poll implementation holds locks to serialize the VOP_POLL call
6166 * and a pollwakeup for the same pollhead. This ensures that should
6167 * e.g. so_state change during a socktpi_poll call, the pollwakeup
6168 * (which strsock_* and strrput conspire to issue) is issued after
6169 * the state change. Thus the pollwakeup will block until VOP_POLL has
6170 * returned and then wake up poll and have it call VOP_POLL again.
6171 * - The reading of so_state without holding so_lock does not result in
6172 * stale data that is older than the latest state change that has dropped
6173 * so_lock. This is ensured by the mutex_exit issuing the appropriate
6174 * memory barrier to force the data into the coherency domain.
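 *
 * A rough sketch of the ordering this relies on (two CPUs, one state
 * change; the callers shown are illustrative):
 *
 *	state-changing thread		polling thread
 *	---------------------		-----------------------------
 *	mutex_enter(&so->so_lock);
 *	so->so_state |= ...;
 *	mutex_exit(&so->so_lock);	so_state = so->so_state;
 *	pollwakeup(...);		strpoll(...)/check so_state
 *
 * Either the polling thread observes the new state directly, or the
 * pollwakeup() issued after the state change forces another trip
 * through VOP_POLL.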
6175 */
6176 static int
6177 sotpi_poll(
6178 struct sonode *so,
6179 short events,
6180 int anyyet,
6181 short *reventsp,
6182 struct pollhead **phpp)
6183 {
6184 short origevents = events;
6185 struct vnode *vp = SOTOV(so);
6186 int error;
6187 int so_state = so->so_state; /* snapshot */
6188 sotpi_info_t *sti = SOTOTPI(so);
6189
6190 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n",
6191 (void *)vp, pr_state(so_state, so->so_mode), so->so_error));
6192
6193 ASSERT(vp->v_type == VSOCK);
6194 ASSERT(vp->v_stream != NULL);
6195
6196 if (so->so_version == SOV_STREAM) {
6197 /* The imaginary "sockmod" has been popped - act as a stream */
6198 return (strpoll(vp->v_stream, events, anyyet,
6199 reventsp, phpp));
6200 }
6201
6202 if (!(so_state & SS_ISCONNECTED) &&
6203 (so->so_mode & SM_CONNREQUIRED)) {
6204 /* Not connected yet - turn off write side events */
6205 events &= ~(POLLOUT|POLLWRBAND);
6206 }
6207 /*
6208 * Check for errors without calling strpoll if the caller wants them.
6209 * In sockets the errors are represented as input/output events
6210 * and there is no need to ask the stream head for this information.
6211 */
6212 if (so->so_error != 0 &&
6213 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
6214 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
6215 return (0);
6216 }
6217 /*
6218 * Ignore M_PROTO-only messages such as the T_EXDATA_IND messages.
6219 * These messages, with only an M_PROTO/M_PCPROTO part and no M_DATA,
6220 * will not trigger a POLLIN event when POLLRDDATA is set.
6221 * The handling of urgent data (causing POLLRDBAND) is done by
6222 * inspecting SS_OOBPEND below.
6223 */
6224 events |= POLLRDDATA;
6225
6226 /*
6227 * After shutdown(output) a stream head write error is set.
6228 * However, we should not return output events.
6229 */
6230 events |= POLLNOERR;
6231 error = strpoll(vp->v_stream, events, anyyet,
6232 reventsp, phpp);
6233 if (error)
6234 return (error);
6235
6236 ASSERT(!(*reventsp & POLLERR));
6237
6238 /*
6239 * Notes on T_CONN_IND handling for sockets.
6240 *
6241 * If strpoll() returned without events, SR_POLLIN is guaranteed
6242 * to be set, ensuring any subsequent strrput() runs pollwakeup().
6243 *
6244 * Since the so_lock is not held, soqueueconnind() may have run
6245 * and a T_CONN_IND may be waiting. We now check for any queued
6246 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events
6247 * to ensure poll returns.
6248 *
6249 * However:
6250 * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
6251 * when strrput() does run for an arriving M_PROTO with T_CONN_IND
6252 * the following actions will occur; taken together they ensure the
6253 * syscall will return.
6254 *
6255 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND, but if
6256 * the accept() was run on a non-blocking socket, sowaitconnind()
6257 * may have already returned EWOULDBLOCK, so it may not be waiting
6258 * to process the message. Additionally, socktpi_poll() has probably
6259 * proceeded past the sti_conn_ind_head check below.
6260 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
6261 * this thread; however, that could occur before poll_common()
6262 * has entered cv_wait.
6263 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
6264 *
6265 * Before proceeding to cv_wait() in poll_common() for an event,
6266 * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
6267 * and if set, re-calls strpoll() to ensure the late-arriving
6268 * T_CONN_IND is recognized, and pollsys() returns.
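 *
 * A condensed, illustrative timeline of that window being closed:
 *
 *	strrput() thread		poll thread
 *	----------------		--------------------------------
 *					strpoll(): no events, SR_POLLIN set
 *	soqueueconnind() queues
 *	    the T_CONN_IND
 *	pollwakeup() -> pollnotify()	(may already be past the
 *	    sets T_POLLWAKE		sti_conn_ind_head check below)
 *					poll_common(): sees T_POLLWAKE under
 *					pc_lock, re-calls VOP_POLL, and the
 *					check below now reports POLLIN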
6269 */ 6270 6271 if (sti->sti_conn_ind_head != NULL) 6272 *reventsp |= (POLLIN|POLLRDNORM) & events; 6273 6274 if (so->so_state & SS_OOBPEND) 6275 *reventsp |= POLLRDBAND & events; 6276 6277 if (sti->sti_nl7c_rcv_mp != NULL) { 6278 *reventsp |= (POLLIN|POLLRDNORM) & events; 6279 } 6280 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 6281 ((POLLIN|POLLRDNORM) & *reventsp)) { 6282 sti->sti_nl7c_flags |= NL7C_POLLIN; 6283 } 6284 6285 return (0); 6286 } 6287 6288 /*ARGSUSED*/ 6289 static int 6290 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6291 { 6292 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6293 int error = 0; 6294 6295 error = sonode_constructor(buf, cdrarg, kmflags); 6296 if (error != 0) 6297 return (error); 6298 6299 error = i_sotpi_info_constructor(&st->st_info); 6300 if (error != 0) 6301 sonode_destructor(buf, cdrarg); 6302 6303 st->st_sonode.so_priv = &st->st_info; 6304 6305 return (error); 6306 } 6307 6308 /*ARGSUSED1*/ 6309 static void 6310 socktpi_destructor(void *buf, void *cdrarg) 6311 { 6312 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6313 6314 ASSERT(st->st_sonode.so_priv == &st->st_info); 6315 st->st_sonode.so_priv = NULL; 6316 6317 i_sotpi_info_destructor(&st->st_info); 6318 sonode_destructor(buf, cdrarg); 6319 } 6320 6321 static int 6322 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 6323 { 6324 int retval; 6325 6326 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6327 struct sonode *so = (struct sonode *)buf; 6328 sotpi_info_t *sti = SOTOTPI(so); 6329 6330 mutex_enter(&socklist.sl_lock); 6331 6332 sti->sti_next_so = socklist.sl_list; 6333 sti->sti_prev_so = NULL; 6334 if (sti->sti_next_so != NULL) 6335 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6336 socklist.sl_list = so; 6337 6338 mutex_exit(&socklist.sl_lock); 6339 6340 } 6341 return (retval); 6342 } 6343 6344 static void 6345 socktpi_unix_destructor(void *buf, void *cdrarg) 6346 { 6347 struct sonode *so = (struct sonode *)buf; 6348 sotpi_info_t *sti = SOTOTPI(so); 6349 6350 mutex_enter(&socklist.sl_lock); 6351 6352 if (sti->sti_next_so != NULL) 6353 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6354 if (sti->sti_prev_so != NULL) 6355 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6356 else 6357 socklist.sl_list = sti->sti_next_so; 6358 6359 mutex_exit(&socklist.sl_lock); 6360 6361 socktpi_destructor(buf, cdrarg); 6362 } 6363 6364 int 6365 socktpi_init(void) 6366 { 6367 /* 6368 * Create sonode caches. We create a special one for AF_UNIX so 6369 * that we can track them for netstat(1m). 6370 */ 6371 socktpi_cache = kmem_cache_create("socktpi_cache", 6372 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6373 socktpi_destructor, NULL, NULL, NULL, 0); 6374 6375 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6376 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6377 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6378 6379 return (0); 6380 } 6381 6382 /* 6383 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6384 * 6385 * Caller must still update state and mode using sotpi_update_state(). 
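 *
 * A rough sketch of the sequence a fallback caller is expected to
 * follow (the error handling and data draining that live in the
 * generic sockfs fallback code are omitted here):
 *
 *	error = sotpi_convert_sonode(so, newsp, &direct, &q, cr);
 *	if (error != 0)
 *		return (error);
 *	...	(quiesce the transport, drain queued data)
 *	sotpi_update_state(so, &tcap, laddr, laddrlen,
 *	    faddr, faddrlen, opts);
 *	...	on failure the caller undoes this with
 *		sotpi_revert_sonode(so, cr);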
6386 */
6387 int
6388 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp,
6389 boolean_t *direct, queue_t **qp, struct cred *cr)
6390 {
6391 sotpi_info_t *sti;
6392 struct sockparams *origsp = so->so_sockparams;
6393 sock_lower_handle_t handle = so->so_proto_handle;
6394 struct stdata *stp;
6395 struct vnode *vp;
6396 queue_t *q;
6397 int error = 0;
6398
6399 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
6400 SS_FALLBACK_PENDING);
6401 ASSERT(SOCK_IS_NONSTR(so));
6402
6403 *qp = NULL;
6404 *direct = B_FALSE;
6405 so->so_sockparams = newsp;
6406 /*
6407 * Allocate and initialize fields required by TPI.
6408 */
6409 (void) sotpi_info_create(so, KM_SLEEP);
6410 sotpi_info_init(so);
6411
6412 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) {
6413 sotpi_info_fini(so);
6414 sotpi_info_destroy(so);
6415 return (error);
6416 }
6417 ASSERT(handle == so->so_proto_handle);
6418 sti = SOTOTPI(so);
6419 if (sti->sti_direct != 0)
6420 *direct = B_TRUE;
6421
6422 /*
6423 * Keep the original sp around so we can properly dispose of the
6424 * sonode when the socket is being closed.
6425 */
6426 sti->sti_orig_sp = origsp;
6427
6428 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */
6429 so_alloc_addr(so, so->so_max_addr_len);
6430
6431 /*
6432 * If the application has done a SIOCSPGRP, make sure the
6433 * stream head is aware. This needs to take place before
6434 * the protocol starts sending up messages; otherwise we
6435 * might fail to generate SIGPOLL.
6436 *
6437 * It is possible that the application will receive duplicate
6438 * signals if some were already generated for either data or
6439 * connection indications.
6440 */
6441 if (so->so_pgrp != 0) {
6442 if (so_set_events(so, so->so_vnode, cr) != 0)
6443 so->so_pgrp = 0;
6444 }
6445
6446 /*
6447 * Determine which queue to use.
6448 */
6449 vp = SOTOV(so);
6450 stp = vp->v_stream;
6451 ASSERT(stp != NULL);
6452 q = stp->sd_wrq->q_next;
6453
6454 /*
6455 * Skip any modules that may have been autopushed when the device
6456 * was opened.
6457 */
6458 while (q->q_next != NULL)
6459 q = q->q_next;
6460 *qp = _RD(q);
6461
6462 /* This is now a STREAMS socket */
6463 so->so_not_str = B_FALSE;
6464
6465 return (error);
6466 }
6467
6468 /*
6469 * Revert a TPI sonode. This is only allowed during the fallback
6470 * process.
6471 */
6472 void
6473 sotpi_revert_sonode(struct sonode *so, struct cred *cr)
6474 {
6475 vnode_t *vp = SOTOV(so);
6476
6477 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
6478 SS_FALLBACK_PENDING);
6479 ASSERT(!SOCK_IS_NONSTR(so));
6480 ASSERT(vp->v_stream != NULL);
6481
6482 strclean(vp);
6483 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr);
6484
6485 /*
6486 * Restore the original sockparams. The caller is responsible for
6487 * dropping the ref to the new sp.
6488 */
6489 so->so_sockparams = SOTOTPI(so)->sti_orig_sp;
6490
6491 sotpi_info_fini(so);
6492 sotpi_info_destroy(so);
6493
6494 /* This is no longer a STREAMS socket */
6495 so->so_not_str = B_TRUE;
6496 }
6497
6498 void
6499 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap,
6500 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr,
6501 socklen_t faddrlen, short opts)
6502 {
6503 sotpi_info_t *sti = SOTOTPI(so);
6504
6505 so_proc_tcapability_ack(so, tcap);
6506
6507 so->so_options |= opts;
6508
6509 /*
6510 * Determine whether the foreign and local addresses are valid.
6511 */
6512 if (laddrlen != 0) {
6513 ASSERT(laddrlen <= sti->sti_laddr_maxlen);
6514 sti->sti_laddr_len = laddrlen;
6515 bcopy(laddr, sti->sti_laddr_sa, laddrlen);
6516 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND);
6517 }
6518
6519 if (faddrlen != 0) {
6520 ASSERT(faddrlen <= sti->sti_faddr_maxlen);
6521 sti->sti_faddr_len = faddrlen;
6522 bcopy(faddr, sti->sti_faddr_sa, faddrlen);
6523 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED);
6524 }
6525
6526 }
6527
6528 /*
6529 * Allocate enough space to cache the local and foreign addresses.
6530 */
6531 void
6532 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen)
6533 {
6534 sotpi_info_t *sti = SOTOTPI(so);
6535
6536 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
6537 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0);
6538 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen =
6539 P2ROUNDUP(maxlen, KMEM_ALIGN);
6540 so->so_max_addr_len = sti->sti_laddr_maxlen;
6541 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP);
6542 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa
6543 + sti->sti_laddr_maxlen);
6544
6545 if (so->so_family == AF_UNIX) {
6546 /*
6547 * Initialize AF_UNIX-related fields.
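 *
 * (For reference: the two address buffers set up above come from a
 * single allocation of sti_laddr_maxlen * 2 bytes,
 *
 *	sti_laddr_sa                   sti_faddr_sa
 *	|<---- sti_laddr_maxlen ----->|<---- sti_faddr_maxlen ----->|
 *
 * which is why sotpi_info_fini() frees only sti_laddr_sa, passing
 * twice sti_laddr_maxlen as the size.)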
6548 */ 6549 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6550 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6551 } 6552 } 6553 6554 6555 sotpi_info_t * 6556 sotpi_sototpi(struct sonode *so) 6557 { 6558 sotpi_info_t *sti; 6559 6560 ASSERT(so != NULL); 6561 6562 sti = (sotpi_info_t *)so->so_priv; 6563 6564 ASSERT(sti != NULL); 6565 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6566 6567 return (sti); 6568 } 6569 6570 static int 6571 i_sotpi_info_constructor(sotpi_info_t *sti) 6572 { 6573 sti->sti_magic = SOTPI_INFO_MAGIC; 6574 sti->sti_ack_mp = NULL; 6575 sti->sti_discon_ind_mp = NULL; 6576 sti->sti_ux_bound_vp = NULL; 6577 sti->sti_unbind_mp = NULL; 6578 6579 sti->sti_conn_ind_head = NULL; 6580 sti->sti_conn_ind_tail = NULL; 6581 6582 sti->sti_laddr_sa = NULL; 6583 sti->sti_faddr_sa = NULL; 6584 6585 sti->sti_nl7c_flags = 0; 6586 sti->sti_nl7c_uri = NULL; 6587 sti->sti_nl7c_rcv_mp = NULL; 6588 6589 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6590 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6591 6592 return (0); 6593 } 6594 6595 static void 6596 i_sotpi_info_destructor(sotpi_info_t *sti) 6597 { 6598 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6599 ASSERT(sti->sti_ack_mp == NULL); 6600 ASSERT(sti->sti_discon_ind_mp == NULL); 6601 ASSERT(sti->sti_ux_bound_vp == NULL); 6602 ASSERT(sti->sti_unbind_mp == NULL); 6603 6604 ASSERT(sti->sti_conn_ind_head == NULL); 6605 ASSERT(sti->sti_conn_ind_tail == NULL); 6606 6607 ASSERT(sti->sti_laddr_sa == NULL); 6608 ASSERT(sti->sti_faddr_sa == NULL); 6609 6610 ASSERT(sti->sti_nl7c_flags == 0); 6611 ASSERT(sti->sti_nl7c_uri == NULL); 6612 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6613 6614 mutex_destroy(&sti->sti_plumb_lock); 6615 cv_destroy(&sti->sti_ack_cv); 6616 } 6617 6618 /* 6619 * Creates and attaches TPI information to the given sonode 6620 */ 6621 static boolean_t 6622 sotpi_info_create(struct sonode *so, int kmflags) 6623 { 6624 sotpi_info_t *sti; 6625 6626 ASSERT(so->so_priv == NULL); 6627 6628 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6629 return (B_FALSE); 6630 6631 if (i_sotpi_info_constructor(sti) != 0) { 6632 kmem_free(sti, sizeof (*sti)); 6633 return (B_FALSE); 6634 } 6635 6636 so->so_priv = (void *)sti; 6637 return (B_TRUE); 6638 } 6639 6640 /* 6641 * Initializes the TPI information. 
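 *
 * For orientation, the sotpi_info_t lifecycle driven by the fallback
 * code above is roughly:
 *
 *	sotpi_info_create()	allocate the sotpi_info_t and run the
 *				constructor
 *	sotpi_info_init()	per-socket initialization (this function)
 *	...			socket operates as a TPI/STREAMS socket
 *	sotpi_info_fini()	free cached addresses and queued messages
 *	sotpi_info_destroy()	run the destructor and free the memory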
6642 */ 6643 static void 6644 sotpi_info_init(struct sonode *so) 6645 { 6646 struct vnode *vp = SOTOV(so); 6647 sotpi_info_t *sti = SOTOTPI(so); 6648 time_t now; 6649 6650 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6651 vp->v_rdev = sti->sti_dev; 6652 6653 sti->sti_orig_sp = NULL; 6654 6655 sti->sti_pushcnt = 0; 6656 6657 now = gethrestime_sec(); 6658 sti->sti_atime = now; 6659 sti->sti_mtime = now; 6660 sti->sti_ctime = now; 6661 6662 sti->sti_eaddr_mp = NULL; 6663 sti->sti_delayed_error = 0; 6664 6665 sti->sti_provinfo = NULL; 6666 6667 sti->sti_oobcnt = 0; 6668 sti->sti_oobsigcnt = 0; 6669 6670 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6671 6672 sti->sti_laddr_sa = 0; 6673 sti->sti_faddr_sa = 0; 6674 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6675 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6676 6677 sti->sti_laddr_valid = 0; 6678 sti->sti_faddr_valid = 0; 6679 sti->sti_faddr_noxlate = 0; 6680 6681 sti->sti_direct = 0; 6682 6683 ASSERT(sti->sti_ack_mp == NULL); 6684 ASSERT(sti->sti_ux_bound_vp == NULL); 6685 ASSERT(sti->sti_unbind_mp == NULL); 6686 6687 ASSERT(sti->sti_conn_ind_head == NULL); 6688 ASSERT(sti->sti_conn_ind_tail == NULL); 6689 } 6690 6691 /* 6692 * Given a sonode, grab the TPI info and free any data. 6693 */ 6694 static void 6695 sotpi_info_fini(struct sonode *so) 6696 { 6697 sotpi_info_t *sti = SOTOTPI(so); 6698 mblk_t *mp; 6699 6700 ASSERT(sti->sti_discon_ind_mp == NULL); 6701 6702 if ((mp = sti->sti_conn_ind_head) != NULL) { 6703 mblk_t *mp1; 6704 6705 while (mp) { 6706 mp1 = mp->b_next; 6707 mp->b_next = NULL; 6708 freemsg(mp); 6709 mp = mp1; 6710 } 6711 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6712 } 6713 6714 /* 6715 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6716 * indirect them. It also uses so_count as a validity test. 6717 */ 6718 mutex_enter(&so->so_lock); 6719 6720 if (sti->sti_laddr_sa) { 6721 ASSERT((caddr_t)sti->sti_faddr_sa == 6722 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6723 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6724 sti->sti_laddr_valid = 0; 6725 sti->sti_faddr_valid = 0; 6726 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6727 sti->sti_laddr_sa = NULL; 6728 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6729 sti->sti_faddr_sa = NULL; 6730 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6731 } 6732 6733 mutex_exit(&so->so_lock); 6734 6735 if ((mp = sti->sti_eaddr_mp) != NULL) { 6736 freemsg(mp); 6737 sti->sti_eaddr_mp = NULL; 6738 sti->sti_delayed_error = 0; 6739 } 6740 6741 if ((mp = sti->sti_ack_mp) != NULL) { 6742 freemsg(mp); 6743 sti->sti_ack_mp = NULL; 6744 } 6745 6746 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 6747 sti->sti_nl7c_rcv_mp = NULL; 6748 freemsg(mp); 6749 } 6750 sti->sti_nl7c_rcv_rval = 0; 6751 if (sti->sti_nl7c_uri != NULL) { 6752 nl7c_urifree(so); 6753 /* urifree() cleared nl7c_uri */ 6754 } 6755 if (sti->sti_nl7c_flags) { 6756 sti->sti_nl7c_flags = 0; 6757 } 6758 6759 ASSERT(sti->sti_ux_bound_vp == NULL); 6760 if ((mp = sti->sti_unbind_mp) != NULL) { 6761 freemsg(mp); 6762 sti->sti_unbind_mp = NULL; 6763 } 6764 } 6765 6766 /* 6767 * Destroys the TPI information attached to a sonode. 6768 */ 6769 static void 6770 sotpi_info_destroy(struct sonode *so) 6771 { 6772 sotpi_info_t *sti = SOTOTPI(so); 6773 6774 i_sotpi_info_destructor(sti); 6775 kmem_free(sti, sizeof (*sti)); 6776 6777 so->so_priv = NULL; 6778 } 6779 6780 /* 6781 * Create the global sotpi socket module entry. It will never be freed. 
6782 */ 6783 smod_info_t * 6784 sotpi_smod_create(void) 6785 { 6786 smod_info_t *smodp; 6787 6788 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 6789 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 6790 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 6791 /* 6792 * Initialize the smod_refcnt to 1 so it will never be freed. 6793 */ 6794 smodp->smod_refcnt = 1; 6795 smodp->smod_uc_version = SOCK_UC_VERSION; 6796 smodp->smod_dc_version = SOCK_DC_VERSION; 6797 smodp->smod_sock_create_func = &sotpi_create; 6798 smodp->smod_sock_destroy_func = &sotpi_destroy; 6799 return (smodp); 6800 } 6801