/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/kmem_impl.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/user.h>
#include <sys/termios.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/suntpi.h>
#include <sys/ddi.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/pathname.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <sys/un.h>
#include <sys/strsun.h>

#include <sys/tiuser.h>
#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/timod.h>		/* TI_GETMYNAME, TI_GETPEERNAME */

#include <c2/audit.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/udp_impl.h>

#include <sys/zone.h>

#include <fs/sockfs/nl7c.h>
#include <fs/sockfs/nl7curi.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/socktpi.h>
#include <fs/sockfs/socktpi_impl.h>

/*
 * Possible failures when memory can't be allocated. The documented behavior:
 *
 *		5.5:			4.X:		XNET:
 * accept:	ENOMEM/ENOSR/EINTR	- (EINTR)	ENOMEM/ENOBUFS/ENOSR/
 *							EINTR
 *	(4.X does not document EINTR but returns it)
 * bind:	ENOSR			-		ENOBUFS/ENOSR
 * connect:	EINTR			EINTR		ENOBUFS/ENOSR/EINTR
 * getpeername:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 * getsockname:	ENOMEM/ENOSR		ENOBUFS (-)	ENOBUFS/ENOSR
 *	(4.X getpeername and getsockname do not fail in practice)
 * getsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * listen:	-			-		ENOBUFS
 * recv:	ENOMEM/ENOSR/EINTR	EINTR		ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * send:	ENOMEM/ENOSR/EINTR	ENOBUFS/EINTR	ENOBUFS/ENOMEM/ENOSR/
 *							EINTR
 * setsockopt:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 * shutdown:	ENOMEM/ENOSR		-		ENOBUFS/ENOSR
 * socket:	ENOMEM/ENOSR		ENOBUFS		ENOBUFS/ENOMEM/ENOSR
 * socketpair:	ENOMEM/ENOSR		-		ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *	recv: return EINTR
 *	send: return EINTR
 *	connect, accept: EINTR
 *	bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *	socket, socketpair: ENOBUFS
 *	getpeername, getsockname: sleep
 *	getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *	TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *	the transport is already bound. This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected-to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket send them as a single message for AF_INET{,6}.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */

#ifdef SOCK_TEST
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag to disable these checks.
 */
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;

static void sotpi_destroy(struct sonode *);
static struct sonode *sotpi_create(struct sockparams *, int, int, int, int,
    int, int *, cred_t *cr);

static boolean_t	sotpi_info_create(struct sonode *, int);
static void		sotpi_info_init(struct sonode *);
static void		sotpi_info_fini(struct sonode *);
static void		sotpi_info_destroy(struct sonode *);

/*
 * Do direct function call to the transport layer below; this would
 * also allow the transport to utilize read-side synchronous stream
 * interface if necessary. This is a /etc/system tunable that must
 * not be modified on a running system. By default this is enabled
 * for performance reasons and may be disabled for debugging purposes.
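 *
 * For example, on systems where this file is built into the sockfs
 * module, the tunable would typically be set at boot with an /etc/system
 * entry such as the following (assuming the usual module:variable syntax):
 *
 *	set sockfs:socktpi_direct = 0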
 */
boolean_t socktpi_direct = B_TRUE;

static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;

extern void	sigintr(k_sigset_t *, int);
extern void	sigunintr(k_sigset_t *);

static int	sotpi_unbind(struct sonode *, int);

/* TPI sockfs sonode operations */
int		sotpi_init(struct sonode *, struct sonode *, struct cred *,
		    int);
static int	sotpi_accept(struct sonode *, int, struct cred *,
		    struct sonode **);
static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
		    int, struct cred *);
static int	sotpi_listen(struct sonode *, int, struct cred *);
static int	sotpi_connect(struct sonode *, struct sockaddr *,
		    socklen_t, int, int, struct cred *);
extern int	sotpi_recvmsg(struct sonode *, struct nmsghdr *,
		    struct uio *, struct cred *);
static int	sotpi_sendmsg(struct sonode *, struct nmsghdr *,
		    struct uio *, struct cred *);
static int	sotpi_sendmblk(struct sonode *, struct nmsghdr *, int,
		    struct cred *, mblk_t **);
static int	sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
		    struct uio *, void *, t_uscalar_t, int);
static int	sodgram_direct(struct sonode *, struct sockaddr *,
		    socklen_t, struct uio *, int);
extern int	sotpi_getpeername(struct sonode *, struct sockaddr *,
		    socklen_t *, boolean_t, struct cred *);
static int	sotpi_getsockname(struct sonode *, struct sockaddr *,
		    socklen_t *, struct cred *);
static int	sotpi_shutdown(struct sonode *, int, struct cred *);
extern int	sotpi_getsockopt(struct sonode *, int, int, void *,
		    socklen_t *, int, struct cred *);
extern int	sotpi_setsockopt(struct sonode *, int, int, const void *,
		    socklen_t, struct cred *);
static int	sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *,
		    int32_t *);
static int	socktpi_plumbioctl(struct vnode *, int, intptr_t, int,
		    struct cred *, int32_t *);
static int	sotpi_poll(struct sonode *, short, int, short *,
		    struct pollhead **);
static int	sotpi_close(struct sonode *, int, struct cred *);

static int	i_sotpi_info_constructor(sotpi_info_t *);
static void	i_sotpi_info_destructor(sotpi_info_t *);

sonodeops_t sotpi_sonodeops = {
	sotpi_init,		/* sop_init	*/
	sotpi_accept,		/* sop_accept	*/
	sotpi_bind,		/* sop_bind	*/
	sotpi_listen,		/* sop_listen	*/
	sotpi_connect,		/* sop_connect	*/
	sotpi_recvmsg,		/* sop_recvmsg	*/
	sotpi_sendmsg,		/* sop_sendmsg	*/
	sotpi_sendmblk,		/* sop_sendmblk	*/
	sotpi_getpeername,	/* sop_getpeername */
	sotpi_getsockname,	/* sop_getsockname */
	sotpi_shutdown,		/* sop_shutdown */
	sotpi_getsockopt,	/* sop_getsockopt */
	sotpi_setsockopt,	/* sop_setsockopt */
	sotpi_ioctl,		/* sop_ioctl	*/
	sotpi_poll,		/* sop_poll	*/
	sotpi_close,		/* sop_close	*/
};

/*
 * Return a TPI socket vnode.
 *
 * Note that sockets assume that the driver will clone (either itself
 * or by using the clone driver) i.e. a socket() call will always
 * result in a new vnode being created.
 */

/*
 * Common create code for socket and accept. If tso is set the values
 * from that node are used instead of issuing a T_INFO_REQ.
 */

/* ARGSUSED */
static struct sonode *
sotpi_create(struct sockparams *sp, int family, int type, int protocol,
    int version, int sflags, int *errorp, cred_t *cr)
{
	struct sonode	*so;
	kmem_cache_t	*cp;
	int		sfamily = family;

	ASSERT(sp->sp_sdev_info.sd_vnode != NULL);

	if (family == AF_NCA) {
		/*
		 * The request is for an NCA socket so for NL7C use the
		 * INET domain instead and mark NL7C_AF_NCA below.
		 */
		family = AF_INET;
		/*
		 * NL7C is not supported in the non-global zone,
		 * we enforce this restriction here.
		 */
		if (getzoneid() != GLOBAL_ZONEID) {
			*errorp = ENOTSUP;
			return (NULL);
		}
	}

	/*
	 * To be compatible with the old TPI socket implementation, ignore
	 * the sleep flag (sflags) passed in.
	 */
	cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache;
	so = kmem_cache_alloc(cp, KM_SLEEP);
	if (so == NULL) {
		*errorp = ENOMEM;
		return (NULL);
	}

	sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops);
	sotpi_info_init(so);

	if (sfamily == AF_NCA) {
		SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA;
	}

	if (version == SOV_DEFAULT)
		version = so_default_version;

	so->so_version = (short)version;
	*errorp = 0;

	return (so);
}

static void
sotpi_destroy(struct sonode *so)
{
	kmem_cache_t *cp;
	struct sockparams *origsp;

	/*
	 * If there is a new dealloc function (ie. smod_destroy_func),
	 * then it should check the correctness of the ops.
	 */

	ASSERT(so->so_ops == &sotpi_sonodeops);

	origsp = SOTOTPI(so)->sti_orig_sp;

	sotpi_info_fini(so);

	if (so->so_state & SS_FALLBACK_COMP) {
		/*
		 * A fallback happened, which means that a sotpi_info_t struct
		 * was allocated (as opposed to being allocated from the TPI
		 * sonode cache). Therefore we explicitly free the struct
		 * here.
		 */
		sotpi_info_destroy(so);
		ASSERT(origsp != NULL);

		origsp->sp_smod_info->smod_sock_destroy_func(so);
		SOCKPARAMS_DEC_REF(origsp);
	} else {
		sonode_fini(so);
		cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache :
		    socktpi_cache;
		kmem_cache_free(cp, so);
	}
}

/* ARGSUSED1 */
int
sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags)
{
	major_t maj;
	dev_t newdev;
	struct vnode *vp;
	int error = 0;
	struct stdata *stp;

	sotpi_info_t *sti = SOTOTPI(so);

	dprint(1, ("sotpi_init()\n"));

	/*
	 * Overwrite the sleep flag passed in, but that is ok
	 * as the TPI socket does not honor the sleep flag.
	 */
	flags |= FREAD|FWRITE;

	/*
	 * Record in so_flag that it is a clone.
	 */
	if (getmajor(sti->sti_dev) == clone_major)
		so->so_flag |= SOCLONE;

	if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) &&
	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
	    (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP ||
	    so->so_protocol == IPPROTO_IP)) {
		/* Tell tcp or udp that it's talking to sockets */
		flags |= SO_SOCKSTR;

		/*
		 * Here we indicate to socktpi_open() our attempt to
		 * make direct calls between sockfs and transport.
		 * The final decision is left to socktpi_open().
		 */
		sti->sti_direct = 1;

		ASSERT(so->so_type != SOCK_DGRAM || tso == NULL);
		if (so->so_type == SOCK_STREAM && tso != NULL) {
			if (SOTOTPI(tso)->sti_direct) {
				/*
				 * Inherit sti_direct from listener and pass
				 * SO_ACCEPTOR open flag to tcp, indicating
				 * that this is an accept fast-path instance.
				 */
				flags |= SO_ACCEPTOR;
			} else {
				/*
				 * sti_direct is not set on listener, meaning
				 * that the listener has been converted from
				 * a socket to a stream. Ensure that the
				 * acceptor inherits these settings.
				 */
				sti->sti_direct = 0;
				flags &= ~SO_SOCKSTR;
			}
		}
	}

	/*
	 * Tell local transport that it is talking to sockets.
	 */
	if (so->so_family == AF_UNIX) {
		flags |= SO_SOCKSTR;
	}

	vp = SOTOV(so);
	newdev = vp->v_rdev;
	maj = getmajor(newdev);
	ASSERT(STREAMSTAB(maj));

	error = stropen(vp, &newdev, flags, cr);

	stp = vp->v_stream;
	if (error == 0) {
		if (so->so_flag & SOCLONE)
			ASSERT(newdev != vp->v_rdev);
		mutex_enter(&so->so_lock);
		sti->sti_dev = newdev;
		vp->v_rdev = newdev;
		mutex_exit(&so->so_lock);

		if (stp->sd_flag & STRISTTY) {
			/*
			 * This is a post-SVR4 tty driver - a socket can not
			 * be a controlling terminal. Fail the open.
			 */
			(void) sotpi_close(so, flags, cr);
			return (ENOTTY);	/* XXX */
		}

		ASSERT(stp->sd_wrq != NULL);
		sti->sti_provinfo = tpi_findprov(stp->sd_wrq);

		/*
		 * If caller is interested in doing direct function call
		 * interface to/from transport module, probe the module
		 * directly beneath the streamhead to see if it qualifies.
		 *
		 * We turn off the direct interface when qualifications fail.
		 * In the acceptor case, we simply turn off the sti_direct
		 * flag on the socket. We do the fallback after the accept
		 * has completed, before the new socket is returned to the
		 * application.
		 */
		if (sti->sti_direct) {
			queue_t *tq = stp->sd_wrq->q_next;

			/*
			 * sti_direct is currently supported and tested
			 * only for tcp/udp; this is the main reason to
			 * have the following assertions.
			 */
			ASSERT(so->so_family == AF_INET ||
			    so->so_family == AF_INET6);
			ASSERT(so->so_protocol == IPPROTO_UDP ||
			    so->so_protocol == IPPROTO_TCP ||
			    so->so_protocol == IPPROTO_IP);
			ASSERT(so->so_type == SOCK_DGRAM ||
			    so->so_type == SOCK_STREAM);

			/*
			 * Abort direct call interface if the module directly
			 * underneath the stream head is not defined with the
			 * _D_DIRECT flag. This could happen in the tcp or
			 * udp case, when some other module is autopushed
			 * above it, or for some reasons the expected module
			 * isn't purely D_MP (which is the main requirement).
			 */
			if (!socktpi_direct || !(tq->q_flag & _QDIRECT) ||
			    !(_OTHERQ(tq)->q_flag & _QDIRECT)) {
				int rval;

				/* Continue on without direct calls */
				sti->sti_direct = 0;

				/*
				 * Cannot issue ioctl on fallback socket since
				 * there is no conn associated with the queue.
				 * The fallback downcall will notify the proto
				 * of the change.
				 */
				if (!(flags & SO_ACCEPTOR) &&
				    !(flags & SO_FALLBACK)) {
					if ((error = strioctl(vp,
					    _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
					    cr, &rval)) != 0) {
						(void) sotpi_close(so, flags,
						    cr);
						return (error);
					}
				}
			}
		}

		if (flags & SO_FALLBACK) {
			/*
			 * The stream created does not have a conn.
			 * Do stream setup after a conn has been assigned.
			 */
			return (error);
		}
		if (error = so_strinit(so, tso)) {
			(void) sotpi_close(so, flags, cr);
			return (error);
		}

		/* Wildcard */
		if (so->so_protocol != so->so_sockparams->sp_protocol) {
			int protocol = so->so_protocol;
			/*
			 * Issue SO_PROTOTYPE setsockopt.
			 */
			error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE,
			    &protocol, (t_uscalar_t)sizeof (protocol), cr);
			if (error != 0) {
				(void) sotpi_close(so, flags, cr);
				/*
				 * Setsockopt often fails with ENOPROTOOPT but
				 * socket() should fail with
				 * EPROTONOSUPPORT/EPROTOTYPE.
				 */
				return (EPROTONOSUPPORT);
			}
		}

	} else {
		/*
		 * While the same socket can not be reopened (unlike specfs)
		 * the stream head sets STREOPENFAIL when the autopush fails.
		 */
		if ((stp != NULL) &&
		    (stp->sd_flag & STREOPENFAIL)) {
			/*
			 * Open failed part way through.
			 */
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~STREOPENFAIL;
			mutex_exit(&stp->sd_lock);
			(void) sotpi_close(so, flags, cr);
			return (error);
			/*NOTREACHED*/
		}
		ASSERT(stp == NULL);
	}
	TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN,
	    "sockfs open:maj %d vp %p so %p error %d",
	    maj, vp, so, error);
	return (error);
}

/*
 * Bind the socket to an unspecified address in sockfs only.
 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
 * required in all cases.
 */
static void
so_automatic_bind(struct sonode *so)
{
	sotpi_info_t *sti = SOTOTPI(so);
	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(!(so->so_state & SS_ISBOUND));
	ASSERT(sti->sti_unbind_mp);

	ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
	bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
	sti->sti_laddr_sa->sa_family = so->so_family;
	so->so_state |= SS_ISBOUND;
}


/*
 * Bind the socket.
 *
 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2
 * are passed in we allow rebinding. Note that for backwards compatibility
 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind.
 * Thus the rebinding code is currently not executed.
 *
 * The constraints for rebinding are:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 * This rebinding code was added based on some language in the XNET book
 * about not returning EINVAL if the protocol allows rebinding. However,
 * this language is not present in the Posix socket draft. Thus maybe the
 * rebinding logic should be deleted from the source.
 *
 * A null "name" can be used to unbind the socket if:
 * - it is a SOCK_DGRAM, or
 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
 *   and no listen() has been done.
 */
/* ARGSUSED */
static int
sotpi_bindlisten(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int backlog, int flags, struct cred *cr)
{
	struct T_bind_req	bind_req;
	struct T_bind_ack	*bind_ack;
	int			error = 0;
	mblk_t			*mp;
	void			*addr;
	t_uscalar_t		addrlen;
	int			unbind_on_err = 1;
	boolean_t		clear_acceptconn_on_err = B_FALSE;
	boolean_t		restore_backlog_on_err = B_FALSE;
	int			save_so_backlog;
	t_scalar_t		PRIM_type = O_T_BIND_REQ;
	boolean_t		tcp_udp_xport;
	void			*nl7c = NULL;
	sotpi_info_t		*sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
	    (void *)so, (void *)name, namelen, backlog, flags,
	    pr_state(so->so_state, so->so_mode)));

	tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM;

	if (!(flags & _SOBIND_LOCK_HELD)) {
		mutex_enter(&so->so_lock);
		so_lock_single(so);	/* Set SOLOCKED */
	} else {
		ASSERT(MUTEX_HELD(&so->so_lock));
		ASSERT(so->so_flag & SOLOCKED);
	}

	/*
	 * Make sure that there is a preallocated unbind_req message
	 * before binding. This message is allocated when the socket is
	 * created, but it might have been consumed.
	 */
	if (sti->sti_unbind_mp == NULL) {
		dprintso(so, 1, ("sobind: allocating unbind_req\n"));
		/* NOTE: holding so_lock while sleeping */
		sti->sti_unbind_mp =
		    soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP,
		    cr);
	}

	if (flags & _SOBIND_REBIND) {
		/*
		 * Called from solisten after doing an sotpi_unbind() or
		 * potentially without the unbind (latter for AF_INET{,6}).
		 */
		ASSERT(name == NULL && namelen == 0);

		if (so->so_family == AF_UNIX) {
			ASSERT(sti->sti_ux_bound_vp);
			addr = &sti->sti_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
			dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, "
			    "addr 0x%p, vp %p\n",
			    addrlen,
			    (void *)((struct so_ux_addr *)addr)->soua_vp,
			    (void *)sti->sti_ux_bound_vp));
		} else {
			addr = sti->sti_laddr_sa;
			addrlen = (t_uscalar_t)sti->sti_laddr_len;
		}
	} else if (flags & _SOBIND_UNSPEC) {
		ASSERT(name == NULL && namelen == 0);

		/*
		 * The caller checked SS_ISBOUND but not necessarily
		 * under so_lock
		 */
		if (so->so_state & SS_ISBOUND) {
			/* No error */
			goto done;
		}

		/* Set an initial local address */
		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * Use an address with same size as struct sockaddr
			 * just like BSD.
			 */
			sti->sti_laddr_len =
			    (socklen_t)sizeof (struct sockaddr);
			ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
			bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
			sti->sti_laddr_sa->sa_family = so->so_family;

			/*
			 * Pass down an address with the implicit bind
			 * magic number and the rest all zeros.
			 * The transport will return a unique address.
			 */
			sti->sti_ux_laddr.soua_vp = NULL;
			sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT;
			addr = &sti->sti_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
			break;

		case AF_INET:
		case AF_INET6:
			/*
			 * An unspecified bind in TPI has a NULL address.
			 * Set the address in sockfs to have the sa_family.
			 */
			sti->sti_laddr_len = (so->so_family == AF_INET) ?
			    (socklen_t)sizeof (sin_t) :
			    (socklen_t)sizeof (sin6_t);
			ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
			bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
			sti->sti_laddr_sa->sa_family = so->so_family;
			addr = NULL;
			addrlen = 0;
			break;

		default:
			/*
			 * An unspecified bind in TPI has a NULL address.
			 * Set the address in sockfs to be zero length.
			 *
			 * Can not assume there is a sa_family for all
			 * protocol families. For example, AF_X25 does not
			 * have a family field.
			 */
			bzero(sti->sti_laddr_sa, sti->sti_laddr_len);
			sti->sti_laddr_len = 0;	/* XXX correct? */
			addr = NULL;
			addrlen = 0;
			break;
		}

	} else {
		if (so->so_state & SS_ISBOUND) {
			/*
			 * If it is ok to rebind the socket, first unbind
			 * with the transport. A rebind to the NULL address
			 * is interpreted as an unbind.
			 * Note that a bind to NULL in BSD does unbind the
			 * socket but it fails with EINVAL.
			 * Note that regular sockets set SOV_SOCKBSD i.e.
			 * _SOBIND_SOCKBSD gets set here hence no type of
			 * socket does currently allow rebinding.
			 *
			 * If the name is NULL just do an unbind.
			 */
			if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) &&
			    name != NULL) {
				error = EINVAL;
				unbind_on_err = 0;
				eprintsoline(so, error);
				goto done;
			}
			if ((so->so_mode & SM_CONNREQUIRED) &&
			    (so->so_state & SS_CANTREBIND)) {
				error = EINVAL;
				unbind_on_err = 0;
				eprintsoline(so, error);
				goto done;
			}
			error = sotpi_unbind(so, 0);
			if (error) {
				eprintsoline(so, error);
				goto done;
			}
			ASSERT(!(so->so_state & SS_ISBOUND));
			if (name == NULL) {
				so->so_state &=
				    ~(SS_ISCONNECTED|SS_ISCONNECTING);
				goto done;
			}
		}

		/* X/Open requires this check */
		if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
			if (xnet_check_print) {
				printf("sockfs: X/Open bind state check "
				    "caused EINVAL\n");
			}
			error = EINVAL;
			goto done;
		}

		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * All AF_UNIX addresses are nul terminated
			 * when copied in (copyin_name), so the minimum
			 * length is 3 bytes.
			 */
			if (name == NULL ||
			    (ssize_t)namelen <= sizeof (short) + 1) {
				error = EISDIR;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Verify so_family matches the bound family.
			 * BSD does not check this for AF_UNIX resulting
			 * in funny mknods.
			 */
			if (name->sa_family != so->so_family) {
				error = EAFNOSUPPORT;
				goto done;
			}
			break;
		case AF_INET:
			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((size_t)namelen != sizeof (sin_t)) {
				error = name->sa_family != so->so_family ?
				    EAFNOSUPPORT : EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((flags & _SOBIND_XPG4_2) &&
			    (name->sa_family != so->so_family)) {
				/*
				 * This check has to be made for X/Open
				 * sockets however application failures have
				 * been observed when it is applied to
				 * all sockets.
				 */
				error = EAFNOSUPPORT;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Force a zero sa_family to match so_family.
			 *
			 * Some programs like inetd(1M) don't set the
			 * family field. Other programs leave
			 * sin_family set to garbage - SunOS 4.X does
			 * not check the family field on a bind.
			 * We use the family field that
			 * was passed in to the socket() call.
			 */
			name->sa_family = so->so_family;
			break;

		case AF_INET6: {
#ifdef DEBUG
			sin6_t *sin6 = (sin6_t *)name;
#endif /* DEBUG */

			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if ((size_t)namelen != sizeof (sin6_t)) {
				error = name->sa_family != so->so_family ?
				    EAFNOSUPPORT : EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			if (name->sa_family != so->so_family) {
				/*
				 * With IPv6 we require the family to match
				 * unlike in IPv4.
				 */
				error = EAFNOSUPPORT;
				eprintsoline(so, error);
				goto done;
			}
#ifdef DEBUG
			/*
			 * Verify that apps don't forget to clear
			 * sin6_scope_id etc
			 */
			if (sin6->sin6_scope_id != 0 &&
			    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
				zcmn_err(getzoneid(), CE_WARN,
				    "bind with uninitialized sin6_scope_id "
				    "(%d) on socket. Pid = %d\n",
				    (int)sin6->sin6_scope_id,
				    (int)curproc->p_pid);
			}
			if (sin6->__sin6_src_id != 0) {
				zcmn_err(getzoneid(), CE_WARN,
				    "bind with uninitialized __sin6_src_id "
				    "(%d) on socket. Pid = %d\n",
				    (int)sin6->__sin6_src_id,
				    (int)curproc->p_pid);
			}
#endif /* DEBUG */
			break;
		}
		default:
			/*
			 * Don't do any length or sa_family check to allow
			 * non-sockaddr style addresses.
			 */
			if (name == NULL) {
				error = EINVAL;
				eprintsoline(so, error);
				goto done;
			}
			break;
		}

		if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) {
			error = ENAMETOOLONG;
			eprintsoline(so, error);
			goto done;
		}
		/*
		 * Save local address.
		 */
		sti->sti_laddr_len = (socklen_t)namelen;
		ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
		bcopy(name, sti->sti_laddr_sa, namelen);

		addr = sti->sti_laddr_sa;
		addrlen = (t_uscalar_t)sti->sti_laddr_len;
		switch (so->so_family) {
		case AF_INET6:
		case AF_INET:
			break;
		case AF_UNIX: {
			struct sockaddr_un *soun =
			    (struct sockaddr_un *)sti->sti_laddr_sa;
			struct vnode *vp, *rvp;
			struct vattr vattr;

			ASSERT(sti->sti_ux_bound_vp == NULL);
			/*
			 * Create vnode for the specified path name.
			 * Keep vnode held with a reference in sti_ux_bound_vp.
			 * Use the vnode pointer as the address used in the
			 * bind with the transport.
			 *
			 * Use the same mode as in BSD. In particular this does
			 * not observe the umask.
			 */
			/* MAXPATHLEN + soun_family + nul termination */
			if (sti->sti_laddr_len >
			    (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
				error = ENAMETOOLONG;
				eprintsoline(so, error);
				goto done;
			}
			vattr.va_type = VSOCK;
			vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask;
			vattr.va_mask = AT_TYPE|AT_MODE;
			/* NOTE: holding so_lock */
			error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr,
			    EXCL, 0, &vp, CRMKNOD, 0, 0);
			if (error) {
				if (error == EEXIST)
					error = EADDRINUSE;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Establish pointer from the underlying filesystem
			 * vnode to the socket node.
			 * sti_ux_bound_vp and v_stream->sd_vnode form the
			 * cross-linkage between the underlying filesystem
			 * node and the socket node.
			 */

			if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) {
				VN_HOLD(rvp);
				VN_RELE(vp);
				vp = rvp;
			}

			ASSERT(SOTOV(so)->v_stream);
			mutex_enter(&vp->v_lock);
			vp->v_stream = SOTOV(so)->v_stream;
			sti->sti_ux_bound_vp = vp;
			mutex_exit(&vp->v_lock);

			/*
			 * Use the vnode pointer value as a unique address
			 * (together with the magic number to avoid conflicts
			 * with implicit binds) in the transport provider.
			 */
			sti->sti_ux_laddr.soua_vp =
			    (void *)sti->sti_ux_bound_vp;
			sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT;
			addr = &sti->sti_ux_laddr;
			addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr);
			dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n",
			    addrlen,
			    (void *)((struct so_ux_addr *)addr)->soua_vp));
			break;
		}
		} /* end switch (so->so_family) */
	}

	/*
	 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
	 * the transport can start passing up T_CONN_IND messages
	 * as soon as it receives the bind req and strsock_proto()
	 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
	 */
	if (flags & _SOBIND_LISTEN) {
		if ((so->so_state & SS_ACCEPTCONN) == 0)
			clear_acceptconn_on_err = B_TRUE;
		save_so_backlog = so->so_backlog;
		restore_backlog_on_err = B_TRUE;
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = backlog;
	}

	/*
	 * If NL7C addr(s) have been configured check for addr/port match,
	 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C.
	 *
	 * NL7C supports the TCP transport only so check AF_INET and AF_INET6
	 * family sockets only. If match mark as such.
	 */
	if (nl7c_enabled && ((addr != NULL &&
	    (so->so_family == AF_INET || so->so_family == AF_INET6) &&
	    (nl7c = nl7c_lookup_addr(addr, addrlen))) ||
	    sti->sti_nl7c_flags == NL7C_AF_NCA)) {
		/*
		 * NL7C is not supported in non-global zones,
		 * we enforce this restriction here.
		 */
		if (so->so_zoneid == GLOBAL_ZONEID) {
			/* An NL7C socket, mark it */
			sti->sti_nl7c_flags |= NL7C_ENABLED;
			if (nl7c == NULL) {
				/*
				 * Was an AF_NCA bind() so add it to the
				 * addr list for reporting purposes.
				 */
				nl7c = nl7c_add_addr(addr, addrlen);
			}
		} else
			nl7c = NULL;
	}

	/*
	 * We send a T_BIND_REQ for TCP/UDP since we know it supports it,
	 * for other transports we will send in a O_T_BIND_REQ.
	 */
	if (tcp_udp_xport &&
	    (so->so_family == AF_INET || so->so_family == AF_INET6))
		PRIM_type = T_BIND_REQ;

	bind_req.PRIM_type = PRIM_type;
	bind_req.ADDR_length = addrlen;
	bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
	bind_req.CONIND_number = backlog;
	/* NOTE: holding so_lock while sleeping */
	mp = soallocproto2(&bind_req, sizeof (bind_req),
	    addr, addrlen, 0, _ALLOC_SLEEP, cr);
	sti->sti_laddr_valid = 0;

	/* Done using sti_laddr_sa - can drop the lock */
	mutex_exit(&so->so_lock);

	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	if (error) {
		eprintsoline(so, error);
		mutex_enter(&so->so_lock);
		goto done;
	}

	mutex_enter(&so->so_lock);
	error = sowaitprim(so, PRIM_type, T_BIND_ACK,
	    (t_uscalar_t)sizeof (*bind_ack), &mp, 0);
	if (error) {
		eprintsoline(so, error);
		goto done;
	}
	ASSERT(mp);
	/*
	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
	 * strsock_proto while the lock was dropped above, the bind
	 * is allowed to complete.
	 */

	/* Mark as bound. This will be undone if we detect errors below. */
	if (flags & _SOBIND_NOXLATE) {
		ASSERT(so->so_family == AF_UNIX);
		sti->sti_faddr_noxlate = 1;
	}
	ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND));
	so->so_state |= SS_ISBOUND;
	ASSERT(sti->sti_unbind_mp);

	/* note that we've already set SS_ACCEPTCONN above */

	/*
	 * Recompute addrlen - an unspecified bind sent down an
	 * address of length zero but we expect the appropriate length
	 * in return.
	 */
	addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ?
	    sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len);

	bind_ack = (struct T_bind_ack *)mp->b_rptr;
	/*
	 * The alignment restriction is really too strict but
	 * we want enough alignment to inspect the fields of
	 * a sockaddr_in.
	 */
	addr = sogetoff(mp, bind_ack->ADDR_offset,
	    bind_ack->ADDR_length,
	    __TPI_ALIGN_SIZE);
	if (addr == NULL) {
		freemsg(mp);
		error = EPROTO;
		eprintsoline(so, error);
		goto done;
	}
	if (!(flags & _SOBIND_UNSPEC)) {
		/*
		 * Verify that the transport didn't return something we
		 * did not want e.g. an address other than what we asked for.
		 *
		 * NOTE: These checks would go away if/when we switch to
		 * using the new TPI (in which the transport would fail
		 * the request instead of assigning a different address).
		 *
		 * NOTE2: For protocols that we don't know (i.e. any
		 * other than AF_INET6, AF_INET and AF_UNIX), we
		 * cannot know if the transport should be expected to
		 * return the same address as that requested.
		 *
		 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send
		 * down a T_BIND_REQ. We use O_T_BIND_REQ for others.
		 *
		 * For example, in the case of netatalk it may be
		 * inappropriate for the transport to return the
		 * requested address (as it may have allocated a local
		 * port number in behaviour similar to that of an
		 * AF_INET bind request with a port number of zero).
		 *
		 * Given the definition of O_T_BIND_REQ, where the
		 * transport may bind to an address other than the
		 * requested address, it's not possible to determine
		 * whether a returned address that differs from the
		 * requested address is a reason to fail (because the
		 * requested address was not available) or succeed
		 * (because the transport allocated an appropriate
		 * address and/or port).
		 *
		 * sockfs currently requires that the transport return
		 * the requested address in the T_BIND_ACK, unless
		 * there is code here to allow for any discrepancy.
		 * Such code exists for AF_INET and AF_INET6.
		 *
		 * Netatalk chooses to return the requested address
		 * rather than the (correct) allocated address. This
		 * means that netatalk violates the TPI specification
		 * (and would not function correctly if used from a
		 * TLI application), but it does mean that it works
		 * with sockfs.
		 *
		 * As noted above, using the newer XTI bind primitive
		 * (T_BIND_REQ) in preference to O_T_BIND_REQ would
		 * allow sockfs to be more sure about whether or not
		 * the bind request had succeeded (as transports are
		 * not permitted to bind to a different address than
		 * that requested - they must return failure).
		 * Unfortunately, support for T_BIND_REQ may not be
		 * present in all transport implementations (netatalk,
		 * for example, doesn't have it), making the
		 * transition difficult.
		 */
		if (bind_ack->ADDR_length != addrlen) {
			/* Assumes that the requested address was in use */
			freemsg(mp);
			error = EADDRINUSE;
			eprintsoline(so, error);
			goto done;
		}

		switch (so->so_family) {
		case AF_INET6:
		case AF_INET: {
			sin_t *rname, *aname;

			rname = (sin_t *)addr;
			aname = (sin_t *)sti->sti_laddr_sa;

			/*
			 * Take advantage of the alignment
			 * of sin_port and sin6_port which fall
			 * in the same place in their data structures.
			 * Just use sin_port for either address family.
			 *
			 * This may become a problem if (heaven forbid)
			 * there's a separate ipv6port_reserved... :-P
			 *
			 * Binding to port 0 has the semantics of letting
			 * the transport bind to any port.
			 *
			 * If the transport is TCP or UDP, since we had sent
			 * a T_BIND_REQ we would not get a port other than
			 * what we asked for.
			 */
			if (tcp_udp_xport) {
				/*
				 * Pick up the new port number if we bound to
				 * port 0.
				 */
				if (aname->sin_port == 0)
					aname->sin_port = rname->sin_port;
				sti->sti_laddr_valid = 1;
				break;
			}
			if (aname->sin_port != 0 &&
			    aname->sin_port != rname->sin_port) {
				freemsg(mp);
				error = EADDRINUSE;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Pick up the new port number if we bound to port 0.
			 */
			aname->sin_port = rname->sin_port;

			/*
			 * Unfortunately, addresses aren't _quite_ the same.
			 */
			if (so->so_family == AF_INET) {
				if (aname->sin_addr.s_addr !=
				    rname->sin_addr.s_addr) {
					freemsg(mp);
					error = EADDRNOTAVAIL;
					eprintsoline(so, error);
					goto done;
				}
			} else {
				sin6_t *rname6 = (sin6_t *)rname;
				sin6_t *aname6 = (sin6_t *)aname;

				if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr,
				    &rname6->sin6_addr)) {
					freemsg(mp);
					error = EADDRNOTAVAIL;
					eprintsoline(so, error);
					goto done;
				}
			}
			break;
		}
		case AF_UNIX:
			if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) {
				freemsg(mp);
				error = EADDRINUSE;
				eprintsoline(so, error);
				eprintso(so,
				    ("addrlen %d, addr 0x%x, vp %p\n",
				    addrlen, *((int *)addr),
				    (void *)sti->sti_ux_bound_vp));
				goto done;
			}
			sti->sti_laddr_valid = 1;
			break;
		default:
			/*
			 * NOTE: This assumes that addresses can be
			 * byte-compared for equivalence.
			 */
			if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) {
				freemsg(mp);
				error = EADDRINUSE;
				eprintsoline(so, error);
				goto done;
			}
			/*
			 * Don't mark sti_laddr_valid, as we cannot be
			 * sure that the returned address is the real
			 * bound address when talking to an unknown
			 * transport.
			 */
			break;
		}
	} else {
		/*
		 * Save the returned address for getsockname.
		 * Needed for unspecific bind unless transport supports
		 * the TI_GETMYNAME ioctl.
		 * Do this for AF_INET{,6} even though they do, as
		 * caching info here is much better performance than
		 * a TPI/STREAMS trip to the transport for getsockname.
		 * Any transport that cannot do so for some reason
		 * _must_ _not_ set sti_laddr_valid here, so that the
		 * caching version of getsockname does not break.
		 */
		switch (so->so_family) {
		case AF_UNIX:
			/*
			 * Record the address bound with the transport
			 * for use by socketpair.
			 */
			bcopy(addr, &sti->sti_ux_laddr, addrlen);
			sti->sti_laddr_valid = 1;
			break;
		case AF_INET:
		case AF_INET6:
			ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen);
			bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len);
			sti->sti_laddr_valid = 1;
			break;
		default:
			/*
			 * Don't mark sti_laddr_valid, as we cannot be
			 * sure that the returned address is the real
			 * bound address when talking to an unknown
			 * transport.
			 */
			break;
		}
	}

	if (nl7c != NULL) {
		/* Register listen()er sonode pointer with NL7C */
		nl7c_listener_addr(nl7c, so);
	}

	freemsg(mp);

done:
	if (error) {
		/* reset state & backlog to values held on entry */
		if (clear_acceptconn_on_err == B_TRUE)
			so->so_state &= ~SS_ACCEPTCONN;
		if (restore_backlog_on_err == B_TRUE)
			so->so_backlog = save_so_backlog;

		if (unbind_on_err && so->so_state & SS_ISBOUND) {
			int err;

			err = sotpi_unbind(so, 0);
			/* LINTED - statement has no consequent: if */
			if (err) {
				eprintsoline(so, error);
			} else {
				ASSERT(!(so->so_state & SS_ISBOUND));
			}
		}
	}
	if (!(flags & _SOBIND_LOCK_HELD)) {
		so_unlock_single(so, SOLOCKED);
		mutex_exit(&so->so_lock);
	} else {
		ASSERT(MUTEX_HELD(&so->so_lock));
		ASSERT(so->so_flag & SOLOCKED);
	}
	return (error);
}

/* bind the socket */
static int
sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags, struct cred *cr)
{
	if ((flags & _SOBIND_SOCKETPAIR) == 0)
		return (sotpi_bindlisten(so, name, namelen, 0, flags, cr));

	flags &= ~_SOBIND_SOCKETPAIR;
	return (sotpi_bindlisten(so, name, namelen, 1, flags, cr));
}

/*
 * Unbind a socket - used when bind() fails, when bind() specifies a NULL
 * address, or when listen needs to unbind and bind.
 * If the _SOUNBIND_REBIND flag is specified the addresses are retained
 * so that a sobind can pick them up.
 */
static int
sotpi_unbind(struct sonode *so, int flags)
{
	struct T_unbind_req	unbind_req;
	int			error = 0;
	mblk_t			*mp;
	sotpi_info_t		*sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n",
	    (void *)so, flags, pr_state(so->so_state, so->so_mode)));

	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(so->so_flag & SOLOCKED);

	if (!(so->so_state & SS_ISBOUND)) {
		error = EINVAL;
		eprintsoline(so, error);
		goto done;
	}

	mutex_exit(&so->so_lock);

	/*
	 * Flush the read and write side (except stream head read queue)
	 * and send down T_UNBIND_REQ.
	 */
	(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);

	unbind_req.PRIM_type = T_UNBIND_REQ;
	mp = soallocproto1(&unbind_req, sizeof (unbind_req),
	    0, _ALLOC_SLEEP, CRED());
	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	mutex_enter(&so->so_lock);
	if (error) {
		eprintsoline(so, error);
		goto done;
	}

	error = sowaitokack(so, T_UNBIND_REQ);
	if (error) {
		eprintsoline(so, error);
		goto done;
	}

	/*
	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
	 * strsock_proto while the lock was dropped above, the unbind
	 * is allowed to complete.
	 */
	if (!(flags & _SOUNBIND_REBIND)) {
		/*
		 * Clear out bound address.
		 */
		vnode_t *vp;

		if ((vp = sti->sti_ux_bound_vp) != NULL) {
			sti->sti_ux_bound_vp = NULL;
			vn_rele_stream(vp);
		}
		/* Clear out address */
		sti->sti_laddr_len = 0;
	}
	so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
	sti->sti_laddr_valid = 0;

done:

	/* If the caller held the lock don't release it here */
	ASSERT(MUTEX_HELD(&so->so_lock));
	ASSERT(so->so_flag & SOLOCKED);

	return (error);
}

/*
 * listen on the socket.
 * For TPI conforming transports this has to first unbind with the transport
 * and then bind again using the new backlog.
 */
/* ARGSUSED */
int
sotpi_listen(struct sonode *so, int backlog, struct cred *cr)
{
	int		error = 0;
	sotpi_info_t	*sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n",
	    (void *)so, backlog, pr_state(so->so_state, so->so_mode)));

	if (sti->sti_serv_type == T_CLTS)
		return (EOPNOTSUPP);

	/*
	 * If the socket is ready to accept connections already, then
	 * return without doing anything. This avoids a problem where
	 * a second listen() call fails if a connection is pending and
	 * leaves the socket unbound. Only when we are not unbinding
	 * with the transport can we safely increase the backlog.
	 */
	if (so->so_state & SS_ACCEPTCONN &&
	    !((so->so_family == AF_INET || so->so_family == AF_INET6) &&
	    /*CONSTCOND*/
	    !solisten_tpi_tcp))
		return (0);

	if (so->so_state & SS_ISCONNECTED)
		return (EINVAL);

	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	/*
	 * If the listen doesn't change the backlog we do nothing.
	 * This avoids an EPROTO error from the transport.
	 */
	if ((so->so_state & SS_ACCEPTCONN) &&
	    so->so_backlog == backlog)
		goto done;

	if (!(so->so_state & SS_ISBOUND)) {
		/*
		 * Must have been explicitly bound in the UNIX domain.
		 */
		if (so->so_family == AF_UNIX) {
			error = EINVAL;
			goto done;
		}
		error = sotpi_bindlisten(so, NULL, 0, backlog,
		    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
	} else if (backlog > 0) {
		/*
		 * AF_INET{,6} hack to avoid losing the port.
		 * Assumes that all AF_INET{,6} transports can handle a
		 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI
		 * has already bound, thus it is possible to avoid the unbind.
		 */
		if (!((so->so_family == AF_INET || so->so_family == AF_INET6) &&
		    /*CONSTCOND*/
		    !solisten_tpi_tcp)) {
			error = sotpi_unbind(so, _SOUNBIND_REBIND);
			if (error)
				goto done;
		}
		error = sotpi_bindlisten(so, NULL, 0, backlog,
		    _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr);
	} else {
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = backlog;
	}
	if (error)
		goto done;
	ASSERT(so->so_state & SS_ACCEPTCONN);
done:
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Disconnect either a specified seqno or all (-1).
 * The former is used on listening sockets only.
 *
 * When seqno == -1 sodisconnect could call sotpi_unbind. However,
 * the current use of sodisconnect(seqno == -1) is only for shutdown
 * so there is no point (and it would potentially be incorrect) to unbind.
 */
static int
sodisconnect(struct sonode *so, t_scalar_t seqno, int flags)
{
	struct T_discon_req	discon_req;
	int			error = 0;
	mblk_t			*mp;

	dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n",
	    (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode)));

	if (!(flags & _SODISCONNECT_LOCK_HELD)) {
		mutex_enter(&so->so_lock);
		so_lock_single(so);	/* Set SOLOCKED */
	} else {
		ASSERT(MUTEX_HELD(&so->so_lock));
		ASSERT(so->so_flag & SOLOCKED);
	}

	if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) {
		error = EINVAL;
		eprintsoline(so, error);
		goto done;
	}

	mutex_exit(&so->so_lock);
	/*
	 * Flush the write side (unless this is a listener)
	 * and then send down a T_DISCON_REQ.
	 * (Don't flush on listener since it could flush {O_}T_CONN_RES
	 * and other messages.)
	 */
	if (!(so->so_state & SS_ACCEPTCONN))
		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW);

	discon_req.PRIM_type = T_DISCON_REQ;
	discon_req.SEQ_number = seqno;
	mp = soallocproto1(&discon_req, sizeof (discon_req),
	    0, _ALLOC_SLEEP, CRED());
	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
	mutex_enter(&so->so_lock);
	if (error) {
		eprintsoline(so, error);
		goto done;
	}

	error = sowaitokack(so, T_DISCON_REQ);
	if (error) {
		eprintsoline(so, error);
		goto done;
	}
	/*
	 * Even if some TPI message (e.g. T_DISCON_IND) was received in
	 * strsock_proto while the lock was dropped above, the disconnect
	 * is allowed to complete. However, it is not possible to
	 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set.
	 */
	so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING);
	SOTOTPI(so)->sti_laddr_valid = 0;
	SOTOTPI(so)->sti_faddr_valid = 0;
done:
	if (!(flags & _SODISCONNECT_LOCK_HELD)) {
		so_unlock_single(so, SOLOCKED);
		mutex_exit(&so->so_lock);
	} else {
		/* If the caller held the lock don't release it here */
		ASSERT(MUTEX_HELD(&so->so_lock));
		ASSERT(so->so_flag & SOLOCKED);
	}
	return (error);
}

/* ARGSUSED */
int
sotpi_accept(struct sonode *so, int fflag, struct cred *cr,
    struct sonode **nsop)
{
	struct T_conn_ind	*conn_ind;
	struct T_conn_res	*conn_res;
	int			error = 0;
	mblk_t			*mp, *ack_mp;
	struct sonode		*nso;
	vnode_t			*nvp;
	void			*src;
	t_uscalar_t		srclen;
	void			*opt;
	t_uscalar_t		optlen;
	t_scalar_t		PRIM_type;
	t_scalar_t		SEQ_number;
	size_t			sinlen;
	sotpi_info_t		*sti = SOTOTPI(so);
	sotpi_info_t		*nsti;

	dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n",
	    (void *)so, fflag, (void *)nsop,
	    pr_state(so->so_state, so->so_mode)));

	/*
	 * Defer single-threading the accepting socket until
	 * the T_CONN_IND has been received and parsed and the
	 * new sonode has been opened.
	 */

	/* Check that we are not already connected */
	if ((so->so_state & SS_ACCEPTCONN) == 0)
		goto conn_bad;
again:
	if ((error = sowaitconnind(so, fflag, &mp)) != 0)
		goto e_bad;

	ASSERT(mp != NULL);
	conn_ind = (struct T_conn_ind *)mp->b_rptr;

	/*
	 * Save SEQ_number for error paths.
	 */
	SEQ_number = conn_ind->SEQ_number;

	srclen = conn_ind->SRC_length;
	src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1);
	if (src == NULL) {
		error = EPROTO;
		freemsg(mp);
		eprintsoline(so, error);
		goto disconnect_unlocked;
	}
	optlen = conn_ind->OPT_length;
	switch (so->so_family) {
	case AF_INET:
	case AF_INET6:
		if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) {
			bcopy(mp->b_rptr + conn_ind->OPT_offset,
			    &opt, conn_ind->OPT_length);
		} else {
			/*
			 * The transport (in this case TCP) hasn't sent up
			 * a pointer to an instance for the accept fast-path.
			 * Disable fast-path completely because the call to
			 * sotpi_create() below would otherwise create an
			 * incomplete TCP instance, which would lead to
			 * problems when sockfs sends a normal T_CONN_RES
			 * message down the new stream.
			 */
			if (sti->sti_direct) {
				int rval;
				/*
				 * For consistency we inform tcp to disable
				 * direct interface on the listener, though
				 * we can certainly live without doing this
				 * because no data will ever travel upstream
				 * on the listening socket.
				 */
				sti->sti_direct = 0;
				(void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK,
				    0, 0, K_TO_K, cr, &rval);
			}
			opt = NULL;
			optlen = 0;
		}
		break;
	case AF_UNIX:
	default:
		if (optlen != 0) {
			opt = sogetoff(mp, conn_ind->OPT_offset, optlen,
			    __TPI_ALIGN_SIZE);
			if (opt == NULL) {
				error = EPROTO;
				freemsg(mp);
				eprintsoline(so, error);
				goto disconnect_unlocked;
			}
		}
		if (so->so_family == AF_UNIX) {
			if (!sti->sti_faddr_noxlate) {
				src = NULL;
				srclen = 0;
			}
			/* Extract src address from options */
			if (optlen != 0)
				so_getopt_srcaddr(opt, optlen, &src, &srclen);
		}
		break;
	}

	/*
	 * Create the new socket.
	 */
	nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error);
	if (nso == NULL) {
		ASSERT(error != 0);
		/*
		 * Accept can not fail with ENOBUFS. sotpi_create
		 * sleeps waiting for memory until a signal is caught
		 * so return EINTR.
		 */
		freemsg(mp);
		if (error == ENOBUFS)
			error = EINTR;
		goto e_disc_unl;
	}
	nvp = SOTOV(nso);
	nsti = SOTOTPI(nso);

#ifdef DEBUG
	/*
	 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus
	 * it's inherited early to allow debugging of the accept code itself.
	 */
	nso->so_options |= so->so_options & SO_DEBUG;
#endif /* DEBUG */

	/*
	 * Save the SRC address from the T_CONN_IND
	 * for getpeername to work on AF_UNIX and on transports that do not
	 * support TI_GETPEERNAME.
	 *
	 * NOTE: AF_UNIX NUL termination is ensured by the sender's
	 * copyin_name().
	 */
	if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) {
		error = EINVAL;
		freemsg(mp);
		eprintsoline(so, error);
		goto disconnect_vp_unlocked;
	}
	nsti->sti_faddr_len = (socklen_t)srclen;
	ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
	bcopy(src, nsti->sti_faddr_sa, srclen);
	nsti->sti_faddr_valid = 1;

	/*
	 * Record so_peercred and so_cpid from a cred in the T_CONN_IND.
	 */
	if ((DB_REF(mp) > 1) || MBLKSIZE(mp) <
	    (sizeof (struct T_conn_res) + sizeof (intptr_t))) {
		cred_t	*cr;
		pid_t	cpid;

		cr = msg_getcred(mp, &cpid);
		if (cr != NULL) {
			crhold(cr);
			nso->so_peercred = cr;
			nso->so_cpid = cpid;
		}
		freemsg(mp);

		mp = soallocproto1(NULL, sizeof (struct T_conn_res) +
		    sizeof (intptr_t), 0, _ALLOC_INTR, cr);
		if (mp == NULL) {
			/*
			 * Accept can not fail with ENOBUFS.
			 * A signal was caught so return EINTR.
			 */
			error = EINTR;
			eprintsoline(so, error);
			goto disconnect_vp_unlocked;
		}
		conn_res = (struct T_conn_res *)mp->b_rptr;
	} else {
		/*
		 * For efficiency reasons we use msg_extractcred; no crhold
		 * is needed since db_credp is cleared (i.e., we move the cred
		 * from the message to so_peercred).
		 */
		nso->so_peercred = msg_extractcred(mp, &nso->so_cpid);

		mp->b_rptr = DB_BASE(mp);
		conn_res = (struct T_conn_res *)mp->b_rptr;
		mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res);

		mblk_setcred(mp, cr, curproc->p_pid);
	}

	/*
	 * The new socket must be bound at least in sockfs and, except for
	 * AF_INET (or AF_INET6), it also has to be bound in the transport
	 * provider. We set the local address in the sonode from the T_OK_ACK
	 * of the T_CONN_RES. For this reason the address we bind to here isn't
	 * important.
	 */
	if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) &&
	    /*CONSTCOND*/
	    nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) {
		/*
		 * Optimization for AF_INET{,6} transports
		 * that can handle a T_CONN_RES without being bound.
		 */
		mutex_enter(&nso->so_lock);
		so_automatic_bind(nso);
		mutex_exit(&nso->so_lock);
	} else {
		/* Perform NULL bind with the transport provider. */
		if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC,
		    cr)) != 0) {
			ASSERT(error != ENOBUFS);
			freemsg(mp);
			eprintsoline(nso, error);
			goto disconnect_vp_unlocked;
		}
	}

	/*
	 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES
	 * so that any data arriving on the new socket will cause the
	 * appropriate signals to be delivered for the new socket.
	 *
	 * No other thread (except strsock_proto and strsock_misc)
	 * can access the new socket thus we relax the locking.
	 */
	nso->so_pgrp = so->so_pgrp;
	nso->so_state |= so->so_state & SS_ASYNC;
	nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate;

	if (nso->so_pgrp != 0) {
		if ((error = so_set_events(nso, nvp, cr)) != 0) {
			eprintsoline(nso, error);
			error = 0;
			nso->so_pgrp = 0;
		}
	}

	/*
	 * Make note of the socket level options. TCP and IP level options
	 * are already inherited. We could do all this after accept is
	 * successful but doing it here simplifies code and no harm done
	 * for error case.
	 */
	nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE|
	    SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK|
	    SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER);
	nso->so_sndbuf = so->so_sndbuf;
	nso->so_rcvbuf = so->so_rcvbuf;
	if (nso->so_options & SO_LINGER)
		nso->so_linger = so->so_linger;

	/*
	 * Note that the following sti_direct code path should be
	 * removed once we are confident that the direct sockets
	 * do not result in any degradation.
1904 */ 1905 if (sti->sti_direct) { 1906 1907 ASSERT(opt != NULL); 1908 1909 conn_res->OPT_length = optlen; 1910 conn_res->OPT_offset = MBLKL(mp); 1911 bcopy(&opt, mp->b_wptr, optlen); 1912 mp->b_wptr += optlen; 1913 conn_res->PRIM_type = T_CONN_RES; 1914 conn_res->ACCEPTOR_id = 0; 1915 PRIM_type = T_CONN_RES; 1916 1917 /* Send down the T_CONN_RES on acceptor STREAM */ 1918 error = kstrputmsg(SOTOV(nso), mp, NULL, 1919 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1920 if (error) { 1921 mutex_enter(&so->so_lock); 1922 so_lock_single(so); 1923 eprintsoline(so, error); 1924 goto disconnect_vp; 1925 } 1926 mutex_enter(&nso->so_lock); 1927 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1928 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1929 if (error) { 1930 mutex_exit(&nso->so_lock); 1931 mutex_enter(&so->so_lock); 1932 so_lock_single(so); 1933 eprintsoline(so, error); 1934 goto disconnect_vp; 1935 } 1936 if (nso->so_family == AF_INET) { 1937 sin_t *sin; 1938 1939 sin = (sin_t *)(ack_mp->b_rptr + 1940 sizeof (struct T_ok_ack)); 1941 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 1942 nsti->sti_laddr_len = sizeof (sin_t); 1943 } else { 1944 sin6_t *sin6; 1945 1946 sin6 = (sin6_t *)(ack_mp->b_rptr + 1947 sizeof (struct T_ok_ack)); 1948 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 1949 nsti->sti_laddr_len = sizeof (sin6_t); 1950 } 1951 freemsg(ack_mp); 1952 1953 nso->so_state |= SS_ISCONNECTED; 1954 nso->so_proto_handle = (sock_lower_handle_t)opt; 1955 nsti->sti_laddr_valid = 1; 1956 1957 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 1958 /* 1959 * A NL7C marked listen()er so the new socket 1960 * inherits the listen()er's NL7C state, except 1961 * for NL7C_POLLIN. 1962 * 1963 * Only call NL7C to process the new socket if 1964 * the listen socket allows blocking i/o. 1965 */ 1966 nsti->sti_nl7c_flags = 1967 sti->sti_nl7c_flags & (~NL7C_POLLIN); 1968 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1969 /* 1970 * Nonblocking accept() just make it 1971 * persist to defer processing to the 1972 * read-side syscall (e.g. read). 1973 */ 1974 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 1975 } else if (nl7c_process(nso, B_FALSE)) { 1976 /* 1977 * NL7C has completed processing on the 1978 * socket, close the socket and back to 1979 * the top to await the next T_CONN_IND. 1980 */ 1981 mutex_exit(&nso->so_lock); 1982 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1983 cr, NULL); 1984 VN_RELE(nvp); 1985 goto again; 1986 } 1987 /* Pass the new socket out */ 1988 } 1989 1990 mutex_exit(&nso->so_lock); 1991 1992 /* 1993 * It's possible, through the use of autopush for example, 1994 * that the acceptor stream may not support sti_direct 1995 * semantics. If the new socket does not support sti_direct 1996 * we issue a _SIOCSOCKFALLBACK to inform the transport 1997 * as we would in the I_PUSH case. 1998 */ 1999 if (nsti->sti_direct == 0) { 2000 int rval; 2001 2002 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2003 0, 0, K_TO_K, cr, &rval)) != 0) { 2004 mutex_enter(&so->so_lock); 2005 so_lock_single(so); 2006 eprintsoline(so, error); 2007 goto disconnect_vp; 2008 } 2009 } 2010 2011 /* 2012 * Pass out new socket. 2013 */ 2014 if (nsop != NULL) 2015 *nsop = nso; 2016 2017 return (0); 2018 } 2019 2020 /* 2021 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2022 * which don't support the FireEngine accept fast-path. It is also 2023 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2024 * again. 
Neither sockfs nor TCP attempt to find out if some other 2025 * random module has been inserted in between (in which case we 2026 * should follow TLI accept behaviour). We blindly assume the worst 2027 * case and revert back to old behaviour i.e. TCP will not send us 2028 * any option (eager) and the accept should happen on the listener 2029 * queue. Any queued T_conn_ind have already got their options removed 2030 * by so_sock2_stream() when "sockmod" was I_POP'd. 2031 */ 2032 /* 2033 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2034 */ 2035 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2036 #ifdef _ILP32 2037 queue_t *q; 2038 2039 /* 2040 * Find read queue in driver 2041 * Can safely do this since we "own" nso/nvp. 2042 */ 2043 q = strvp2wq(nvp)->q_next; 2044 while (SAMESTR(q)) 2045 q = q->q_next; 2046 q = RD(q); 2047 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2048 #else 2049 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2050 #endif /* _ILP32 */ 2051 conn_res->PRIM_type = O_T_CONN_RES; 2052 PRIM_type = O_T_CONN_RES; 2053 } else { 2054 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2055 conn_res->PRIM_type = T_CONN_RES; 2056 PRIM_type = T_CONN_RES; 2057 } 2058 conn_res->SEQ_number = SEQ_number; 2059 conn_res->OPT_length = 0; 2060 conn_res->OPT_offset = 0; 2061 2062 mutex_enter(&so->so_lock); 2063 so_lock_single(so); /* Set SOLOCKED */ 2064 mutex_exit(&so->so_lock); 2065 2066 error = kstrputmsg(SOTOV(so), mp, NULL, 2067 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2068 mutex_enter(&so->so_lock); 2069 if (error) { 2070 eprintsoline(so, error); 2071 goto disconnect_vp; 2072 } 2073 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2074 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2075 if (error) { 2076 eprintsoline(so, error); 2077 goto disconnect_vp; 2078 } 2079 mutex_exit(&so->so_lock); 2080 /* 2081 * If there is a sin/sin6 appended onto the T_OK_ACK use 2082 * that to set the local address. If this is not present 2083 * then we zero out the address and don't set the 2084 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2085 * the pathname from the listening socket. 2086 * In the case where this is TCP or an AF_UNIX socket the 2087 * client side may have queued data or a T_ORDREL in the 2088 * transport. Having now sent the T_CONN_RES we may receive 2089 * those queued messages at any time. Hold the acceptor 2090 * so_lock until its state and laddr are finalized. 2091 */ 2092 mutex_enter(&nso->so_lock); 2093 sinlen = (nso->so_family == AF_INET) ? 
sizeof (sin_t) : sizeof (sin6_t); 2094 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 2095 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2096 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2097 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2098 nsti->sti_laddr_len = sinlen; 2099 nsti->sti_laddr_valid = 1; 2100 } else if (nso->so_family == AF_UNIX) { 2101 ASSERT(so->so_family == AF_UNIX); 2102 nsti->sti_laddr_len = sti->sti_laddr_len; 2103 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2104 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2105 nsti->sti_laddr_len); 2106 nsti->sti_laddr_valid = 1; 2107 } else { 2108 nsti->sti_laddr_len = sti->sti_laddr_len; 2109 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2110 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2111 nsti->sti_laddr_sa->sa_family = nso->so_family; 2112 } 2113 nso->so_state |= SS_ISCONNECTED; 2114 mutex_exit(&nso->so_lock); 2115 2116 freemsg(ack_mp); 2117 2118 mutex_enter(&so->so_lock); 2119 so_unlock_single(so, SOLOCKED); 2120 mutex_exit(&so->so_lock); 2121 2122 /* 2123 * Pass out new socket. 2124 */ 2125 if (nsop != NULL) 2126 *nsop = nso; 2127 2128 return (0); 2129 2130 2131 eproto_disc_unl: 2132 error = EPROTO; 2133 e_disc_unl: 2134 eprintsoline(so, error); 2135 goto disconnect_unlocked; 2136 2137 pr_disc_vp_unl: 2138 eprintsoline(so, error); 2139 disconnect_vp_unlocked: 2140 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2141 VN_RELE(nvp); 2142 disconnect_unlocked: 2143 (void) sodisconnect(so, SEQ_number, 0); 2144 return (error); 2145 2146 pr_disc_vp: 2147 eprintsoline(so, error); 2148 disconnect_vp: 2149 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2150 so_unlock_single(so, SOLOCKED); 2151 mutex_exit(&so->so_lock); 2152 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2153 VN_RELE(nvp); 2154 return (error); 2155 2156 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2157 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2158 ? EOPNOTSUPP : EINVAL; 2159 e_bad: 2160 eprintsoline(so, error); 2161 return (error); 2162 } 2163 2164 /* 2165 * connect a socket. 2166 * 2167 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2168 * unconnect (by specifying a null address). 2169 */ 2170 int 2171 sotpi_connect(struct sonode *so, 2172 struct sockaddr *name, 2173 socklen_t namelen, 2174 int fflag, 2175 int flags, 2176 struct cred *cr) 2177 { 2178 struct T_conn_req conn_req; 2179 int error = 0; 2180 mblk_t *mp; 2181 void *src; 2182 socklen_t srclen; 2183 void *addr; 2184 socklen_t addrlen; 2185 boolean_t need_unlock; 2186 sotpi_info_t *sti = SOTOTPI(so); 2187 2188 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2189 (void *)so, (void *)name, namelen, fflag, flags, 2190 pr_state(so->so_state, so->so_mode))); 2191 2192 /* 2193 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2194 * avoid sleeping for memory with SOLOCKED held. 2195 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2196 * + sizeof (struct T_opthdr). 2197 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2198 * exceed sti_faddr_maxlen). 2199 */ 2200 mp = soallocproto(sizeof (struct T_conn_req) + 2201 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 2202 cr); 2203 if (mp == NULL) { 2204 /* 2205 * Connect can not fail with ENOBUFS. A signal was 2206 * caught so return EINTR.
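 * The mblk preallocated above is sized for the worst case assembled
 * further down: a T_conn_req, the destination address padded with
 * _TPI_ALIGN_TOPT(), and (for AF_UNIX) a T_opthdr followed by the
 * padded source address.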
2207 */ 2208 error = EINTR; 2209 eprintsoline(so, error); 2210 return (error); 2211 } 2212 2213 mutex_enter(&so->so_lock); 2214 /* 2215 * Make sure there is a preallocated T_unbind_req message 2216 * before any binding. This message is allocated when the 2217 * socket is created. Since another thread can consume 2218 * so_unbind_mp by the time we return from so_lock_single(), 2219 * we should check the availability of so_unbind_mp after 2220 * we return from so_lock_single(). 2221 */ 2222 2223 so_lock_single(so); /* Set SOLOCKED */ 2224 need_unlock = B_TRUE; 2225 2226 if (sti->sti_unbind_mp == NULL) { 2227 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2228 /* NOTE: holding so_lock while sleeping */ 2229 sti->sti_unbind_mp = 2230 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2231 if (sti->sti_unbind_mp == NULL) { 2232 error = EINTR; 2233 goto done; 2234 } 2235 } 2236 2237 /* 2238 * Can't have done a listen before connecting. 2239 */ 2240 if (so->so_state & SS_ACCEPTCONN) { 2241 error = EOPNOTSUPP; 2242 goto done; 2243 } 2244 2245 /* 2246 * Must be bound with the transport 2247 */ 2248 if (!(so->so_state & SS_ISBOUND)) { 2249 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2250 /*CONSTCOND*/ 2251 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2252 /* 2253 * Optimization for AF_INET{,6} transports 2254 * that can handle a T_CONN_REQ without being bound. 2255 */ 2256 so_automatic_bind(so); 2257 } else { 2258 error = sotpi_bind(so, NULL, 0, 2259 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2260 if (error) 2261 goto done; 2262 } 2263 ASSERT(so->so_state & SS_ISBOUND); 2264 flags |= _SOCONNECT_DID_BIND; 2265 } 2266 2267 /* 2268 * Handle a connect to a name parameter of type AF_UNSPEC like a 2269 * connect to a null address. This is the portable method to 2270 * unconnect a socket. 2271 */ 2272 if ((namelen >= sizeof (sa_family_t)) && 2273 (name->sa_family == AF_UNSPEC)) { 2274 name = NULL; 2275 namelen = 0; 2276 } 2277 2278 /* 2279 * Check that we are not already connected. 2280 * A connection-oriented socket cannot be reconnected. 2281 * A connected connection-less socket can be 2282 * - connected to a different address by a subsequent connect 2283 * - "unconnected" by a connect to the NULL address 2284 */ 2285 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2286 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2287 if (so->so_mode & SM_CONNREQUIRED) { 2288 /* Connection-oriented socket */ 2289 error = so->so_state & SS_ISCONNECTED ? 2290 EISCONN : EALREADY; 2291 goto done; 2292 } 2293 /* Connection-less socket */ 2294 if (name == NULL) { 2295 /* 2296 * Remove the connected state and clear SO_DGRAM_ERRIND 2297 * since it was set when the socket was connected. 2298 * If this is UDP also send down a T_DISCON_REQ. 2299 */ 2300 int val; 2301 2302 if ((so->so_family == AF_INET || 2303 so->so_family == AF_INET6) && 2304 (so->so_type == SOCK_DGRAM || 2305 so->so_type == SOCK_RAW) && 2306 /*CONSTCOND*/ 2307 !soconnect_tpi_udp) { 2308 /* XXX What about implicitly unbinding here? 
*/ 2309 error = sodisconnect(so, -1, 2310 _SODISCONNECT_LOCK_HELD); 2311 } else { 2312 so->so_state &= 2313 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2314 sti->sti_faddr_valid = 0; 2315 sti->sti_faddr_len = 0; 2316 } 2317 2318 /* Remove SOLOCKED since setsockopt will grab it */ 2319 so_unlock_single(so, SOLOCKED); 2320 mutex_exit(&so->so_lock); 2321 2322 val = 0; 2323 (void) sotpi_setsockopt(so, SOL_SOCKET, 2324 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2325 cr); 2326 2327 mutex_enter(&so->so_lock); 2328 so_lock_single(so); /* Set SOLOCKED */ 2329 goto done; 2330 } 2331 } 2332 ASSERT(so->so_state & SS_ISBOUND); 2333 2334 if (name == NULL || namelen == 0) { 2335 error = EINVAL; 2336 goto done; 2337 } 2338 /* 2339 * Mark the socket if sti_faddr_sa represents the transport level 2340 * address. 2341 */ 2342 if (flags & _SOCONNECT_NOXLATE) { 2343 struct sockaddr_ux *soaddr_ux; 2344 2345 ASSERT(so->so_family == AF_UNIX); 2346 if (namelen != sizeof (struct sockaddr_ux)) { 2347 error = EINVAL; 2348 goto done; 2349 } 2350 soaddr_ux = (struct sockaddr_ux *)name; 2351 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2352 namelen = sizeof (soaddr_ux->sou_addr); 2353 sti->sti_faddr_noxlate = 1; 2354 } 2355 2356 /* 2357 * Length and family checks. 2358 */ 2359 error = so_addr_verify(so, name, namelen); 2360 if (error) 2361 goto bad; 2362 2363 /* 2364 * Save foreign address. Needed for AF_UNIX as well as 2365 * transport providers that do not support TI_GETPEERNAME. 2366 * Also used for cached foreign address for TCP and UDP. 2367 */ 2368 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2369 error = EINVAL; 2370 goto done; 2371 } 2372 sti->sti_faddr_len = (socklen_t)namelen; 2373 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2374 bcopy(name, sti->sti_faddr_sa, namelen); 2375 sti->sti_faddr_valid = 1; 2376 2377 if (so->so_family == AF_UNIX) { 2378 if (sti->sti_faddr_noxlate) { 2379 /* 2380 * Already have a transport internal address. Do not 2381 * pass any (transport internal) source address. 2382 */ 2383 addr = sti->sti_faddr_sa; 2384 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2385 src = NULL; 2386 srclen = 0; 2387 } else { 2388 /* 2389 * Pass the sockaddr_un source address as an option 2390 * and translate the remote address. 2391 * Holding so_lock thus sti_laddr_sa can not change. 2392 */ 2393 src = sti->sti_laddr_sa; 2394 srclen = (t_uscalar_t)sti->sti_laddr_len; 2395 dprintso(so, 1, 2396 ("sotpi_connect UNIX: srclen %d, src %p\n", 2397 srclen, src)); 2398 error = so_ux_addr_xlate(so, 2399 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2400 (flags & _SOCONNECT_XPG4_2), 2401 &addr, &addrlen); 2402 if (error) 2403 goto bad; 2404 } 2405 } else { 2406 addr = sti->sti_faddr_sa; 2407 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2408 src = NULL; 2409 srclen = 0; 2410 } 2411 /* 2412 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2413 * option which asks the transport provider to send T_UDERR_IND 2414 * messages. These T_UDERR_IND messages are used to return connected 2415 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2416 * 2417 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2418 * we send down a T_CONN_REQ. This is needed to let the 2419 * transport assign a local address that is consistent with 2420 * the remote address. Applications depend on a getsockname() 2421 * after a connect() to retrieve the "source" IP address for 2422 * the connected socket. Invalidate the cached local address 2423 * to force getsockname() to enquire of the transport. 
2424 */ 2425 if (!(so->so_mode & SM_CONNREQUIRED)) { 2426 /* 2427 * Datagram socket. 2428 */ 2429 int32_t val; 2430 2431 so_unlock_single(so, SOLOCKED); 2432 mutex_exit(&so->so_lock); 2433 2434 val = 1; 2435 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2436 &val, (t_uscalar_t)sizeof (val), cr); 2437 2438 mutex_enter(&so->so_lock); 2439 so_lock_single(so); /* Set SOLOCKED */ 2440 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2441 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2442 soconnect_tpi_udp) { 2443 soisconnected(so); 2444 goto done; 2445 } 2446 /* 2447 * Send down T_CONN_REQ etc. 2448 * Clear fflag to avoid returning EWOULDBLOCK. 2449 */ 2450 fflag = 0; 2451 ASSERT(so->so_family != AF_UNIX); 2452 sti->sti_laddr_valid = 0; 2453 } else if (sti->sti_laddr_len != 0) { 2454 /* 2455 * If the local address or port was "any" then it may be 2456 * changed by the transport as a result of the 2457 * connect. Invalidate the cached version if we have one. 2458 */ 2459 switch (so->so_family) { 2460 case AF_INET: 2461 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2462 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2463 INADDR_ANY || 2464 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2465 sti->sti_laddr_valid = 0; 2466 break; 2467 2468 case AF_INET6: 2469 ASSERT(sti->sti_laddr_len == 2470 (socklen_t)sizeof (sin6_t)); 2471 if (IN6_IS_ADDR_UNSPECIFIED( 2472 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2473 IN6_IS_ADDR_V4MAPPED_ANY( 2474 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2475 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2476 sti->sti_laddr_valid = 0; 2477 break; 2478 2479 default: 2480 break; 2481 } 2482 } 2483 2484 /* 2485 * Check for failure of an earlier call 2486 */ 2487 if (so->so_error != 0) 2488 goto so_bad; 2489 2490 /* 2491 * Send down T_CONN_REQ. Message was allocated above. 2492 */ 2493 conn_req.PRIM_type = T_CONN_REQ; 2494 conn_req.DEST_length = addrlen; 2495 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2496 if (srclen == 0) { 2497 conn_req.OPT_length = 0; 2498 conn_req.OPT_offset = 0; 2499 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2500 soappendmsg(mp, addr, addrlen); 2501 } else { 2502 /* 2503 * There is a AF_UNIX sockaddr_un to include as a source 2504 * address option. 2505 */ 2506 struct T_opthdr toh; 2507 2508 toh.level = SOL_SOCKET; 2509 toh.name = SO_SRCADDR; 2510 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2511 toh.status = 0; 2512 conn_req.OPT_length = 2513 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2514 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2515 _TPI_ALIGN_TOPT(addrlen)); 2516 2517 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2518 soappendmsg(mp, addr, addrlen); 2519 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2520 soappendmsg(mp, &toh, sizeof (toh)); 2521 soappendmsg(mp, src, srclen); 2522 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2523 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2524 } 2525 /* 2526 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2527 * in order to have the right state when the T_CONN_CON shows up. 
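 * (The T_CONN_CON or T_DISCON_IND that eventually arrives is processed
 * asynchronously on the stream's read side by strsock_proto();
 * sowaitconnected() further down simply waits for that state
 * transition.)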
2528 */ 2529 soisconnecting(so); 2530 mutex_exit(&so->so_lock); 2531 2532 if (AU_AUDITING()) 2533 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2534 2535 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2536 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2537 mp = NULL; 2538 mutex_enter(&so->so_lock); 2539 if (error != 0) 2540 goto bad; 2541 2542 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2543 goto bad; 2544 2545 /* Allow other threads to access the socket */ 2546 so_unlock_single(so, SOLOCKED); 2547 need_unlock = B_FALSE; 2548 2549 /* 2550 * Wait until we get a T_CONN_CON or an error 2551 */ 2552 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2553 so_lock_single(so); /* Set SOLOCKED */ 2554 need_unlock = B_TRUE; 2555 } 2556 2557 done: 2558 freemsg(mp); 2559 switch (error) { 2560 case EINPROGRESS: 2561 case EALREADY: 2562 case EISCONN: 2563 case EINTR: 2564 /* Non-fatal errors */ 2565 sti->sti_laddr_valid = 0; 2566 /* FALLTHRU */ 2567 case 0: 2568 break; 2569 default: 2570 ASSERT(need_unlock); 2571 /* 2572 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2573 * and invalidate local-address cache 2574 */ 2575 so->so_state &= ~SS_ISCONNECTING; 2576 sti->sti_laddr_valid = 0; 2577 /* A discon_ind might have already unbound us */ 2578 if ((flags & _SOCONNECT_DID_BIND) && 2579 (so->so_state & SS_ISBOUND)) { 2580 int err; 2581 2582 err = sotpi_unbind(so, 0); 2583 /* LINTED - statement has no conseq */ 2584 if (err) { 2585 eprintsoline(so, err); 2586 } 2587 } 2588 break; 2589 } 2590 if (need_unlock) 2591 so_unlock_single(so, SOLOCKED); 2592 mutex_exit(&so->so_lock); 2593 return (error); 2594 2595 so_bad: error = sogeterr(so, B_TRUE); 2596 bad: eprintsoline(so, error); 2597 goto done; 2598 } 2599 2600 /* ARGSUSED */ 2601 int 2602 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2603 { 2604 struct T_ordrel_req ordrel_req; 2605 mblk_t *mp; 2606 uint_t old_state, state_change; 2607 int error = 0; 2608 sotpi_info_t *sti = SOTOTPI(so); 2609 2610 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2611 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2612 2613 mutex_enter(&so->so_lock); 2614 so_lock_single(so); /* Set SOLOCKED */ 2615 2616 /* 2617 * SunOS 4.X has no check for datagram sockets. 2618 * 5.X checks that it is connected (ENOTCONN) 2619 * X/Open requires that we check the connected state. 2620 */ 2621 if (!(so->so_state & SS_ISCONNECTED)) { 2622 if (!xnet_skip_checks) { 2623 error = ENOTCONN; 2624 if (xnet_check_print) { 2625 printf("sockfs: X/Open shutdown check " 2626 "caused ENOTCONN\n"); 2627 } 2628 } 2629 goto done; 2630 } 2631 /* 2632 * Record the current state and then perform any state changes. 2633 * Then use the difference between the old and new states to 2634 * determine which messages need to be sent. 2635 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2636 * duplicate calls to shutdown(). 2637 */ 2638 old_state = so->so_state; 2639 2640 switch (how) { 2641 case 0: 2642 socantrcvmore(so); 2643 break; 2644 case 1: 2645 socantsendmore(so); 2646 break; 2647 case 2: 2648 socantsendmore(so); 2649 socantrcvmore(so); 2650 break; 2651 default: 2652 error = EINVAL; 2653 goto done; 2654 } 2655 2656 /* 2657 * Assumes that the SS_CANT* flags are never cleared in the above code. 
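 * Because these flags are only ever added, the new state is a superset
 * of the old one and the subtraction below yields exactly the bits set
 * by this call.  For example, if SS_CANTRCVMORE was already set and
 * shutdown() is now called with how == 2, state_change is
 * SS_CANTSENDMORE and the switch below only performs the write-side
 * work.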
2658 */ 2659 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2660 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2661 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2662 2663 switch (state_change) { 2664 case 0: 2665 dprintso(so, 1, 2666 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2667 so->so_state)); 2668 goto done; 2669 2670 case SS_CANTRCVMORE: 2671 mutex_exit(&so->so_lock); 2672 strseteof(SOTOV(so), 1); 2673 /* 2674 * strseteof takes care of read side wakeups, 2675 * pollwakeups, and signals. 2676 */ 2677 /* 2678 * Get the read lock before flushing data to avoid problems 2679 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2680 */ 2681 mutex_enter(&so->so_lock); 2682 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2683 mutex_exit(&so->so_lock); 2684 2685 /* Flush read side queue */ 2686 strflushrq(SOTOV(so), FLUSHALL); 2687 2688 mutex_enter(&so->so_lock); 2689 so_unlock_read(so); /* Clear SOREADLOCKED */ 2690 break; 2691 2692 case SS_CANTSENDMORE: 2693 mutex_exit(&so->so_lock); 2694 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2695 mutex_enter(&so->so_lock); 2696 break; 2697 2698 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2699 mutex_exit(&so->so_lock); 2700 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2701 strseteof(SOTOV(so), 1); 2702 /* 2703 * strseteof takes care of read side wakeups, 2704 * pollwakeups, and signals. 2705 */ 2706 /* 2707 * Get the read lock before flushing data to avoid problems 2708 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2709 */ 2710 mutex_enter(&so->so_lock); 2711 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2712 mutex_exit(&so->so_lock); 2713 2714 /* Flush read side queue */ 2715 strflushrq(SOTOV(so), FLUSHALL); 2716 2717 mutex_enter(&so->so_lock); 2718 so_unlock_read(so); /* Clear SOREADLOCKED */ 2719 break; 2720 } 2721 2722 ASSERT(MUTEX_HELD(&so->so_lock)); 2723 2724 /* 2725 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2726 * was set due to this call and the new state has both of them set: 2727 * Send the AF_UNIX close indication 2728 * For T_COTS send a discon_ind 2729 * 2730 * If cantsend was set due to this call: 2731 * For T_COTSORD send an ordrel_ind 2732 * 2733 * Note that for T_CLTS there is no message sent here. 2734 */ 2735 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2736 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2737 /* 2738 * For SunOS 4.X compatibility we tell the other end 2739 * that we are unable to receive at this point. 2740 */ 2741 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2742 so_unix_close(so); 2743 2744 if (sti->sti_serv_type == T_COTS) 2745 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2746 } 2747 if ((state_change & SS_CANTSENDMORE) && 2748 (sti->sti_serv_type == T_COTS_ORD)) { 2749 /* Send an orderly release */ 2750 ordrel_req.PRIM_type = T_ORDREL_REQ; 2751 2752 mutex_exit(&so->so_lock); 2753 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2754 0, _ALLOC_SLEEP, cr); 2755 /* 2756 * Send down the T_ORDREL_REQ even if there is flow control. 2757 * This prevents shutdown from blocking. 2758 * Note that there is no T_OK_ACK for ordrel_req. 
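 * Since nothing is acknowledged, the putmsg below is fire-and-forget;
 * any failure is simply reported back to the caller.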
*/ 2760 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2761 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2762 mutex_enter(&so->so_lock); 2763 if (error) { 2764 eprintsoline(so, error); 2765 goto done; 2766 } 2767 } 2768 2769 done: 2770 so_unlock_single(so, SOLOCKED); 2771 mutex_exit(&so->so_lock); 2772 return (error); 2773 } 2774 2775 /* 2776 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2777 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2778 * that we have closed. 2779 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2780 * T_UNITDATA_REQ containing the same option. 2781 * 2782 * For SOCK_DGRAM half-connections (somebody connected to this end 2783 * but this end is not connected) we don't know where to send any 2784 * SO_UNIX_CLOSE. 2785 * 2786 * We have to ignore stream head errors just in case there has been 2787 * a shutdown(output). 2788 * Ignore any flow control to try to get the message more quickly to the peer. 2789 * While locally ignoring flow control solves the problem when there 2790 * is only the loopback transport on the stream it would not provide 2791 * the correct AF_UNIX socket semantics when one or more modules have 2792 * been pushed. 2793 */ 2794 void 2795 so_unix_close(struct sonode *so) 2796 { 2797 int error; 2798 struct T_opthdr toh; 2799 mblk_t *mp; 2800 sotpi_info_t *sti = SOTOTPI(so); 2801 2802 ASSERT(MUTEX_HELD(&so->so_lock)); 2803 2804 ASSERT(so->so_family == AF_UNIX); 2805 2806 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2807 (SS_ISCONNECTED|SS_ISBOUND)) 2808 return; 2809 2810 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2811 (void *)so, pr_state(so->so_state, so->so_mode))); 2812 2813 toh.level = SOL_SOCKET; 2814 toh.name = SO_UNIX_CLOSE; 2815 2816 /* zero length + header */ 2817 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2818 toh.status = 0; 2819 2820 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2821 struct T_optdata_req tdr; 2822 2823 tdr.PRIM_type = T_OPTDATA_REQ; 2824 tdr.DATA_flag = 0; 2825 2826 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2827 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2828 2829 /* NOTE: holding so_lock while sleeping */ 2830 mp = soallocproto2(&tdr, sizeof (tdr), 2831 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2832 } else { 2833 struct T_unitdata_req tudr; 2834 void *addr; 2835 socklen_t addrlen; 2836 void *src; 2837 socklen_t srclen; 2838 struct T_opthdr toh2; 2839 t_scalar_t size; 2840 2841 /* Connected DGRAM socket */ 2842 2843 /* 2844 * For AF_UNIX the destination address is translated to 2845 * an internal name and the source address is passed as 2846 * an option. 2847 */ 2848 /* 2849 * Length and family checks. 2850 */ 2851 error = so_addr_verify(so, sti->sti_faddr_sa, 2852 (t_uscalar_t)sti->sti_faddr_len); 2853 if (error) { 2854 eprintsoline(so, error); 2855 return; 2856 } 2857 if (sti->sti_faddr_noxlate) { 2858 /* 2859 * Already have a transport internal address. Do not 2860 * pass any (transport internal) source address. 2861 */ 2862 addr = sti->sti_faddr_sa; 2863 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2864 src = NULL; 2865 srclen = 0; 2866 } else { 2867 /* 2868 * Pass the sockaddr_un source address as an option 2869 * and translate the remote address. 2870 * Holding so_lock thus sti_laddr_sa can not change.
2871 */ 2872 src = sti->sti_laddr_sa; 2873 srclen = (socklen_t)sti->sti_laddr_len; 2874 dprintso(so, 1, 2875 ("so_ux_close: srclen %d, src %p\n", 2876 srclen, src)); 2877 error = so_ux_addr_xlate(so, 2878 sti->sti_faddr_sa, 2879 (socklen_t)sti->sti_faddr_len, 0, 2880 &addr, &addrlen); 2881 if (error) { 2882 eprintsoline(so, error); 2883 return; 2884 } 2885 } 2886 tudr.PRIM_type = T_UNITDATA_REQ; 2887 tudr.DEST_length = addrlen; 2888 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2889 if (srclen == 0) { 2890 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2891 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2892 _TPI_ALIGN_TOPT(addrlen)); 2893 2894 size = tudr.OPT_offset + tudr.OPT_length; 2895 /* NOTE: holding so_lock while sleeping */ 2896 mp = soallocproto2(&tudr, sizeof (tudr), 2897 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2898 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2899 soappendmsg(mp, &toh, sizeof (toh)); 2900 } else { 2901 /* 2902 * There is a AF_UNIX sockaddr_un to include as a 2903 * source address option. 2904 */ 2905 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2906 _TPI_ALIGN_TOPT(srclen)); 2907 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2908 _TPI_ALIGN_TOPT(addrlen)); 2909 2910 toh2.level = SOL_SOCKET; 2911 toh2.name = SO_SRCADDR; 2912 toh2.len = (t_uscalar_t)(srclen + 2913 sizeof (struct T_opthdr)); 2914 toh2.status = 0; 2915 2916 size = tudr.OPT_offset + tudr.OPT_length; 2917 2918 /* NOTE: holding so_lock while sleeping */ 2919 mp = soallocproto2(&tudr, sizeof (tudr), 2920 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2921 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2922 soappendmsg(mp, &toh, sizeof (toh)); 2923 soappendmsg(mp, &toh2, sizeof (toh2)); 2924 soappendmsg(mp, src, srclen); 2925 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2926 } 2927 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2928 } 2929 mutex_exit(&so->so_lock); 2930 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2931 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2932 mutex_enter(&so->so_lock); 2933 } 2934 2935 /* 2936 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2937 * In addition, the caller typically verifies that there is some 2938 * potential state to clear by checking 2939 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2940 * before calling this routine. 2941 * Note that such a check can be made without holding so_lock since 2942 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2943 * decrements sti_oobsigcnt. 2944 * 2945 * When data is read *after* the point that all pending 2946 * oob data has been consumed the oob indication is cleared. 2947 * 2948 * This logic keeps select/poll returning POLLRDBAND and 2949 * SIOCATMARK returning true until we have read past 2950 * the mark. 2951 */ 2952 static void 2953 sorecv_update_oobstate(struct sonode *so) 2954 { 2955 sotpi_info_t *sti = SOTOTPI(so); 2956 2957 mutex_enter(&so->so_lock); 2958 ASSERT(so_verify_oobstate(so)); 2959 dprintso(so, 1, 2960 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2961 sti->sti_oobsigcnt, 2962 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 2963 if (sti->sti_oobsigcnt == 0) { 2964 /* No more pending oob indications */ 2965 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2966 freemsg(so->so_oobmsg); 2967 so->so_oobmsg = NULL; 2968 } 2969 ASSERT(so_verify_oobstate(so)); 2970 mutex_exit(&so->so_lock); 2971 } 2972 2973 /* 2974 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 
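 * M_DATA mblks are uiomove()d into the caller's buffer and freed once
 * fully consumed; any non-data mblk is unlinked and handed back through
 * *rmp for the caller to process.  When the last saved mblk is gone the
 * rval saved from the original kstrgetmsg() is passed back through *rp
 * (and any saved error is returned).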
2975 */ 2976 static int 2977 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2978 { 2979 sotpi_info_t *sti = SOTOTPI(so); 2980 int error = 0; 2981 mblk_t *tmp = NULL; 2982 mblk_t *pmp = NULL; 2983 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 2984 2985 ASSERT(nmp != NULL); 2986 2987 while (nmp != NULL && uiop->uio_resid > 0) { 2988 ssize_t n; 2989 2990 if (DB_TYPE(nmp) == M_DATA) { 2991 /* 2992 * We have some data, uiomove up to resid bytes. 2993 */ 2994 n = MIN(MBLKL(nmp), uiop->uio_resid); 2995 if (n > 0) 2996 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 2997 nmp->b_rptr += n; 2998 if (nmp->b_rptr == nmp->b_wptr) { 2999 pmp = nmp; 3000 nmp = nmp->b_cont; 3001 } 3002 if (error) 3003 break; 3004 } else { 3005 /* 3006 * We only handle data, save for caller to handle. 3007 */ 3008 if (pmp != NULL) { 3009 pmp->b_cont = nmp->b_cont; 3010 } 3011 nmp->b_cont = NULL; 3012 if (*rmp == NULL) { 3013 *rmp = nmp; 3014 } else { 3015 tmp->b_cont = nmp; 3016 } 3017 nmp = nmp->b_cont; 3018 tmp = nmp; 3019 } 3020 } 3021 if (pmp != NULL) { 3022 /* Free any mblk_t(s) which we have consumed */ 3023 pmp->b_cont = NULL; 3024 freemsg(sti->sti_nl7c_rcv_mp); 3025 } 3026 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3027 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3028 if (error == 0) { 3029 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3030 3031 error = p->r_v.r_v2; 3032 p->r_v.r_v2 = 0; 3033 } 3034 rp->r_vals = sti->sti_nl7c_rcv_rval; 3035 sti->sti_nl7c_rcv_rval = 0; 3036 } else { 3037 /* More mblk_t(s) to process so no rval to return */ 3038 rp->r_vals = 0; 3039 } 3040 return (error); 3041 } 3042 /* 3043 * Receive the next message on the queue. 3044 * If msg_controllen is non-zero when called the caller is interested in 3045 * any received control info (options). 3046 * If msg_namelen is non-zero when called the caller is interested in 3047 * any received source address. 3048 * The routine returns with msg_control and msg_name pointing to 3049 * kmem_alloc'ed memory which the caller has to free. 3050 */ 3051 /* ARGSUSED */ 3052 int 3053 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3054 struct cred *cr) 3055 { 3056 union T_primitives *tpr; 3057 mblk_t *mp; 3058 uchar_t pri; 3059 int pflag, opflag; 3060 void *control; 3061 t_uscalar_t controllen; 3062 t_uscalar_t namelen; 3063 int so_state = so->so_state; /* Snapshot */ 3064 ssize_t saved_resid; 3065 rval_t rval; 3066 int flags; 3067 clock_t timout; 3068 int error = 0; 3069 sotpi_info_t *sti = SOTOTPI(so); 3070 3071 flags = msg->msg_flags; 3072 msg->msg_flags = 0; 3073 3074 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3075 (void *)so, (void *)msg, flags, 3076 pr_state(so->so_state, so->so_mode), so->so_error)); 3077 3078 if (so->so_version == SOV_STREAM) { 3079 so_update_attrs(so, SOACC); 3080 /* The imaginary "sockmod" has been popped - act as a stream */ 3081 return (strread(SOTOV(so), uiop, cr)); 3082 } 3083 3084 /* 3085 * If we are not connected because we have never been connected 3086 * we return ENOTCONN. If we have been connected (but are no longer 3087 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3088 * the EOF. 3089 * 3090 * An alternative would be to post an ENOTCONN error in stream head 3091 * (read+write) and clear it when we're connected. However, that error 3092 * would cause incorrect poll/select behavior! 
3093 */ 3094 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3095 (so->so_mode & SM_CONNREQUIRED)) { 3096 return (ENOTCONN); 3097 } 3098 3099 /* 3100 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3101 * after checking that the read queue is empty) and returns zero. 3102 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3103 * is zero. 3104 */ 3105 3106 if (flags & MSG_OOB) { 3107 /* Check that the transport supports OOB */ 3108 if (!(so->so_mode & SM_EXDATA)) 3109 return (EOPNOTSUPP); 3110 so_update_attrs(so, SOACC); 3111 return (sorecvoob(so, msg, uiop, flags, 3112 (so->so_options & SO_OOBINLINE))); 3113 } 3114 3115 so_update_attrs(so, SOACC); 3116 3117 /* 3118 * Set msg_controllen and msg_namelen to zero here to make it 3119 * simpler in the cases that no control or name is returned. 3120 */ 3121 controllen = msg->msg_controllen; 3122 namelen = msg->msg_namelen; 3123 msg->msg_controllen = 0; 3124 msg->msg_namelen = 0; 3125 3126 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3127 namelen, controllen)); 3128 3129 mutex_enter(&so->so_lock); 3130 /* 3131 * If an NL7C enabled socket and not waiting for write data. 3132 */ 3133 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3134 NL7C_ENABLED) { 3135 if (sti->sti_nl7c_uri) { 3136 /* Close uri processing for a previous request */ 3137 nl7c_close(so); 3138 } 3139 if ((so_state & SS_CANTRCVMORE) && 3140 sti->sti_nl7c_rcv_mp == NULL) { 3141 /* Nothing to process, EOF */ 3142 mutex_exit(&so->so_lock); 3143 return (0); 3144 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3145 /* Persistent NL7C socket, try to process request */ 3146 boolean_t ret; 3147 3148 ret = nl7c_process(so, 3149 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3150 rval.r_vals = sti->sti_nl7c_rcv_rval; 3151 error = rval.r_v.r_v2; 3152 if (error) { 3153 /* Error of some sort, return it */ 3154 mutex_exit(&so->so_lock); 3155 return (error); 3156 } 3157 if (sti->sti_nl7c_flags && 3158 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3159 /* 3160 * Still an NL7C socket and no data 3161 * to pass up to the caller. 3162 */ 3163 mutex_exit(&so->so_lock); 3164 if (ret) { 3165 /* EOF */ 3166 return (0); 3167 } else { 3168 /* Need more data */ 3169 return (EAGAIN); 3170 } 3171 } 3172 } else { 3173 /* 3174 * Not persistent so no further NL7C processing. 3175 */ 3176 sti->sti_nl7c_flags = 0; 3177 } 3178 } 3179 /* 3180 * Only one reader is allowed at any given time. This is needed 3181 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3182 * 3183 * This is slightly different that BSD behavior in that it fails with 3184 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3185 * is single-threaded using sblock(), which is dropped while waiting 3186 * for data to appear. The difference shows up e.g. if one 3187 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3188 * does use nonblocking io and different threads are reading each 3189 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3190 * in this case as long as the read queue doesn't get empty. 3191 * In this implementation the thread using nonblocking io can 3192 * get an EWOULDBLOCK error due to the blocking thread executing 3193 * e.g. in the uiomove in kstrgetmsg. 3194 * This difference is not believed to be significant. 3195 */ 3196 /* Set SOREADLOCKED */ 3197 error = so_lock_read_intr(so, 3198 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? 
FNONBLOCK : 0)); 3199 mutex_exit(&so->so_lock); 3200 if (error) 3201 return (error); 3202 3203 /* 3204 * Tell kstrgetmsg to not inspect the stream head errors until all 3205 * queued data has been consumed. 3206 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3207 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3208 * 3209 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3210 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3211 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3212 */ 3213 pflag = MSG_ANY | MSG_DELAYERROR; 3214 if (flags & MSG_PEEK) { 3215 pflag |= MSG_IPEEK; 3216 flags &= ~MSG_WAITALL; 3217 } 3218 if (so->so_mode & SM_ATOMIC) 3219 pflag |= MSG_DISCARDTAIL; 3220 3221 if (flags & MSG_DONTWAIT) 3222 timout = 0; 3223 else 3224 timout = -1; 3225 opflag = pflag; 3226 retry: 3227 saved_resid = uiop->uio_resid; 3228 pri = 0; 3229 mp = NULL; 3230 if (sti->sti_nl7c_rcv_mp != NULL) { 3231 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3232 error = nl7c_sorecv(so, &mp, uiop, &rval); 3233 } else { 3234 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3235 timout, &rval); 3236 } 3237 if (error != 0) { 3238 /* kstrgetmsg returns ETIME when timeout expires */ 3239 if (error == ETIME) 3240 error = EWOULDBLOCK; 3241 goto out; 3242 } 3243 /* 3244 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3245 * For non-datagrams MOREDATA is used to set MSG_EOR. 3246 */ 3247 ASSERT(!(rval.r_val1 & MORECTL)); 3248 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3249 msg->msg_flags |= MSG_TRUNC; 3250 3251 if (mp == NULL) { 3252 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3253 /* 3254 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3255 * The draft Posix socket spec states that the mark should 3256 * not be cleared when peeking. We follow the latter. 3257 */ 3258 if ((so->so_state & 3259 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3260 (uiop->uio_resid != saved_resid) && 3261 !(flags & MSG_PEEK)) { 3262 sorecv_update_oobstate(so); 3263 } 3264 3265 mutex_enter(&so->so_lock); 3266 /* Set MSG_EOR based on MOREDATA */ 3267 if (!(rval.r_val1 & MOREDATA)) { 3268 if (so->so_state & SS_SAVEDEOR) { 3269 msg->msg_flags |= MSG_EOR; 3270 so->so_state &= ~SS_SAVEDEOR; 3271 } 3272 } 3273 /* 3274 * If some data was received (i.e. not EOF) and the 3275 * read/recv* has not been satisfied wait for some more. 3276 */ 3277 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3278 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3279 mutex_exit(&so->so_lock); 3280 pflag = opflag | MSG_NOMARK; 3281 goto retry; 3282 } 3283 goto out_locked; 3284 } 3285 3286 /* strsock_proto has already verified length and alignment */ 3287 tpr = (union T_primitives *)mp->b_rptr; 3288 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3289 3290 switch (tpr->type) { 3291 case T_DATA_IND: { 3292 if ((so->so_state & 3293 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3294 (uiop->uio_resid != saved_resid) && 3295 !(flags & MSG_PEEK)) { 3296 sorecv_update_oobstate(so); 3297 } 3298 3299 /* 3300 * Set msg_flags to MSG_EOR based on 3301 * MORE_flag and MOREDATA. 3302 */ 3303 mutex_enter(&so->so_lock); 3304 so->so_state &= ~SS_SAVEDEOR; 3305 if (!(tpr->data_ind.MORE_flag & 1)) { 3306 if (!(rval.r_val1 & MOREDATA)) 3307 msg->msg_flags |= MSG_EOR; 3308 else 3309 so->so_state |= SS_SAVEDEOR; 3310 } 3311 freemsg(mp); 3312 /* 3313 * If some data was received (i.e. 
not EOF) and the 3314 * read/recv* has not been satisfied wait for some more. 3315 */ 3316 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3317 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3318 mutex_exit(&so->so_lock); 3319 pflag = opflag | MSG_NOMARK; 3320 goto retry; 3321 } 3322 goto out_locked; 3323 } 3324 case T_UNITDATA_IND: { 3325 void *addr; 3326 t_uscalar_t addrlen; 3327 void *abuf; 3328 t_uscalar_t optlen; 3329 void *opt; 3330 3331 if ((so->so_state & 3332 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3333 (uiop->uio_resid != saved_resid) && 3334 !(flags & MSG_PEEK)) { 3335 sorecv_update_oobstate(so); 3336 } 3337 3338 if (namelen != 0) { 3339 /* Caller wants source address */ 3340 addrlen = tpr->unitdata_ind.SRC_length; 3341 addr = sogetoff(mp, 3342 tpr->unitdata_ind.SRC_offset, 3343 addrlen, 1); 3344 if (addr == NULL) { 3345 freemsg(mp); 3346 error = EPROTO; 3347 eprintsoline(so, error); 3348 goto out; 3349 } 3350 if (so->so_family == AF_UNIX) { 3351 /* 3352 * Can not use the transport level address. 3353 * If there is a SO_SRCADDR option carrying 3354 * the socket level address it will be 3355 * extracted below. 3356 */ 3357 addr = NULL; 3358 addrlen = 0; 3359 } 3360 } 3361 optlen = tpr->unitdata_ind.OPT_length; 3362 if (optlen != 0) { 3363 t_uscalar_t ncontrollen; 3364 3365 /* 3366 * Extract any source address option. 3367 * Determine how large cmsg buffer is needed. 3368 */ 3369 opt = sogetoff(mp, 3370 tpr->unitdata_ind.OPT_offset, 3371 optlen, __TPI_ALIGN_SIZE); 3372 3373 if (opt == NULL) { 3374 freemsg(mp); 3375 error = EPROTO; 3376 eprintsoline(so, error); 3377 goto out; 3378 } 3379 if (so->so_family == AF_UNIX) 3380 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3381 ncontrollen = so_cmsglen(mp, opt, optlen, 3382 !(flags & MSG_XPG4_2)); 3383 if (controllen != 0) 3384 controllen = ncontrollen; 3385 else if (ncontrollen != 0) 3386 msg->msg_flags |= MSG_CTRUNC; 3387 } else { 3388 controllen = 0; 3389 } 3390 3391 if (namelen != 0) { 3392 /* 3393 * Return address to caller. 3394 * Caller handles truncation if length 3395 * exceeds msg_namelen. 3396 * NOTE: AF_UNIX NUL termination is ensured by 3397 * the sender's copyin_name(). 3398 */ 3399 abuf = kmem_alloc(addrlen, KM_SLEEP); 3400 3401 bcopy(addr, abuf, addrlen); 3402 msg->msg_name = abuf; 3403 msg->msg_namelen = addrlen; 3404 } 3405 3406 if (controllen != 0) { 3407 /* 3408 * Return control msg to caller. 3409 * Caller handles truncation if length 3410 * exceeds msg_controllen. 3411 */ 3412 control = kmem_zalloc(controllen, KM_SLEEP); 3413 3414 error = so_opt2cmsg(mp, opt, optlen, 3415 !(flags & MSG_XPG4_2), 3416 control, controllen); 3417 if (error) { 3418 freemsg(mp); 3419 if (msg->msg_namelen != 0) 3420 kmem_free(msg->msg_name, 3421 msg->msg_namelen); 3422 kmem_free(control, controllen); 3423 eprintsoline(so, error); 3424 goto out; 3425 } 3426 msg->msg_control = control; 3427 msg->msg_controllen = controllen; 3428 } 3429 3430 freemsg(mp); 3431 goto out; 3432 } 3433 case T_OPTDATA_IND: { 3434 struct T_optdata_req *tdr; 3435 void *opt; 3436 t_uscalar_t optlen; 3437 3438 if ((so->so_state & 3439 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3440 (uiop->uio_resid != saved_resid) && 3441 !(flags & MSG_PEEK)) { 3442 sorecv_update_oobstate(so); 3443 } 3444 3445 tdr = (struct T_optdata_req *)mp->b_rptr; 3446 optlen = tdr->OPT_length; 3447 if (optlen != 0) { 3448 t_uscalar_t ncontrollen; 3449 /* 3450 * Determine how large cmsg buffer is needed. 
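 * so_cmsglen() computes how much cmsg space the TPI options will expand
 * to.  If the caller did not ask for control data (msg_controllen was
 * zero) but options are present, MSG_CTRUNC is set and nothing is
 * allocated; truncation against the caller's actual buffer is handled
 * later by the caller.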
3451 */ 3452 opt = sogetoff(mp, 3453 tpr->optdata_ind.OPT_offset, 3454 optlen, __TPI_ALIGN_SIZE); 3455 3456 if (opt == NULL) { 3457 freemsg(mp); 3458 error = EPROTO; 3459 eprintsoline(so, error); 3460 goto out; 3461 } 3462 3463 ncontrollen = so_cmsglen(mp, opt, optlen, 3464 !(flags & MSG_XPG4_2)); 3465 if (controllen != 0) 3466 controllen = ncontrollen; 3467 else if (ncontrollen != 0) 3468 msg->msg_flags |= MSG_CTRUNC; 3469 } else { 3470 controllen = 0; 3471 } 3472 3473 if (controllen != 0) { 3474 /* 3475 * Return control msg to caller. 3476 * Caller handles truncation if length 3477 * exceeds msg_controllen. 3478 */ 3479 control = kmem_zalloc(controllen, KM_SLEEP); 3480 3481 error = so_opt2cmsg(mp, opt, optlen, 3482 !(flags & MSG_XPG4_2), 3483 control, controllen); 3484 if (error) { 3485 freemsg(mp); 3486 kmem_free(control, controllen); 3487 eprintsoline(so, error); 3488 goto out; 3489 } 3490 msg->msg_control = control; 3491 msg->msg_controllen = controllen; 3492 } 3493 3494 /* 3495 * Set msg_flags to MSG_EOR based on 3496 * DATA_flag and MOREDATA. 3497 */ 3498 mutex_enter(&so->so_lock); 3499 so->so_state &= ~SS_SAVEDEOR; 3500 if (!(tpr->data_ind.MORE_flag & 1)) { 3501 if (!(rval.r_val1 & MOREDATA)) 3502 msg->msg_flags |= MSG_EOR; 3503 else 3504 so->so_state |= SS_SAVEDEOR; 3505 } 3506 freemsg(mp); 3507 /* 3508 * If some data was received (i.e. not EOF) and the 3509 * read/recv* has not been satisfied wait for some more. 3510 * Not possible to wait if control info was received. 3511 */ 3512 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3513 controllen == 0 && 3514 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3515 mutex_exit(&so->so_lock); 3516 pflag = opflag | MSG_NOMARK; 3517 goto retry; 3518 } 3519 goto out_locked; 3520 } 3521 case T_EXDATA_IND: { 3522 dprintso(so, 1, 3523 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3524 "state %s\n", 3525 sti->sti_oobsigcnt, sti->sti_oobcnt, 3526 saved_resid - uiop->uio_resid, 3527 pr_state(so->so_state, so->so_mode))); 3528 /* 3529 * kstrgetmsg handles MSGMARK so there is nothing to 3530 * inspect in the T_EXDATA_IND. 3531 * strsock_proto makes the stream head queue the T_EXDATA_IND 3532 * as a separate message with no M_DATA component. Furthermore, 3533 * the stream head does not consolidate M_DATA messages onto 3534 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3535 * remains a message by itself. This is needed since MSGMARK 3536 * marks both the whole message as well as the last byte 3537 * of the message. 3538 */ 3539 freemsg(mp); 3540 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3541 if (flags & MSG_PEEK) { 3542 /* 3543 * Even though we are peeking we consume the 3544 * T_EXDATA_IND thereby moving the mark information 3545 * to SS_RCVATMARK. Then the oob code below will 3546 * retry the peeking kstrgetmsg. 3547 * Note that the stream head read queue is 3548 * never flushed without holding SOREADLOCKED 3549 * thus the T_EXDATA_IND can not disappear 3550 * underneath us. 
3551 */ 3552 dprintso(so, 1, 3553 ("sotpi_recvmsg: consume EXDATA_IND " 3554 "counts %d/%d state %s\n", 3555 sti->sti_oobsigcnt, 3556 sti->sti_oobcnt, 3557 pr_state(so->so_state, so->so_mode))); 3558 3559 pflag = MSG_ANY | MSG_DELAYERROR; 3560 if (so->so_mode & SM_ATOMIC) 3561 pflag |= MSG_DISCARDTAIL; 3562 3563 pri = 0; 3564 mp = NULL; 3565 3566 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3567 &pri, &pflag, (clock_t)-1, &rval); 3568 ASSERT(uiop->uio_resid == saved_resid); 3569 3570 if (error) { 3571 #ifdef SOCK_DEBUG 3572 if (error != EWOULDBLOCK && error != EINTR) { 3573 eprintsoline(so, error); 3574 } 3575 #endif /* SOCK_DEBUG */ 3576 goto out; 3577 } 3578 ASSERT(mp); 3579 tpr = (union T_primitives *)mp->b_rptr; 3580 ASSERT(tpr->type == T_EXDATA_IND); 3581 freemsg(mp); 3582 } /* end "if (flags & MSG_PEEK)" */ 3583 3584 /* 3585 * Decrement the number of queued and pending oob. 3586 * 3587 * SS_RCVATMARK is cleared when we read past a mark. 3588 * SS_HAVEOOBDATA is cleared when we've read past the 3589 * last mark. 3590 * SS_OOBPEND is cleared if we've read past the last 3591 * mark and no (new) SIGURG has been posted. 3592 */ 3593 mutex_enter(&so->so_lock); 3594 ASSERT(so_verify_oobstate(so)); 3595 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3596 ASSERT(sti->sti_oobsigcnt > 0); 3597 sti->sti_oobsigcnt--; 3598 ASSERT(sti->sti_oobcnt > 0); 3599 sti->sti_oobcnt--; 3600 /* 3601 * Since the T_EXDATA_IND has been removed from the stream 3602 * head, but we have not read data past the mark, 3603 * sockfs needs to track that the socket is still at the mark. 3604 * 3605 * Since no data was received call kstrgetmsg again to wait 3606 * for data. 3607 */ 3608 so->so_state |= SS_RCVATMARK; 3609 mutex_exit(&so->so_lock); 3610 dprintso(so, 1, 3611 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3612 sti->sti_oobsigcnt, sti->sti_oobcnt, 3613 pr_state(so->so_state, so->so_mode))); 3614 pflag = opflag; 3615 goto retry; 3616 } 3617 default: 3618 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3619 (void *)so, tpr->type, (void *)mp); 3620 ASSERT(0); 3621 freemsg(mp); 3622 error = EPROTO; 3623 eprintsoline(so, error); 3624 goto out; 3625 } 3626 /* NOTREACHED */ 3627 out: 3628 mutex_enter(&so->so_lock); 3629 out_locked: 3630 so_unlock_read(so); /* Clear SOREADLOCKED */ 3631 mutex_exit(&so->so_lock); 3632 return (error); 3633 } 3634 3635 /* 3636 * Sending data with options on a datagram socket. 3637 * Assumes caller has verified that SS_ISBOUND etc. are set. 3638 */ 3639 static int 3640 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3641 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3642 { 3643 struct T_unitdata_req tudr; 3644 mblk_t *mp; 3645 int error; 3646 void *addr; 3647 socklen_t addrlen; 3648 void *src; 3649 socklen_t srclen; 3650 ssize_t len; 3651 int size; 3652 struct T_opthdr toh; 3653 struct fdbuf *fdbuf; 3654 t_uscalar_t optlen; 3655 void *fds; 3656 int fdlen; 3657 sotpi_info_t *sti = SOTOTPI(so); 3658 3659 ASSERT(name && namelen); 3660 ASSERT(control && controllen); 3661 3662 len = uiop->uio_resid; 3663 if (len > (ssize_t)sti->sti_tidu_size) { 3664 return (EMSGSIZE); 3665 } 3666 3667 /* 3668 * For AF_UNIX the destination address is translated to an internal 3669 * name and the source address is passed as an option. 3670 * Also, file descriptors are passed as file pointers in an 3671 * option. 3672 */ 3673 3674 /* 3675 * Length and family checks. 
3676 */ 3677 error = so_addr_verify(so, name, namelen); 3678 if (error) { 3679 eprintsoline(so, error); 3680 return (error); 3681 } 3682 if (so->so_family == AF_UNIX) { 3683 if (sti->sti_faddr_noxlate) { 3684 /* 3685 * Already have a transport internal address. Do not 3686 * pass any (transport internal) source address. 3687 */ 3688 addr = name; 3689 addrlen = namelen; 3690 src = NULL; 3691 srclen = 0; 3692 } else { 3693 /* 3694 * Pass the sockaddr_un source address as an option 3695 * and translate the remote address. 3696 * 3697 * Note that this code does not prevent sti_laddr_sa 3698 * from changing while it is being used. Thus 3699 * if an unbind+bind occurs concurrently with this 3700 * send the peer might see a partially new and a 3701 * partially old "from" address. 3702 */ 3703 src = sti->sti_laddr_sa; 3704 srclen = (t_uscalar_t)sti->sti_laddr_len; 3705 dprintso(so, 1, 3706 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3707 srclen, src)); 3708 error = so_ux_addr_xlate(so, name, namelen, 3709 (flags & MSG_XPG4_2), 3710 &addr, &addrlen); 3711 if (error) { 3712 eprintsoline(so, error); 3713 return (error); 3714 } 3715 } 3716 } else { 3717 addr = name; 3718 addrlen = namelen; 3719 src = NULL; 3720 srclen = 0; 3721 } 3722 optlen = so_optlen(control, controllen, 3723 !(flags & MSG_XPG4_2)); 3724 tudr.PRIM_type = T_UNITDATA_REQ; 3725 tudr.DEST_length = addrlen; 3726 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3727 if (srclen != 0) 3728 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3729 _TPI_ALIGN_TOPT(srclen)); 3730 else 3731 tudr.OPT_length = optlen; 3732 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3733 _TPI_ALIGN_TOPT(addrlen)); 3734 3735 size = tudr.OPT_offset + tudr.OPT_length; 3736 3737 /* 3738 * File descriptors only when SM_FDPASSING set. 3739 */ 3740 error = so_getfdopt(control, controllen, 3741 !(flags & MSG_XPG4_2), &fds, &fdlen); 3742 if (error) 3743 return (error); 3744 if (fdlen != -1) { 3745 if (!(so->so_mode & SM_FDPASSING)) 3746 return (EOPNOTSUPP); 3747 3748 error = fdbuf_create(fds, fdlen, &fdbuf); 3749 if (error) 3750 return (error); 3751 mp = fdbuf_allocmsg(size, fdbuf); 3752 } else { 3753 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3754 if (mp == NULL) { 3755 /* 3756 * Caught a signal waiting for memory. 3757 * Let send* return EINTR. 3758 */ 3759 return (EINTR); 3760 } 3761 } 3762 soappendmsg(mp, &tudr, sizeof (tudr)); 3763 soappendmsg(mp, addr, addrlen); 3764 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3765 3766 if (fdlen != -1) { 3767 ASSERT(fdbuf != NULL); 3768 toh.level = SOL_SOCKET; 3769 toh.name = SO_FILEP; 3770 toh.len = fdbuf->fd_size + 3771 (t_uscalar_t)sizeof (struct T_opthdr); 3772 toh.status = 0; 3773 soappendmsg(mp, &toh, sizeof (toh)); 3774 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3775 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3776 } 3777 if (srclen != 0) { 3778 /* 3779 * There is a AF_UNIX sockaddr_un to include as a source 3780 * address option. 
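 * The finished T_UNITDATA_REQ is thus laid out as: T_unitdata_req, the
 * destination address (padded to _TPI_ALIGN_TOPT()), an SO_FILEP option
 * carrying the fdbuf when descriptors are being passed, this SO_SRCADDR
 * option with the padded source address, and finally the options
 * converted from the caller's cmsgs by so_cmsg2opt().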
3781 */ 3782 toh.level = SOL_SOCKET; 3783 toh.name = SO_SRCADDR; 3784 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3785 toh.status = 0; 3786 soappendmsg(mp, &toh, sizeof (toh)); 3787 soappendmsg(mp, src, srclen); 3788 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3789 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3790 } 3791 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3792 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3793 /* At most 3 bytes left in the message */ 3794 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3795 ASSERT(MBLKL(mp) <= (ssize_t)size); 3796 3797 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3798 if (AU_AUDITING()) 3799 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3800 3801 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3802 #ifdef SOCK_DEBUG 3803 if (error) { 3804 eprintsoline(so, error); 3805 } 3806 #endif /* SOCK_DEBUG */ 3807 return (error); 3808 } 3809 3810 /* 3811 * Sending data with options on a connected stream socket. 3812 * Assumes caller has verified that SS_ISCONNECTED is set. 3813 */ 3814 static int 3815 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3816 t_uscalar_t controllen, int flags) 3817 { 3818 struct T_optdata_req tdr; 3819 mblk_t *mp; 3820 int error; 3821 ssize_t iosize; 3822 int size; 3823 struct fdbuf *fdbuf; 3824 t_uscalar_t optlen; 3825 void *fds; 3826 int fdlen; 3827 struct T_opthdr toh; 3828 sotpi_info_t *sti = SOTOTPI(so); 3829 3830 dprintso(so, 1, 3831 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3832 3833 /* 3834 * Has to be bound and connected. However, since no locks are 3835 * held the state could have changed after sotpi_sendmsg checked it 3836 * thus it is not possible to ASSERT on the state. 3837 */ 3838 3839 /* Options on connection-oriented only when SM_OPTDATA set. */ 3840 if (!(so->so_mode & SM_OPTDATA)) 3841 return (EOPNOTSUPP); 3842 3843 do { 3844 /* 3845 * Set the MORE flag if uio_resid does not fit in this 3846 * message or if the caller passed in "more". 3847 * Error for transports with zero tidu_size. 3848 */ 3849 tdr.PRIM_type = T_OPTDATA_REQ; 3850 iosize = sti->sti_tidu_size; 3851 if (iosize <= 0) 3852 return (EMSGSIZE); 3853 if (uiop->uio_resid > iosize) { 3854 tdr.DATA_flag = 1; 3855 } else { 3856 if (more) 3857 tdr.DATA_flag = 1; 3858 else 3859 tdr.DATA_flag = 0; 3860 iosize = uiop->uio_resid; 3861 } 3862 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3863 tdr.DATA_flag, iosize)); 3864 3865 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3866 tdr.OPT_length = optlen; 3867 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3868 3869 size = (int)sizeof (tdr) + optlen; 3870 /* 3871 * File descriptors only when SM_FDPASSING set. 3872 */ 3873 error = so_getfdopt(control, controllen, 3874 !(flags & MSG_XPG4_2), &fds, &fdlen); 3875 if (error) 3876 return (error); 3877 if (fdlen != -1) { 3878 if (!(so->so_mode & SM_FDPASSING)) 3879 return (EOPNOTSUPP); 3880 3881 error = fdbuf_create(fds, fdlen, &fdbuf); 3882 if (error) 3883 return (error); 3884 mp = fdbuf_allocmsg(size, fdbuf); 3885 } else { 3886 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3887 if (mp == NULL) { 3888 /* 3889 * Caught a signal waiting for memory. 3890 * Let send* return EINTR. 
3891 */ 3892 return (EINTR); 3893 } 3894 } 3895 soappendmsg(mp, &tdr, sizeof (tdr)); 3896 3897 if (fdlen != -1) { 3898 ASSERT(fdbuf != NULL); 3899 toh.level = SOL_SOCKET; 3900 toh.name = SO_FILEP; 3901 toh.len = fdbuf->fd_size + 3902 (t_uscalar_t)sizeof (struct T_opthdr); 3903 toh.status = 0; 3904 soappendmsg(mp, &toh, sizeof (toh)); 3905 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3906 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3907 } 3908 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3909 /* At most 3 bytes left in the message */ 3910 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3911 ASSERT(MBLKL(mp) <= (ssize_t)size); 3912 3913 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3914 3915 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3916 0, MSG_BAND, 0); 3917 if (error) { 3918 eprintsoline(so, error); 3919 return (error); 3920 } 3921 control = NULL; 3922 if (uiop->uio_resid > 0) { 3923 /* 3924 * Recheck for fatal errors. Fail write even though 3925 * some data have been written. This is consistent 3926 * with strwrite semantics and BSD sockets semantics. 3927 */ 3928 if (so->so_state & SS_CANTSENDMORE) { 3929 eprintsoline(so, error); 3930 return (EPIPE); 3931 } 3932 if (so->so_error != 0) { 3933 mutex_enter(&so->so_lock); 3934 error = sogeterr(so, B_TRUE); 3935 mutex_exit(&so->so_lock); 3936 if (error != 0) { 3937 eprintsoline(so, error); 3938 return (error); 3939 } 3940 } 3941 } 3942 } while (uiop->uio_resid > 0); 3943 return (0); 3944 } 3945 3946 /* 3947 * Sending data on a datagram socket. 3948 * Assumes caller has verified that SS_ISBOUND etc. are set. 3949 * 3950 * For AF_UNIX the destination address is translated to an internal 3951 * name and the source address is passed as an option. 3952 */ 3953 int 3954 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3955 struct uio *uiop, int flags) 3956 { 3957 struct T_unitdata_req tudr; 3958 mblk_t *mp; 3959 int error; 3960 void *addr; 3961 socklen_t addrlen; 3962 void *src; 3963 socklen_t srclen; 3964 ssize_t len; 3965 sotpi_info_t *sti = SOTOTPI(so); 3966 3967 ASSERT(name != NULL && namelen != 0); 3968 3969 len = uiop->uio_resid; 3970 if (len > sti->sti_tidu_size) { 3971 error = EMSGSIZE; 3972 goto done; 3973 } 3974 3975 /* Length and family checks */ 3976 error = so_addr_verify(so, name, namelen); 3977 if (error != 0) 3978 goto done; 3979 3980 if (sti->sti_direct) 3981 return (sodgram_direct(so, name, namelen, uiop, flags)); 3982 3983 if (so->so_family == AF_UNIX) { 3984 if (sti->sti_faddr_noxlate) { 3985 /* 3986 * Already have a transport internal address. Do not 3987 * pass any (transport internal) source address. 3988 */ 3989 addr = name; 3990 addrlen = namelen; 3991 src = NULL; 3992 srclen = 0; 3993 } else { 3994 /* 3995 * Pass the sockaddr_un source address as an option 3996 * and translate the remote address. 3997 * 3998 * Note that this code does not prevent sti_laddr_sa 3999 * from changing while it is being used. Thus 4000 * if an unbind+bind occurs concurrently with this 4001 * send the peer might see a partially new and a 4002 * partially old "from" address. 
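 * For example, if another thread rebinds the socket to a new
 * sun_path while this send is copying the address, the receiver's
 * recvfrom() may report a source address that mixes bytes of the
 * old and the new path.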
4003 */ 4004 src = sti->sti_laddr_sa; 4005 srclen = (socklen_t)sti->sti_laddr_len; 4006 dprintso(so, 1, 4007 ("sosend_dgram UNIX: srclen %d, src %p\n", 4008 srclen, src)); 4009 error = so_ux_addr_xlate(so, name, namelen, 4010 (flags & MSG_XPG4_2), 4011 &addr, &addrlen); 4012 if (error) { 4013 eprintsoline(so, error); 4014 goto done; 4015 } 4016 } 4017 } else { 4018 addr = name; 4019 addrlen = namelen; 4020 src = NULL; 4021 srclen = 0; 4022 } 4023 tudr.PRIM_type = T_UNITDATA_REQ; 4024 tudr.DEST_length = addrlen; 4025 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4026 if (srclen == 0) { 4027 tudr.OPT_length = 0; 4028 tudr.OPT_offset = 0; 4029 4030 mp = soallocproto2(&tudr, sizeof (tudr), 4031 addr, addrlen, 0, _ALLOC_INTR, CRED()); 4032 if (mp == NULL) { 4033 /* 4034 * Caught a signal waiting for memory. 4035 * Let send* return EINTR. 4036 */ 4037 error = EINTR; 4038 goto done; 4039 } 4040 } else { 4041 /* 4042 * There is a AF_UNIX sockaddr_un to include as a source 4043 * address option. 4044 */ 4045 struct T_opthdr toh; 4046 ssize_t size; 4047 4048 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4049 _TPI_ALIGN_TOPT(srclen)); 4050 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4051 _TPI_ALIGN_TOPT(addrlen)); 4052 4053 toh.level = SOL_SOCKET; 4054 toh.name = SO_SRCADDR; 4055 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4056 toh.status = 0; 4057 4058 size = tudr.OPT_offset + tudr.OPT_length; 4059 mp = soallocproto2(&tudr, sizeof (tudr), 4060 addr, addrlen, size, _ALLOC_INTR, CRED()); 4061 if (mp == NULL) { 4062 /* 4063 * Caught a signal waiting for memory. 4064 * Let send* return EINTR. 4065 */ 4066 error = EINTR; 4067 goto done; 4068 } 4069 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4070 soappendmsg(mp, &toh, sizeof (toh)); 4071 soappendmsg(mp, src, srclen); 4072 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4073 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4074 } 4075 4076 if (AU_AUDITING()) 4077 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4078 4079 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4080 done: 4081 #ifdef SOCK_DEBUG 4082 if (error) { 4083 eprintsoline(so, error); 4084 } 4085 #endif /* SOCK_DEBUG */ 4086 return (error); 4087 } 4088 4089 /* 4090 * Sending data on a connected stream socket. 4091 * Assumes caller has verified that SS_ISCONNECTED is set. 4092 */ 4093 int 4094 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4095 int sflag) 4096 { 4097 struct T_data_req tdr; 4098 mblk_t *mp; 4099 int error; 4100 ssize_t iosize; 4101 sotpi_info_t *sti = SOTOTPI(so); 4102 4103 dprintso(so, 1, 4104 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4105 (void *)so, uiop->uio_resid, prim, sflag)); 4106 4107 /* 4108 * Has to be bound and connected. However, since no locks are 4109 * held the state could have changed after sotpi_sendmsg checked it 4110 * thus it is not possible to ASSERT on the state. 4111 */ 4112 4113 do { 4114 /* 4115 * Set the MORE flag if uio_resid does not fit in this 4116 * message or if the caller passed in "more". 4117 * Error for transports with zero tidu_size. 
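 * For example, with a sti_tidu_size of 1024 a 2500 byte write is
 * sent as three messages of 1024, 1024 and 452 bytes; the first two
 * carry MORE_flag = 1 and the last one carries MORE_flag = 1 only
 * if the caller passed in "more" (sotpi_sendmsg passes
 * !(flags & MSG_EOR)).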
4118 */ 4119 tdr.PRIM_type = prim; 4120 iosize = sti->sti_tidu_size; 4121 if (iosize <= 0) 4122 return (EMSGSIZE); 4123 if (uiop->uio_resid > iosize) { 4124 tdr.MORE_flag = 1; 4125 } else { 4126 if (more) 4127 tdr.MORE_flag = 1; 4128 else 4129 tdr.MORE_flag = 0; 4130 iosize = uiop->uio_resid; 4131 } 4132 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4133 prim, tdr.MORE_flag, iosize)); 4134 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4135 if (mp == NULL) { 4136 /* 4137 * Caught a signal waiting for memory. 4138 * Let send* return EINTR. 4139 */ 4140 return (EINTR); 4141 } 4142 4143 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4144 0, sflag | MSG_BAND, 0); 4145 if (error) { 4146 eprintsoline(so, error); 4147 return (error); 4148 } 4149 if (uiop->uio_resid > 0) { 4150 /* 4151 * Recheck for fatal errors. Fail write even though 4152 * some data have been written. This is consistent 4153 * with strwrite semantics and BSD sockets semantics. 4154 */ 4155 if (so->so_state & SS_CANTSENDMORE) { 4156 eprintsoline(so, error); 4157 return (EPIPE); 4158 } 4159 if (so->so_error != 0) { 4160 mutex_enter(&so->so_lock); 4161 error = sogeterr(so, B_TRUE); 4162 mutex_exit(&so->so_lock); 4163 if (error != 0) { 4164 eprintsoline(so, error); 4165 return (error); 4166 } 4167 } 4168 } 4169 } while (uiop->uio_resid > 0); 4170 return (0); 4171 } 4172 4173 /* 4174 * Check the state for errors and call the appropriate send function. 4175 * 4176 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4177 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4178 * after sending the message. 4179 */ 4180 static int 4181 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4182 struct cred *cr) 4183 { 4184 int so_state; 4185 int so_mode; 4186 int error; 4187 struct sockaddr *name; 4188 t_uscalar_t namelen; 4189 int dontroute; 4190 int flags; 4191 sotpi_info_t *sti = SOTOTPI(so); 4192 4193 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4194 (void *)so, (void *)msg, msg->msg_flags, 4195 pr_state(so->so_state, so->so_mode), so->so_error)); 4196 4197 if (so->so_version == SOV_STREAM) { 4198 /* The imaginary "sockmod" has been popped - act as a stream */ 4199 so_update_attrs(so, SOMOD); 4200 return (strwrite(SOTOV(so), uiop, cr)); 4201 } 4202 4203 mutex_enter(&so->so_lock); 4204 so_state = so->so_state; 4205 4206 if (so_state & SS_CANTSENDMORE) { 4207 mutex_exit(&so->so_lock); 4208 return (EPIPE); 4209 } 4210 4211 if (so->so_error != 0) { 4212 error = sogeterr(so, B_TRUE); 4213 if (error != 0) { 4214 mutex_exit(&so->so_lock); 4215 return (error); 4216 } 4217 } 4218 4219 name = (struct sockaddr *)msg->msg_name; 4220 namelen = msg->msg_namelen; 4221 4222 so_mode = so->so_mode; 4223 4224 if (name == NULL) { 4225 if (!(so_state & SS_ISCONNECTED)) { 4226 mutex_exit(&so->so_lock); 4227 if (so_mode & SM_CONNREQUIRED) 4228 return (ENOTCONN); 4229 else 4230 return (EDESTADDRREQ); 4231 } 4232 if (so_mode & SM_CONNREQUIRED) { 4233 name = NULL; 4234 namelen = 0; 4235 } else { 4236 /* 4237 * Note that this code does not prevent sti_faddr_sa 4238 * from changing while it is being used. Thus 4239 * if an "unconnect"+connect occurs concurrently with 4240 * this send the datagram might be delivered to a 4241 * garbaled address. 
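 * (The sti_faddr_sa pointer is picked up below while so_lock is
 * still held, but the buffer it points to is only copied later, in
 * sosend_dgram()/sosend_dgramcmsg(), after so_lock has been
 * dropped.)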
4242 */ 4243 ASSERT(sti->sti_faddr_sa); 4244 name = sti->sti_faddr_sa; 4245 namelen = (t_uscalar_t)sti->sti_faddr_len; 4246 } 4247 } else { 4248 if (!(so_state & SS_ISCONNECTED) && 4249 (so_mode & SM_CONNREQUIRED)) { 4250 /* Required but not connected */ 4251 mutex_exit(&so->so_lock); 4252 return (ENOTCONN); 4253 } 4254 /* 4255 * Ignore the address on connection-oriented sockets. 4256 * Just like BSD this code does not generate an error for 4257 * TCP (a CONNREQUIRED socket) when sending to an address 4258 * passed in with sendto/sendmsg. Instead the data is 4259 * delivered on the connection as if no address had been 4260 * supplied. 4261 */ 4262 if ((so_state & SS_ISCONNECTED) && 4263 !(so_mode & SM_CONNREQUIRED)) { 4264 mutex_exit(&so->so_lock); 4265 return (EISCONN); 4266 } 4267 if (!(so_state & SS_ISBOUND)) { 4268 so_lock_single(so); /* Set SOLOCKED */ 4269 error = sotpi_bind(so, NULL, 0, 4270 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4271 so_unlock_single(so, SOLOCKED); 4272 if (error) { 4273 mutex_exit(&so->so_lock); 4274 eprintsoline(so, error); 4275 return (error); 4276 } 4277 } 4278 /* 4279 * Handle delayed datagram errors. These are only queued 4280 * when the application sets SO_DGRAM_ERRIND. 4281 * Return the error if we are sending to the address 4282 * that was returned in the last T_UDERROR_IND. 4283 * If sending to some other address discard the delayed 4284 * error indication. 4285 */ 4286 if (sti->sti_delayed_error) { 4287 struct T_uderror_ind *tudi; 4288 void *addr; 4289 t_uscalar_t addrlen; 4290 boolean_t match = B_FALSE; 4291 4292 ASSERT(sti->sti_eaddr_mp); 4293 error = sti->sti_delayed_error; 4294 sti->sti_delayed_error = 0; 4295 tudi = 4296 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4297 addrlen = tudi->DEST_length; 4298 addr = sogetoff(sti->sti_eaddr_mp, 4299 tudi->DEST_offset, addrlen, 1); 4300 ASSERT(addr); /* Checked by strsock_proto */ 4301 switch (so->so_family) { 4302 case AF_INET: { 4303 /* Compare just IP address and port */ 4304 sin_t *sin1 = (sin_t *)name; 4305 sin_t *sin2 = (sin_t *)addr; 4306 4307 if (addrlen == sizeof (sin_t) && 4308 namelen == addrlen && 4309 sin1->sin_port == sin2->sin_port && 4310 sin1->sin_addr.s_addr == 4311 sin2->sin_addr.s_addr) 4312 match = B_TRUE; 4313 break; 4314 } 4315 case AF_INET6: { 4316 /* Compare just IP address and port. 
Not flow */ 4317 sin6_t *sin1 = (sin6_t *)name; 4318 sin6_t *sin2 = (sin6_t *)addr; 4319 4320 if (addrlen == sizeof (sin6_t) && 4321 namelen == addrlen && 4322 sin1->sin6_port == sin2->sin6_port && 4323 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4324 &sin2->sin6_addr)) 4325 match = B_TRUE; 4326 break; 4327 } 4328 case AF_UNIX: 4329 default: 4330 if (namelen == addrlen && 4331 bcmp(name, addr, namelen) == 0) 4332 match = B_TRUE; 4333 } 4334 if (match) { 4335 freemsg(sti->sti_eaddr_mp); 4336 sti->sti_eaddr_mp = NULL; 4337 mutex_exit(&so->so_lock); 4338 #ifdef DEBUG 4339 dprintso(so, 0, 4340 ("sockfs delayed error %d for %s\n", 4341 error, 4342 pr_addr(so->so_family, name, namelen))); 4343 #endif /* DEBUG */ 4344 return (error); 4345 } 4346 freemsg(sti->sti_eaddr_mp); 4347 sti->sti_eaddr_mp = NULL; 4348 } 4349 } 4350 mutex_exit(&so->so_lock); 4351 4352 flags = msg->msg_flags; 4353 dontroute = 0; 4354 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4355 uint32_t val; 4356 4357 val = 1; 4358 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4359 &val, (t_uscalar_t)sizeof (val), cr); 4360 if (error) 4361 return (error); 4362 dontroute = 1; 4363 } 4364 4365 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4366 error = EOPNOTSUPP; 4367 goto done; 4368 } 4369 if (msg->msg_controllen != 0) { 4370 if (!(so_mode & SM_CONNREQUIRED)) { 4371 so_update_attrs(so, SOMOD); 4372 error = sosend_dgramcmsg(so, name, namelen, uiop, 4373 msg->msg_control, msg->msg_controllen, flags); 4374 } else { 4375 if (flags & MSG_OOB) { 4376 /* Can't generate T_EXDATA_REQ with options */ 4377 error = EOPNOTSUPP; 4378 goto done; 4379 } 4380 so_update_attrs(so, SOMOD); 4381 error = sosend_svccmsg(so, uiop, 4382 !(flags & MSG_EOR), 4383 msg->msg_control, msg->msg_controllen, 4384 flags); 4385 } 4386 goto done; 4387 } 4388 4389 so_update_attrs(so, SOMOD); 4390 if (!(so_mode & SM_CONNREQUIRED)) { 4391 /* 4392 * If there is no SO_DONTROUTE to turn off return immediately 4393 * from send_dgram. This can allow tail-call optimizations. 4394 */ 4395 if (!dontroute) { 4396 return (sosend_dgram(so, name, namelen, uiop, flags)); 4397 } 4398 error = sosend_dgram(so, name, namelen, uiop, flags); 4399 } else { 4400 t_scalar_t prim; 4401 int sflag; 4402 4403 /* Ignore msg_name in the connected state */ 4404 if (flags & MSG_OOB) { 4405 prim = T_EXDATA_REQ; 4406 /* 4407 * Send down T_EXDATA_REQ even if there is flow 4408 * control for data. 4409 */ 4410 sflag = MSG_IGNFLOW; 4411 } else { 4412 if (so_mode & SM_BYTESTREAM) { 4413 /* Byte stream transport - use write */ 4414 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4415 4416 /* Send M_DATA messages */ 4417 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4418 (error = nl7c_data(so, uiop)) >= 0) { 4419 /* NL7C consumed the data */ 4420 return (error); 4421 } 4422 /* 4423 * If there is no SO_DONTROUTE to turn off, 4424 * sti_direct is on, and there is no flow 4425 * control, we can take the fast path. 4426 */ 4427 if (!dontroute && sti->sti_direct != 0 && 4428 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4429 return (sostream_direct(so, uiop, 4430 NULL, cr)); 4431 } 4432 error = strwrite(SOTOV(so), uiop, cr); 4433 goto done; 4434 } 4435 prim = T_DATA_REQ; 4436 sflag = 0; 4437 } 4438 /* 4439 * If there is no SO_DONTROUTE to turn off return immediately 4440 * from sosend_svc. This can allow tail-call optimizations. 
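 * When dontroute was set above, SO_DONTROUTE still has to be
 * cleared at "done", so in that case the result is captured in
 * "error" and execution falls through instead.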
4441 */ 4442 if (!dontroute) 4443 return (sosend_svc(so, uiop, prim, 4444 !(flags & MSG_EOR), sflag)); 4445 error = sosend_svc(so, uiop, prim, 4446 !(flags & MSG_EOR), sflag); 4447 } 4448 ASSERT(dontroute); 4449 done: 4450 if (dontroute) { 4451 uint32_t val; 4452 4453 val = 0; 4454 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4455 &val, (t_uscalar_t)sizeof (val), cr); 4456 } 4457 return (error); 4458 } 4459 4460 /* 4461 * kstrwritemp() has very similar semantics as that of strwrite(). 4462 * The main difference is it obtains mblks from the caller and also 4463 * does not do any copy as done in strwrite() from user buffers to 4464 * kernel buffers. 4465 * 4466 * Currently, this routine is used by sendfile to send data allocated 4467 * within the kernel without any copying. This interface does not use the 4468 * synchronous stream interface as synch. stream interface implies 4469 * copying. 4470 */ 4471 int 4472 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4473 { 4474 struct stdata *stp; 4475 struct queue *wqp; 4476 mblk_t *newmp; 4477 char waitflag; 4478 int tempmode; 4479 int error = 0; 4480 int done = 0; 4481 struct sonode *so; 4482 boolean_t direct; 4483 4484 ASSERT(vp->v_stream); 4485 stp = vp->v_stream; 4486 4487 so = VTOSO(vp); 4488 direct = _SOTOTPI(so)->sti_direct; 4489 4490 /* 4491 * This is the sockfs direct fast path. canputnext() need 4492 * not be accurate so we don't grab the sd_lock here. If 4493 * we get flow-controlled, we grab sd_lock just before the 4494 * do..while loop below to emulate what strwrite() does. 4495 */ 4496 wqp = stp->sd_wrq; 4497 if (canputnext(wqp) && direct && 4498 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4499 return (sostream_direct(so, NULL, mp, CRED())); 4500 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4501 /* Fast check of flags before acquiring the lock */ 4502 mutex_enter(&stp->sd_lock); 4503 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4504 mutex_exit(&stp->sd_lock); 4505 if (error != 0) { 4506 if (!(stp->sd_flag & STPLEX) && 4507 (stp->sd_wput_opt & SW_SIGPIPE)) { 4508 error = EPIPE; 4509 } 4510 return (error); 4511 } 4512 } 4513 4514 waitflag = WRITEWAIT; 4515 if (stp->sd_flag & OLDNDELAY) 4516 tempmode = fmode & ~FNDELAY; 4517 else 4518 tempmode = fmode; 4519 4520 mutex_enter(&stp->sd_lock); 4521 do { 4522 if (canputnext(wqp)) { 4523 mutex_exit(&stp->sd_lock); 4524 if (stp->sd_wputdatafunc != NULL) { 4525 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4526 NULL, NULL, NULL); 4527 if (newmp == NULL) { 4528 /* The caller will free mp */ 4529 return (ECOMM); 4530 } 4531 mp = newmp; 4532 } 4533 putnext(wqp, mp); 4534 return (0); 4535 } 4536 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4537 &done); 4538 } while (error == 0 && !done); 4539 4540 mutex_exit(&stp->sd_lock); 4541 /* 4542 * EAGAIN tells the application to try again. ENOMEM 4543 * is returned only if the memory allocation size 4544 * exceeds the physical limits of the system. ENOMEM 4545 * can't be true here. 
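 * Any ENOMEM reported by strwaitq() is therefore treated as a
 * transient condition and mapped to EAGAIN below, so that the
 * sendfile caller simply retries.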
4546 */ 4547 if (error == ENOMEM) 4548 error = EAGAIN; 4549 return (error); 4550 } 4551 4552 /* ARGSUSED */ 4553 static int 4554 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4555 struct cred *cr, mblk_t **mpp) 4556 { 4557 int error; 4558 4559 if (so->so_family != AF_INET && so->so_family != AF_INET6) 4560 return (EAFNOSUPPORT); 4561 4562 if (so->so_state & SS_CANTSENDMORE) 4563 return (EPIPE); 4564 4565 if (so->so_type != SOCK_STREAM) 4566 return (EOPNOTSUPP); 4567 4568 if ((so->so_state & SS_ISCONNECTED) == 0) 4569 return (ENOTCONN); 4570 4571 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4572 if (error == 0) 4573 *mpp = NULL; 4574 return (error); 4575 } 4576 4577 /* 4578 * Sending data on a datagram socket. 4579 * Assumes caller has verified that SS_ISBOUND etc. are set. 4580 */ 4581 /* ARGSUSED */ 4582 static int 4583 sodgram_direct(struct sonode *so, struct sockaddr *name, 4584 socklen_t namelen, struct uio *uiop, int flags) 4585 { 4586 struct T_unitdata_req tudr; 4587 mblk_t *mp = NULL; 4588 int error = 0; 4589 void *addr; 4590 socklen_t addrlen; 4591 ssize_t len; 4592 struct stdata *stp = SOTOV(so)->v_stream; 4593 int so_state; 4594 queue_t *udp_wq; 4595 boolean_t connected; 4596 mblk_t *mpdata = NULL; 4597 sotpi_info_t *sti = SOTOTPI(so); 4598 uint32_t auditing = AU_AUDITING(); 4599 4600 ASSERT(name != NULL && namelen != 0); 4601 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4602 ASSERT(!(so->so_mode & SM_EXDATA)); 4603 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4604 ASSERT(SOTOV(so)->v_type == VSOCK); 4605 4606 /* Caller checked for proper length */ 4607 len = uiop->uio_resid; 4608 ASSERT(len <= sti->sti_tidu_size); 4609 4610 /* Length and family checks have been done by caller */ 4611 ASSERT(name->sa_family == so->so_family); 4612 ASSERT(so->so_family == AF_INET || 4613 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4614 ASSERT(so->so_family == AF_INET6 || 4615 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4616 4617 addr = name; 4618 addrlen = namelen; 4619 4620 if (stp->sd_sidp != NULL && 4621 (error = straccess(stp, JCWRITE)) != 0) 4622 goto done; 4623 4624 so_state = so->so_state; 4625 4626 connected = so_state & SS_ISCONNECTED; 4627 if (!connected) { 4628 tudr.PRIM_type = T_UNITDATA_REQ; 4629 tudr.DEST_length = addrlen; 4630 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4631 tudr.OPT_length = 0; 4632 tudr.OPT_offset = 0; 4633 4634 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4635 _ALLOC_INTR, CRED()); 4636 if (mp == NULL) { 4637 /* 4638 * Caught a signal waiting for memory. 4639 * Let send* return EINTR. 4640 */ 4641 error = EINTR; 4642 goto done; 4643 } 4644 } 4645 4646 /* 4647 * For UDP we don't break up the copyin into smaller pieces 4648 * as in the TCP case. That means if ENOMEM is returned by 4649 * mcopyinuio() then the uio vector has not been modified at 4650 * all and we fallback to either strwrite() or kstrputmsg() 4651 * below. Note also that we never generate priority messages 4652 * from here. 
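 * The fast path below therefore requires both that the UDP write
 * queue can accept a message (canput()) and that mcopyinuio()
 * copied the entire datagram; in that case the M_DATA chain (with
 * the T_UNITDATA_REQ mblk linked in front for unconnected sockets)
 * is passed straight to udp_wput(), bypassing the stream head.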
4653 */ 4654 udp_wq = stp->sd_wrq->q_next; 4655 if (canput(udp_wq) && 4656 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4657 ASSERT(DB_TYPE(mpdata) == M_DATA); 4658 ASSERT(uiop->uio_resid == 0); 4659 if (!connected) 4660 linkb(mp, mpdata); 4661 else 4662 mp = mpdata; 4663 if (auditing) 4664 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4665 4666 udp_wput(udp_wq, mp); 4667 return (0); 4668 } 4669 4670 ASSERT(mpdata == NULL); 4671 if (error != 0 && error != ENOMEM) { 4672 freemsg(mp); 4673 return (error); 4674 } 4675 4676 /* 4677 * For connected, let strwrite() handle the blocking case. 4678 * Otherwise we fall thru and use kstrputmsg(). 4679 */ 4680 if (connected) 4681 return (strwrite(SOTOV(so), uiop, CRED())); 4682 4683 if (auditing) 4684 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4685 4686 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4687 done: 4688 #ifdef SOCK_DEBUG 4689 if (error != 0) { 4690 eprintsoline(so, error); 4691 } 4692 #endif /* SOCK_DEBUG */ 4693 return (error); 4694 } 4695 4696 int 4697 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4698 { 4699 struct stdata *stp = SOTOV(so)->v_stream; 4700 ssize_t iosize, rmax, maxblk; 4701 queue_t *tcp_wq = stp->sd_wrq->q_next; 4702 mblk_t *newmp; 4703 int error = 0, wflag = 0; 4704 4705 ASSERT(so->so_mode & SM_BYTESTREAM); 4706 ASSERT(SOTOV(so)->v_type == VSOCK); 4707 4708 if (stp->sd_sidp != NULL && 4709 (error = straccess(stp, JCWRITE)) != 0) 4710 return (error); 4711 4712 if (uiop == NULL) { 4713 /* 4714 * kstrwritemp() should have checked sd_flag and 4715 * flow-control before coming here. If we end up 4716 * here it means that we can simply pass down the 4717 * data to tcp. 4718 */ 4719 ASSERT(mp != NULL); 4720 if (stp->sd_wputdatafunc != NULL) { 4721 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4722 NULL, NULL, NULL); 4723 if (newmp == NULL) { 4724 /* The caller will free mp */ 4725 return (ECOMM); 4726 } 4727 mp = newmp; 4728 } 4729 tcp_wput(tcp_wq, mp); 4730 return (0); 4731 } 4732 4733 /* Fallback to strwrite() to do proper error handling */ 4734 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4735 return (strwrite(SOTOV(so), uiop, cr)); 4736 4737 rmax = stp->sd_qn_maxpsz; 4738 ASSERT(rmax >= 0 || rmax == INFPSZ); 4739 if (rmax == 0 || uiop->uio_resid <= 0) 4740 return (0); 4741 4742 if (rmax == INFPSZ) 4743 rmax = uiop->uio_resid; 4744 4745 maxblk = stp->sd_maxblk; 4746 4747 for (;;) { 4748 iosize = MIN(uiop->uio_resid, rmax); 4749 4750 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4751 if (mp == NULL) { 4752 /* 4753 * Fallback to strwrite() for ENOMEM; if this 4754 * is our first time in this routine and the uio 4755 * vector has not been modified, we will end up 4756 * calling strwrite() without any flag set. 4757 */ 4758 if (error == ENOMEM) 4759 goto slow_send; 4760 else 4761 return (error); 4762 } 4763 ASSERT(uiop->uio_resid >= 0); 4764 /* 4765 * If mp is non-NULL and ENOMEM is set, it means that 4766 * mcopyinuio() was able to break down some of the user 4767 * data into one or more mblks. Send the partial data 4768 * to tcp and let the rest be handled in strwrite(). 
4769 */ 4770 ASSERT(error == 0 || error == ENOMEM); 4771 if (stp->sd_wputdatafunc != NULL) { 4772 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4773 NULL, NULL, NULL); 4774 if (newmp == NULL) { 4775 /* The caller will free mp */ 4776 return (ECOMM); 4777 } 4778 mp = newmp; 4779 } 4780 tcp_wput(tcp_wq, mp); 4781 4782 wflag |= NOINTR; 4783 4784 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4785 ASSERT(error == 0); 4786 break; 4787 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4788 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4789 slow_send: 4790 /* 4791 * We were able to send down partial data using 4792 * the direct call interface, but are now relying 4793 * on strwrite() to handle the non-fastpath cases. 4794 * If the socket is blocking we will sleep in 4795 * strwaitq() until write is permitted, otherwise, 4796 * we will need to return the amount of bytes 4797 * written so far back to the app. This is the 4798 * reason why we pass NOINTR flag to strwrite() 4799 * for non-blocking socket, because we don't want 4800 * to return EAGAIN when portion of the user data 4801 * has actually been sent down. 4802 */ 4803 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4804 } 4805 } 4806 return (0); 4807 } 4808 4809 /* 4810 * Update sti_faddr by asking the transport (unless AF_UNIX). 4811 */ 4812 /* ARGSUSED */ 4813 int 4814 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4815 boolean_t accept, struct cred *cr) 4816 { 4817 struct strbuf strbuf; 4818 int error = 0, res; 4819 void *addr; 4820 t_uscalar_t addrlen; 4821 k_sigset_t smask; 4822 sotpi_info_t *sti = SOTOTPI(so); 4823 4824 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4825 (void *)so, pr_state(so->so_state, so->so_mode))); 4826 4827 ASSERT(*namelen > 0); 4828 mutex_enter(&so->so_lock); 4829 so_lock_single(so); /* Set SOLOCKED */ 4830 4831 if (accept) { 4832 bcopy(sti->sti_faddr_sa, name, 4833 MIN(*namelen, sti->sti_faddr_len)); 4834 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4835 goto done; 4836 } 4837 4838 if (!(so->so_state & SS_ISCONNECTED)) { 4839 error = ENOTCONN; 4840 goto done; 4841 } 4842 /* Added this check for X/Open */ 4843 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4844 error = EINVAL; 4845 if (xnet_check_print) { 4846 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4847 } 4848 goto done; 4849 } 4850 4851 if (sti->sti_faddr_valid) { 4852 bcopy(sti->sti_faddr_sa, name, 4853 MIN(*namelen, sti->sti_faddr_len)); 4854 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4855 goto done; 4856 } 4857 4858 #ifdef DEBUG 4859 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4860 pr_addr(so->so_family, sti->sti_faddr_sa, 4861 (t_uscalar_t)sti->sti_faddr_len))); 4862 #endif /* DEBUG */ 4863 4864 if (so->so_family == AF_UNIX) { 4865 /* Transport has different name space - return local info */ 4866 if (sti->sti_faddr_noxlate) 4867 *namelen = 0; 4868 error = 0; 4869 goto done; 4870 } 4871 4872 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4873 4874 ASSERT(sti->sti_faddr_sa); 4875 /* Allocate local buffer to use with ioctl */ 4876 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4877 mutex_exit(&so->so_lock); 4878 addr = kmem_alloc(addrlen, KM_SLEEP); 4879 4880 /* 4881 * Issue TI_GETPEERNAME with signals masked. 4882 * Put the result in sti_faddr_sa so that getpeername works after 4883 * a shutdown(output). 4884 * If the ioctl fails (e.g. 
due to a ECONNRESET) the error is reposted 4885 * back to the socket. 4886 */ 4887 strbuf.buf = addr; 4888 strbuf.maxlen = addrlen; 4889 strbuf.len = 0; 4890 4891 sigintr(&smask, 0); 4892 res = 0; 4893 ASSERT(cr); 4894 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4895 0, K_TO_K, cr, &res); 4896 sigunintr(&smask); 4897 4898 mutex_enter(&so->so_lock); 4899 /* 4900 * If there is an error record the error in so_error put don't fail 4901 * the getpeername. Instead fallback on the recorded 4902 * sti->sti_faddr_sa. 4903 */ 4904 if (error) { 4905 /* 4906 * Various stream head errors can be returned to the ioctl. 4907 * However, it is impossible to determine which ones of 4908 * these are really socket level errors that were incorrectly 4909 * consumed by the ioctl. Thus this code silently ignores the 4910 * error - to code explicitly does not reinstate the error 4911 * using soseterror(). 4912 * Experiments have shows that at least this set of 4913 * errors are reported and should not be reinstated on the 4914 * socket: 4915 * EINVAL E.g. if an I_LINK was in effect when 4916 * getpeername was called. 4917 * EPIPE The ioctl error semantics prefer the write 4918 * side error over the read side error. 4919 * ENOTCONN The transport just got disconnected but 4920 * sockfs had not yet seen the T_DISCON_IND 4921 * when issuing the ioctl. 4922 */ 4923 error = 0; 4924 } else if (res == 0 && strbuf.len > 0 && 4925 (so->so_state & SS_ISCONNECTED)) { 4926 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 4927 sti->sti_faddr_len = (socklen_t)strbuf.len; 4928 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 4929 sti->sti_faddr_valid = 1; 4930 4931 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 4932 *namelen = sti->sti_faddr_len; 4933 } 4934 kmem_free(addr, addrlen); 4935 #ifdef DEBUG 4936 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4937 pr_addr(so->so_family, sti->sti_faddr_sa, 4938 (t_uscalar_t)sti->sti_faddr_len))); 4939 #endif /* DEBUG */ 4940 done: 4941 so_unlock_single(so, SOLOCKED); 4942 mutex_exit(&so->so_lock); 4943 return (error); 4944 } 4945 4946 /* 4947 * Update sti_laddr by asking the transport (unless AF_UNIX). 4948 */ 4949 int 4950 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4951 struct cred *cr) 4952 { 4953 struct strbuf strbuf; 4954 int error = 0, res; 4955 void *addr; 4956 t_uscalar_t addrlen; 4957 k_sigset_t smask; 4958 sotpi_info_t *sti = SOTOTPI(so); 4959 4960 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4961 (void *)so, pr_state(so->so_state, so->so_mode))); 4962 4963 ASSERT(*namelen > 0); 4964 mutex_enter(&so->so_lock); 4965 so_lock_single(so); /* Set SOLOCKED */ 4966 4967 #ifdef DEBUG 4968 4969 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4970 pr_addr(so->so_family, sti->sti_laddr_sa, 4971 (t_uscalar_t)sti->sti_laddr_len))); 4972 #endif /* DEBUG */ 4973 if (sti->sti_laddr_valid) { 4974 bcopy(sti->sti_laddr_sa, name, 4975 MIN(*namelen, sti->sti_laddr_len)); 4976 *namelen = sti->sti_laddr_len; 4977 goto done; 4978 } 4979 4980 if (so->so_family == AF_UNIX) { 4981 /* Transport has different name space - return local info */ 4982 error = 0; 4983 *namelen = 0; 4984 goto done; 4985 } 4986 if (!(so->so_state & SS_ISBOUND)) { 4987 /* If not bound, then nothing to return. 
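Nothing is copied out and *namelen is left unchanged in that case; the call still succeeds.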
*/ 4988 error = 0; 4989 goto done; 4990 } 4991 4992 /* Allocate local buffer to use with ioctl */ 4993 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 4994 mutex_exit(&so->so_lock); 4995 addr = kmem_alloc(addrlen, KM_SLEEP); 4996 4997 /* 4998 * Issue TI_GETMYNAME with signals masked. 4999 * Put the result in sti_laddr_sa so that getsockname works after 5000 * a shutdown(output). 5001 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5002 * back to the socket. 5003 */ 5004 strbuf.buf = addr; 5005 strbuf.maxlen = addrlen; 5006 strbuf.len = 0; 5007 5008 sigintr(&smask, 0); 5009 res = 0; 5010 ASSERT(cr); 5011 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5012 0, K_TO_K, cr, &res); 5013 sigunintr(&smask); 5014 5015 mutex_enter(&so->so_lock); 5016 /* 5017 * If there is an error record the error in so_error put don't fail 5018 * the getsockname. Instead fallback on the recorded 5019 * sti->sti_laddr_sa. 5020 */ 5021 if (error) { 5022 /* 5023 * Various stream head errors can be returned to the ioctl. 5024 * However, it is impossible to determine which ones of 5025 * these are really socket level errors that were incorrectly 5026 * consumed by the ioctl. Thus this code silently ignores the 5027 * error - to code explicitly does not reinstate the error 5028 * using soseterror(). 5029 * Experiments have shows that at least this set of 5030 * errors are reported and should not be reinstated on the 5031 * socket: 5032 * EINVAL E.g. if an I_LINK was in effect when 5033 * getsockname was called. 5034 * EPIPE The ioctl error semantics prefer the write 5035 * side error over the read side error. 5036 */ 5037 error = 0; 5038 } else if (res == 0 && strbuf.len > 0 && 5039 (so->so_state & SS_ISBOUND)) { 5040 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5041 sti->sti_laddr_len = (socklen_t)strbuf.len; 5042 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5043 sti->sti_laddr_valid = 1; 5044 5045 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5046 *namelen = sti->sti_laddr_len; 5047 } 5048 kmem_free(addr, addrlen); 5049 #ifdef DEBUG 5050 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5051 pr_addr(so->so_family, sti->sti_laddr_sa, 5052 (t_uscalar_t)sti->sti_laddr_len))); 5053 #endif /* DEBUG */ 5054 done: 5055 so_unlock_single(so, SOLOCKED); 5056 mutex_exit(&so->so_lock); 5057 return (error); 5058 } 5059 5060 /* 5061 * Get socket options. For SOL_SOCKET options some options are handled 5062 * by the sockfs while others use the value recorded in the sonode as a 5063 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5064 * 5065 * On the return most *optlenp bytes are copied to optval. 
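 * For example, a user-level call such as
 *	getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sz, &len)
 * ends up here: sockfs first computes a fallback from so_sndbuf (or,
 * if that is zero, from the QHIWAT of the queue below the stream
 * head), then issues a T_SVR4_OPTMGMT_REQ with MGMT_flags T_CHECK;
 * the transport's answer is preferred and the fallback is only
 * copied out if the transport fails the request.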
5066 */ 5067 /* ARGSUSED */ 5068 int 5069 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5070 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5071 { 5072 struct T_optmgmt_req optmgmt_req; 5073 struct T_optmgmt_ack *optmgmt_ack; 5074 struct opthdr oh; 5075 struct opthdr *opt_res; 5076 mblk_t *mp = NULL; 5077 int error = 0; 5078 void *option = NULL; /* Set if fallback value */ 5079 t_uscalar_t maxlen = *optlenp; 5080 t_uscalar_t len; 5081 uint32_t value; 5082 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5083 struct timeval32 tmo_val32; 5084 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5085 5086 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5087 (void *)so, level, option_name, optval, (void *)optlenp, 5088 pr_state(so->so_state, so->so_mode))); 5089 5090 mutex_enter(&so->so_lock); 5091 so_lock_single(so); /* Set SOLOCKED */ 5092 5093 /* 5094 * Check for SOL_SOCKET options. 5095 * Certain SOL_SOCKET options are returned directly whereas 5096 * others only provide a default (fallback) value should 5097 * the T_SVR4_OPTMGMT_REQ fail. 5098 */ 5099 if (level == SOL_SOCKET) { 5100 /* Check parameters */ 5101 switch (option_name) { 5102 case SO_TYPE: 5103 case SO_ERROR: 5104 case SO_DEBUG: 5105 case SO_ACCEPTCONN: 5106 case SO_REUSEADDR: 5107 case SO_KEEPALIVE: 5108 case SO_DONTROUTE: 5109 case SO_BROADCAST: 5110 case SO_USELOOPBACK: 5111 case SO_OOBINLINE: 5112 case SO_SNDBUF: 5113 case SO_RCVBUF: 5114 #ifdef notyet 5115 case SO_SNDLOWAT: 5116 case SO_RCVLOWAT: 5117 #endif /* notyet */ 5118 case SO_DOMAIN: 5119 case SO_DGRAM_ERRIND: 5120 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5121 error = EINVAL; 5122 eprintsoline(so, error); 5123 goto done2; 5124 } 5125 break; 5126 case SO_RCVTIMEO: 5127 case SO_SNDTIMEO: 5128 if (get_udatamodel() == DATAMODEL_NONE || 5129 get_udatamodel() == DATAMODEL_NATIVE) { 5130 if (maxlen < sizeof (struct timeval)) { 5131 error = EINVAL; 5132 eprintsoline(so, error); 5133 goto done2; 5134 } 5135 } else { 5136 if (maxlen < sizeof (struct timeval32)) { 5137 error = EINVAL; 5138 eprintsoline(so, error); 5139 goto done2; 5140 } 5141 5142 } 5143 break; 5144 case SO_LINGER: 5145 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5146 error = EINVAL; 5147 eprintsoline(so, error); 5148 goto done2; 5149 } 5150 break; 5151 case SO_SND_BUFINFO: 5152 if (maxlen < (t_uscalar_t) 5153 sizeof (struct so_snd_bufinfo)) { 5154 error = EINVAL; 5155 eprintsoline(so, error); 5156 goto done2; 5157 } 5158 break; 5159 } 5160 5161 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5162 5163 switch (option_name) { 5164 case SO_TYPE: 5165 value = so->so_type; 5166 option = &value; 5167 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5168 5169 case SO_ERROR: 5170 value = sogeterr(so, B_TRUE); 5171 option = &value; 5172 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5173 5174 case SO_ACCEPTCONN: 5175 if (so->so_state & SS_ACCEPTCONN) 5176 value = SO_ACCEPTCONN; 5177 else 5178 value = 0; 5179 #ifdef DEBUG 5180 if (value) { 5181 dprintso(so, 1, 5182 ("sotpi_getsockopt: 0x%x is set\n", 5183 option_name)); 5184 } else { 5185 dprintso(so, 1, 5186 ("sotpi_getsockopt: 0x%x not set\n", 5187 option_name)); 5188 } 5189 #endif /* DEBUG */ 5190 option = &value; 5191 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5192 5193 case SO_DEBUG: 5194 case SO_REUSEADDR: 5195 case SO_KEEPALIVE: 5196 case SO_DONTROUTE: 5197 case SO_BROADCAST: 5198 case SO_USELOOPBACK: 5199 case SO_OOBINLINE: 5200 case 
SO_DGRAM_ERRIND: 5201 value = (so->so_options & option_name); 5202 #ifdef DEBUG 5203 if (value) { 5204 dprintso(so, 1, 5205 ("sotpi_getsockopt: 0x%x is set\n", 5206 option_name)); 5207 } else { 5208 dprintso(so, 1, 5209 ("sotpi_getsockopt: 0x%x not set\n", 5210 option_name)); 5211 } 5212 #endif /* DEBUG */ 5213 option = &value; 5214 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5215 5216 /* 5217 * The following options are only returned by sockfs when the 5218 * T_SVR4_OPTMGMT_REQ fails. 5219 */ 5220 case SO_LINGER: 5221 option = &so->so_linger; 5222 len = (t_uscalar_t)sizeof (struct linger); 5223 break; 5224 case SO_SNDBUF: { 5225 ssize_t lvalue; 5226 5227 /* 5228 * If the option has not been set then get a default 5229 * value from the read queue. This value is 5230 * returned if the transport fails 5231 * the T_SVR4_OPTMGMT_REQ. 5232 */ 5233 lvalue = so->so_sndbuf; 5234 if (lvalue == 0) { 5235 mutex_exit(&so->so_lock); 5236 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5237 QHIWAT, 0, &lvalue); 5238 mutex_enter(&so->so_lock); 5239 dprintso(so, 1, 5240 ("got SO_SNDBUF %ld from q\n", lvalue)); 5241 } 5242 value = (int)lvalue; 5243 option = &value; 5244 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5245 break; 5246 } 5247 case SO_RCVBUF: { 5248 ssize_t lvalue; 5249 5250 /* 5251 * If the option has not been set then get a default 5252 * value from the read queue. This value is 5253 * returned if the transport fails 5254 * the T_SVR4_OPTMGMT_REQ. 5255 * 5256 * XXX If SO_RCVBUF has been set and this is an 5257 * XPG 4.2 application then do not ask the transport 5258 * since the transport might adjust the value and not 5259 * return exactly what was set by the application. 5260 * For non-XPG 4.2 application we return the value 5261 * that the transport is actually using. 5262 */ 5263 lvalue = so->so_rcvbuf; 5264 if (lvalue == 0) { 5265 mutex_exit(&so->so_lock); 5266 (void) strqget(RD(strvp2wq(SOTOV(so))), 5267 QHIWAT, 0, &lvalue); 5268 mutex_enter(&so->so_lock); 5269 dprintso(so, 1, 5270 ("got SO_RCVBUF %ld from q\n", lvalue)); 5271 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5272 value = (int)lvalue; 5273 option = &value; 5274 goto copyout; /* skip asking transport */ 5275 } 5276 value = (int)lvalue; 5277 option = &value; 5278 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5279 break; 5280 } 5281 case SO_DOMAIN: 5282 value = so->so_family; 5283 option = &value; 5284 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5285 5286 #ifdef notyet 5287 /* 5288 * We do not implement the semantics of these options 5289 * thus we shouldn't implement the options either. 
5290 */ 5291 case SO_SNDLOWAT: 5292 value = so->so_sndlowat; 5293 option = &value; 5294 break; 5295 case SO_RCVLOWAT: 5296 value = so->so_rcvlowat; 5297 option = &value; 5298 break; 5299 #endif /* notyet */ 5300 case SO_SNDTIMEO: 5301 case SO_RCVTIMEO: { 5302 clock_t val; 5303 5304 if (option_name == SO_RCVTIMEO) 5305 val = drv_hztousec(so->so_rcvtimeo); 5306 else 5307 val = drv_hztousec(so->so_sndtimeo); 5308 tmo_val.tv_sec = val / (1000 * 1000); 5309 tmo_val.tv_usec = val % (1000 * 1000); 5310 if (get_udatamodel() == DATAMODEL_NONE || 5311 get_udatamodel() == DATAMODEL_NATIVE) { 5312 option = &tmo_val; 5313 len = sizeof (struct timeval); 5314 } else { 5315 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5316 option = &tmo_val32; 5317 len = sizeof (struct timeval32); 5318 } 5319 break; 5320 } 5321 case SO_SND_BUFINFO: { 5322 snd_bufinfo.sbi_wroff = 5323 (so->so_proto_props).sopp_wroff; 5324 snd_bufinfo.sbi_maxblk = 5325 (so->so_proto_props).sopp_maxblk; 5326 snd_bufinfo.sbi_maxpsz = 5327 (so->so_proto_props).sopp_maxpsz; 5328 snd_bufinfo.sbi_tail = 5329 (so->so_proto_props).sopp_tail; 5330 option = &snd_bufinfo; 5331 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5332 break; 5333 } 5334 } 5335 } 5336 5337 mutex_exit(&so->so_lock); 5338 5339 /* Send request */ 5340 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5341 optmgmt_req.MGMT_flags = T_CHECK; 5342 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5343 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5344 5345 oh.level = level; 5346 oh.name = option_name; 5347 oh.len = maxlen; 5348 5349 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5350 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5351 /* Let option management work in the presence of data flow control */ 5352 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5353 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5354 mp = NULL; 5355 mutex_enter(&so->so_lock); 5356 if (error) { 5357 eprintsoline(so, error); 5358 goto done2; 5359 } 5360 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5361 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5362 if (error) { 5363 if (option != NULL) { 5364 /* We have a fallback value */ 5365 error = 0; 5366 goto copyout; 5367 } 5368 eprintsoline(so, error); 5369 goto done2; 5370 } 5371 ASSERT(mp); 5372 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5373 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5374 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5375 if (opt_res == NULL) { 5376 if (option != NULL) { 5377 /* We have a fallback value */ 5378 error = 0; 5379 goto copyout; 5380 } 5381 error = EPROTO; 5382 eprintsoline(so, error); 5383 goto done; 5384 } 5385 option = &opt_res[1]; 5386 5387 /* check to ensure that the option is within bounds */ 5388 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5389 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5390 if (option != NULL) { 5391 /* We have a fallback value */ 5392 error = 0; 5393 goto copyout; 5394 } 5395 error = EPROTO; 5396 eprintsoline(so, error); 5397 goto done; 5398 } 5399 5400 len = opt_res->len; 5401 5402 copyout: { 5403 t_uscalar_t size = MIN(len, maxlen); 5404 bcopy(option, optval, size); 5405 bcopy(&size, optlenp, sizeof (size)); 5406 } 5407 done: 5408 freemsg(mp); 5409 done2: 5410 so_unlock_single(so, SOLOCKED); 5411 mutex_exit(&so->so_lock); 5412 5413 return (error); 5414 } 5415 5416 /* 5417 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 
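 * The request is sent with MGMT_flags set to T_NEGOTIATE, as opposed
 * to the T_CHECK used by sotpi_getsockopt() above.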
5418 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5419 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5420 * setsockopt has to work even if the transport does not support the option. 5421 */ 5422 /* ARGSUSED */ 5423 int 5424 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5425 const void *optval, t_uscalar_t optlen, struct cred *cr) 5426 { 5427 struct T_optmgmt_req optmgmt_req; 5428 struct opthdr oh; 5429 mblk_t *mp; 5430 int error = 0; 5431 boolean_t handled = B_FALSE; 5432 5433 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5434 (void *)so, level, option_name, optval, optlen, 5435 pr_state(so->so_state, so->so_mode))); 5436 5437 /* X/Open requires this check */ 5438 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5439 if (xnet_check_print) 5440 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5441 return (EINVAL); 5442 } 5443 5444 mutex_enter(&so->so_lock); 5445 so_lock_single(so); /* Set SOLOCKED */ 5446 mutex_exit(&so->so_lock); 5447 5448 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5449 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5450 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5451 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5452 5453 oh.level = level; 5454 oh.name = option_name; 5455 oh.len = optlen; 5456 5457 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5458 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5459 /* Let option management work in the presence of data flow control */ 5460 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5461 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5462 mp = NULL; 5463 mutex_enter(&so->so_lock); 5464 if (error) { 5465 eprintsoline(so, error); 5466 goto done2; 5467 } 5468 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5469 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5470 if (error) { 5471 eprintsoline(so, error); 5472 goto done; 5473 } 5474 ASSERT(mp); 5475 /* No need to verify T_optmgmt_ack */ 5476 freemsg(mp); 5477 done: 5478 /* 5479 * Check for SOL_SOCKET options and record their values. 5480 * If we know about a SOL_SOCKET parameter and the transport 5481 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5482 * EPROTO) we let the setsockopt succeed. 
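 * For example, setting SO_RCVBUF on a transport that rejects the
 * option still updates so_rcvbuf below, so a later getsockopt()
 * can return the recorded value through the fallback logic in
 * sotpi_getsockopt() above.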
5483 */ 5484 if (level == SOL_SOCKET) { 5485 /* Check parameters */ 5486 switch (option_name) { 5487 case SO_DEBUG: 5488 case SO_REUSEADDR: 5489 case SO_KEEPALIVE: 5490 case SO_DONTROUTE: 5491 case SO_BROADCAST: 5492 case SO_USELOOPBACK: 5493 case SO_OOBINLINE: 5494 case SO_SNDBUF: 5495 case SO_RCVBUF: 5496 #ifdef notyet 5497 case SO_SNDLOWAT: 5498 case SO_RCVLOWAT: 5499 #endif /* notyet */ 5500 case SO_DGRAM_ERRIND: 5501 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5502 error = EINVAL; 5503 eprintsoline(so, error); 5504 goto done2; 5505 } 5506 ASSERT(optval); 5507 handled = B_TRUE; 5508 break; 5509 case SO_SNDTIMEO: 5510 case SO_RCVTIMEO: 5511 if (get_udatamodel() == DATAMODEL_NONE || 5512 get_udatamodel() == DATAMODEL_NATIVE) { 5513 if (optlen != sizeof (struct timeval)) { 5514 error = EINVAL; 5515 eprintsoline(so, error); 5516 goto done2; 5517 } 5518 } else { 5519 if (optlen != sizeof (struct timeval32)) { 5520 error = EINVAL; 5521 eprintsoline(so, error); 5522 goto done2; 5523 } 5524 } 5525 ASSERT(optval); 5526 handled = B_TRUE; 5527 break; 5528 case SO_LINGER: 5529 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5530 error = EINVAL; 5531 eprintsoline(so, error); 5532 goto done2; 5533 } 5534 ASSERT(optval); 5535 handled = B_TRUE; 5536 break; 5537 } 5538 5539 #define intvalue (*(int32_t *)optval) 5540 5541 switch (option_name) { 5542 case SO_TYPE: 5543 case SO_ERROR: 5544 case SO_ACCEPTCONN: 5545 /* Can't be set */ 5546 error = ENOPROTOOPT; 5547 goto done2; 5548 case SO_LINGER: { 5549 struct linger *l = (struct linger *)optval; 5550 5551 so->so_linger.l_linger = l->l_linger; 5552 if (l->l_onoff) { 5553 so->so_linger.l_onoff = SO_LINGER; 5554 so->so_options |= SO_LINGER; 5555 } else { 5556 so->so_linger.l_onoff = 0; 5557 so->so_options &= ~SO_LINGER; 5558 } 5559 break; 5560 } 5561 5562 case SO_DEBUG: 5563 #ifdef SOCK_TEST 5564 if (intvalue & 2) 5565 sock_test_timelimit = 10 * hz; 5566 else 5567 sock_test_timelimit = 0; 5568 5569 if (intvalue & 4) 5570 do_useracc = 0; 5571 else 5572 do_useracc = 1; 5573 #endif /* SOCK_TEST */ 5574 /* FALLTHRU */ 5575 case SO_REUSEADDR: 5576 case SO_KEEPALIVE: 5577 case SO_DONTROUTE: 5578 case SO_BROADCAST: 5579 case SO_USELOOPBACK: 5580 case SO_OOBINLINE: 5581 case SO_DGRAM_ERRIND: 5582 if (intvalue != 0) { 5583 dprintso(so, 1, 5584 ("socket_setsockopt: setting 0x%x\n", 5585 option_name)); 5586 so->so_options |= option_name; 5587 } else { 5588 dprintso(so, 1, 5589 ("socket_setsockopt: clearing 0x%x\n", 5590 option_name)); 5591 so->so_options &= ~option_name; 5592 } 5593 break; 5594 /* 5595 * The following options are only returned by us when the 5596 * transport layer fails. 5597 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5598 * since the transport might adjust the value and not 5599 * return exactly what was set by the application. 5600 */ 5601 case SO_SNDBUF: 5602 so->so_sndbuf = intvalue; 5603 break; 5604 case SO_RCVBUF: 5605 so->so_rcvbuf = intvalue; 5606 break; 5607 case SO_RCVPSH: 5608 so->so_rcv_timer_interval = intvalue; 5609 break; 5610 #ifdef notyet 5611 /* 5612 * We do not implement the semantics of these options 5613 * thus we shouldn't implement the options either. 
5614 */ 5615 case SO_SNDLOWAT: 5616 so->so_sndlowat = intvalue; 5617 break; 5618 case SO_RCVLOWAT: 5619 so->so_rcvlowat = intvalue; 5620 break; 5621 #endif /* notyet */ 5622 case SO_SNDTIMEO: 5623 case SO_RCVTIMEO: { 5624 struct timeval tl; 5625 clock_t val; 5626 5627 if (get_udatamodel() == DATAMODEL_NONE || 5628 get_udatamodel() == DATAMODEL_NATIVE) 5629 bcopy(&tl, (struct timeval *)optval, 5630 sizeof (struct timeval)); 5631 else 5632 TIMEVAL32_TO_TIMEVAL(&tl, 5633 (struct timeval32 *)optval); 5634 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5635 if (option_name == SO_RCVTIMEO) 5636 so->so_rcvtimeo = drv_usectohz(val); 5637 else 5638 so->so_sndtimeo = drv_usectohz(val); 5639 break; 5640 } 5641 } 5642 #undef intvalue 5643 5644 if (error) { 5645 if ((error == ENOPROTOOPT || error == EPROTO || 5646 error == EINVAL) && handled) { 5647 dprintso(so, 1, 5648 ("setsockopt: ignoring error %d for 0x%x\n", 5649 error, option_name)); 5650 error = 0; 5651 } 5652 } 5653 } 5654 done2: 5655 so_unlock_single(so, SOLOCKED); 5656 mutex_exit(&so->so_lock); 5657 return (error); 5658 } 5659 5660 /* 5661 * sotpi_close() is called when the last open reference goes away. 5662 */ 5663 /* ARGSUSED */ 5664 int 5665 sotpi_close(struct sonode *so, int flag, struct cred *cr) 5666 { 5667 struct vnode *vp = SOTOV(so); 5668 dev_t dev; 5669 int error = 0; 5670 sotpi_info_t *sti = SOTOTPI(so); 5671 5672 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 5673 (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 5674 5675 dev = sti->sti_dev; 5676 5677 ASSERT(STREAMSTAB(getmajor(dev))); 5678 5679 mutex_enter(&so->so_lock); 5680 so_lock_single(so); /* Set SOLOCKED */ 5681 5682 ASSERT(so_verify_oobstate(so)); 5683 5684 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 5685 sti->sti_nl7c_flags = 0; 5686 nl7c_close(so); 5687 } 5688 5689 if (vp->v_stream != NULL) { 5690 vnode_t *ux_vp; 5691 5692 if (so->so_family == AF_UNIX) { 5693 /* Could avoid this when CANTSENDMORE for !dgram */ 5694 so_unix_close(so); 5695 } 5696 5697 mutex_exit(&so->so_lock); 5698 /* 5699 * Disassemble the linkage from the AF_UNIX underlying file 5700 * system vnode to this socket (by atomically clearing 5701 * v_stream in vn_rele_stream) before strclose clears sd_vnode 5702 * and frees the stream head. 5703 */ 5704 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 5705 ASSERT(ux_vp->v_stream); 5706 sti->sti_ux_bound_vp = NULL; 5707 vn_rele_stream(ux_vp); 5708 } 5709 error = strclose(vp, flag, cr); 5710 vp->v_stream = NULL; 5711 mutex_enter(&so->so_lock); 5712 } 5713 5714 /* 5715 * Flush the T_DISCON_IND on sti_discon_ind_mp. 5716 */ 5717 so_flush_discon_ind(so); 5718 5719 so_unlock_single(so, SOLOCKED); 5720 mutex_exit(&so->so_lock); 5721 5722 /* 5723 * Needed for STREAMs. 5724 * Decrement the device driver's reference count for streams 5725 * opened via the clone dip. The driver was held in clone_open(). 5726 * The absence of clone_close() forces this asymmetry. 5727 */ 5728 if (so->so_flag & SOCLONE) 5729 ddi_rele_driver(getmajor(dev)); 5730 5731 return (error); 5732 } 5733 5734 static int 5735 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 5736 struct cred *cr, int32_t *rvalp) 5737 { 5738 struct vnode *vp = SOTOV(so); 5739 sotpi_info_t *sti = SOTOTPI(so); 5740 int error = 0; 5741 5742 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 5743 cmd, arg, pr_state(so->so_state, so->so_mode))); 5744 5745 switch (cmd) { 5746 case SIOCSQPTR: 5747 /* 5748 * SIOCSQPTR is valid only when helper stream is created 5749 * by the protocol. 
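 * For TPI sockets the request simply falls through to the
 * EOPNOTSUPP return below, together with _I_INSERT and _I_REMOVE.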
5750 */ 5751 case _I_INSERT: 5752 case _I_REMOVE: 5753 /* 5754 * Since there's no compelling reason to support these ioctls 5755 * on sockets, and doing so would increase the complexity 5756 * markedly, prevent it. 5757 */ 5758 return (EOPNOTSUPP); 5759 5760 case I_FIND: 5761 case I_LIST: 5762 case I_LOOK: 5763 case I_POP: 5764 case I_PUSH: 5765 /* 5766 * To prevent races and inconsistencies between the actual 5767 * state of the stream and the state according to the sonode, 5768 * we serialize all operations which modify or operate on the 5769 * list of modules on the socket's stream. 5770 */ 5771 mutex_enter(&sti->sti_plumb_lock); 5772 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 5773 mutex_exit(&sti->sti_plumb_lock); 5774 return (error); 5775 5776 default: 5777 if (so->so_version != SOV_STREAM) 5778 break; 5779 5780 /* 5781 * The imaginary "sockmod" has been popped; act as a stream. 5782 */ 5783 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5784 } 5785 5786 ASSERT(so->so_version != SOV_STREAM); 5787 5788 /* 5789 * Process socket-specific ioctls. 5790 */ 5791 switch (cmd) { 5792 case FIONBIO: { 5793 int32_t value; 5794 5795 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5796 (mode & (int)FKIOCTL))) 5797 return (EFAULT); 5798 5799 mutex_enter(&so->so_lock); 5800 if (value) { 5801 so->so_state |= SS_NDELAY; 5802 } else { 5803 so->so_state &= ~SS_NDELAY; 5804 } 5805 mutex_exit(&so->so_lock); 5806 return (0); 5807 } 5808 5809 case FIOASYNC: { 5810 int32_t value; 5811 5812 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5813 (mode & (int)FKIOCTL))) 5814 return (EFAULT); 5815 5816 mutex_enter(&so->so_lock); 5817 /* 5818 * SS_ASYNC flag not already set correctly? 5819 * (!value != !(so->so_state & SS_ASYNC)) 5820 * but some engineers find that too hard to read. 5821 */ 5822 if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 5823 value != 0 && (so->so_state & SS_ASYNC) == 0) 5824 error = so_flip_async(so, vp, mode, cr); 5825 mutex_exit(&so->so_lock); 5826 return (error); 5827 } 5828 5829 case SIOCSPGRP: 5830 case FIOSETOWN: { 5831 pid_t pgrp; 5832 5833 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 5834 (mode & (int)FKIOCTL))) 5835 return (EFAULT); 5836 5837 mutex_enter(&so->so_lock); 5838 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 5839 /* Any change? */ 5840 if (pgrp != so->so_pgrp) 5841 error = so_set_siggrp(so, vp, pgrp, mode, cr); 5842 mutex_exit(&so->so_lock); 5843 return (error); 5844 } 5845 case SIOCGPGRP: 5846 case FIOGETOWN: 5847 if (so_copyout(&so->so_pgrp, (void *)arg, 5848 sizeof (pid_t), (mode & (int)FKIOCTL))) 5849 return (EFAULT); 5850 return (0); 5851 5852 case SIOCATMARK: { 5853 int retval; 5854 uint_t so_state; 5855 5856 /* 5857 * strwaitmark has a finite timeout after which it 5858 * returns -1 if the mark state is undetermined. 5859 * In order to avoid any race between the mark state 5860 * in sockfs and the mark state in the stream head this 5861 * routine loops until the mark state can be determined 5862 * (or the urgent data indication has been removed by some 5863 * other thread). 5864 */ 5865 do { 5866 mutex_enter(&so->so_lock); 5867 so_state = so->so_state; 5868 mutex_exit(&so->so_lock); 5869 if (so_state & SS_RCVATMARK) { 5870 retval = 1; 5871 } else if (!(so_state & SS_OOBPEND)) { 5872 /* 5873 * No SIGURG has been generated -- there is no 5874 * pending or present urgent data. Thus can't 5875 * possibly be at the mark. 
5876 */ 5877 retval = 0; 5878 } else { 5879 /* 5880 * Have the stream head wait until there is 5881 * either some messages on the read queue, or 5882 * STRATMARK or STRNOTATMARK gets set. The 5883 * STRNOTATMARK flag is used so that the 5884 * transport can send up a MSGNOTMARKNEXT 5885 * M_DATA to indicate that it is not 5886 * at the mark and additional data is not about 5887 * to be send upstream. 5888 * 5889 * If the mark state is undetermined this will 5890 * return -1 and we will loop rechecking the 5891 * socket state. 5892 */ 5893 retval = strwaitmark(vp); 5894 } 5895 } while (retval == -1); 5896 5897 if (so_copyout(&retval, (void *)arg, sizeof (int), 5898 (mode & (int)FKIOCTL))) 5899 return (EFAULT); 5900 return (0); 5901 } 5902 5903 case I_FDINSERT: 5904 case I_SENDFD: 5905 case I_RECVFD: 5906 case I_ATMARK: 5907 case _SIOCSOCKFALLBACK: 5908 /* 5909 * These ioctls do not apply to sockets. I_FDINSERT can be 5910 * used to send M_PROTO messages without modifying the socket 5911 * state. I_SENDFD/RECVFD should not be used for socket file 5912 * descriptor passing since they assume a twisted stream. 5913 * SIOCATMARK must be used instead of I_ATMARK. 5914 * 5915 * _SIOCSOCKFALLBACK from an application should never be 5916 * processed. It is only generated by socktpi_open() or 5917 * in response to I_POP or I_PUSH. 5918 */ 5919 #ifdef DEBUG 5920 zcmn_err(getzoneid(), CE_WARN, 5921 "Unsupported STREAMS ioctl 0x%x on socket. " 5922 "Pid = %d\n", cmd, curproc->p_pid); 5923 #endif /* DEBUG */ 5924 return (EOPNOTSUPP); 5925 5926 case _I_GETPEERCRED: 5927 if ((mode & FKIOCTL) == 0) 5928 return (EINVAL); 5929 5930 mutex_enter(&so->so_lock); 5931 if ((so->so_mode & SM_CONNREQUIRED) == 0) { 5932 error = ENOTSUP; 5933 } else if ((so->so_state & SS_ISCONNECTED) == 0) { 5934 error = ENOTCONN; 5935 } else if (so->so_peercred != NULL) { 5936 k_peercred_t *kp = (k_peercred_t *)arg; 5937 kp->pc_cr = so->so_peercred; 5938 kp->pc_cpid = so->so_cpid; 5939 crhold(so->so_peercred); 5940 } else { 5941 error = EINVAL; 5942 } 5943 mutex_exit(&so->so_lock); 5944 return (error); 5945 5946 default: 5947 /* 5948 * Do the higher-order bits of the ioctl cmd indicate 5949 * that it is an I_* streams ioctl? 5950 */ 5951 if ((cmd & 0xffffff00U) == STR && 5952 so->so_version == SOV_SOCKBSD) { 5953 #ifdef DEBUG 5954 zcmn_err(getzoneid(), CE_WARN, 5955 "Unsupported STREAMS ioctl 0x%x on socket. " 5956 "Pid = %d\n", cmd, curproc->p_pid); 5957 #endif /* DEBUG */ 5958 return (EOPNOTSUPP); 5959 } 5960 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5961 } 5962 } 5963 5964 /* 5965 * Handle plumbing-related ioctls. 5966 */ 5967 static int 5968 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 5969 struct cred *cr, int32_t *rvalp) 5970 { 5971 static const char sockmod_name[] = "sockmod"; 5972 struct sonode *so = VTOSO(vp); 5973 char mname[FMNAMESZ + 1]; 5974 int error; 5975 sotpi_info_t *sti = SOTOTPI(so); 5976 5977 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 5978 5979 if (so->so_version == SOV_SOCKBSD) 5980 return (EOPNOTSUPP); 5981 5982 if (so->so_version == SOV_STREAM) { 5983 /* 5984 * The imaginary "sockmod" has been popped - act as a stream. 5985 * If this is a push of sockmod then change back to a socket. 5986 */ 5987 if (cmd == I_PUSH) { 5988 error = ((mode & FKIOCTL) ? 
copystr : copyinstr)( 5989 (void *)arg, mname, sizeof (mname), NULL); 5990 5991 if (error == 0 && strcmp(mname, sockmod_name) == 0) { 5992 dprintso(so, 0, ("socktpi_ioctl: going to " 5993 "socket version\n")); 5994 so_stream2sock(so); 5995 return (0); 5996 } 5997 } 5998 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5999 } 6000 6001 switch (cmd) { 6002 case I_PUSH: 6003 if (sti->sti_direct) { 6004 mutex_enter(&so->so_lock); 6005 so_lock_single(so); 6006 mutex_exit(&so->so_lock); 6007 6008 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 6009 cr, rvalp); 6010 6011 mutex_enter(&so->so_lock); 6012 if (error == 0) 6013 sti->sti_direct = 0; 6014 so_unlock_single(so, SOLOCKED); 6015 mutex_exit(&so->so_lock); 6016 6017 if (error != 0) 6018 return (error); 6019 } 6020 6021 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6022 if (error == 0) 6023 sti->sti_pushcnt++; 6024 return (error); 6025 6026 case I_POP: 6027 if (sti->sti_pushcnt == 0) { 6028 /* Emulate sockmod being popped */ 6029 dprintso(so, 0, 6030 ("socktpi_ioctl: going to STREAMS version\n")); 6031 return (so_sock2stream(so)); 6032 } 6033 6034 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6035 if (error == 0) 6036 sti->sti_pushcnt--; 6037 return (error); 6038 6039 case I_LIST: { 6040 struct str_mlist *kmlistp, *umlistp; 6041 struct str_list kstrlist; 6042 ssize_t kstrlistsize; 6043 int i, nmods; 6044 6045 STRUCT_DECL(str_list, ustrlist); 6046 STRUCT_INIT(ustrlist, mode); 6047 6048 if (arg == NULL) { 6049 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6050 if (error == 0) 6051 (*rvalp)++; /* Add one for sockmod */ 6052 return (error); 6053 } 6054 6055 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 6056 STRUCT_SIZE(ustrlist), mode & FKIOCTL); 6057 if (error != 0) 6058 return (error); 6059 6060 nmods = STRUCT_FGET(ustrlist, sl_nmods); 6061 if (nmods <= 0) 6062 return (EINVAL); 6063 /* 6064 * Cap nmods at nstrpush to prevent someone from 6065 * maliciously consuming lots of kernel memory. 6066 */ 6067 nmods = MIN(nmods, nstrpush); 6068 6069 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 6070 kstrlist.sl_nmods = nmods; 6071 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 6072 6073 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 6074 cr, rvalp); 6075 if (error != 0) 6076 goto done; 6077 6078 /* 6079 * Considering the module list as a 0-based array of sl_nmods 6080 * modules, sockmod should conceptually exist at slot 6081 * sti_pushcnt. Insert sockmod at this location by sliding all 6082 * of the module names after sti_pushcnt over by one. We know 6083 * that there will be room to do this since we allocated 6084 * sl_modlist with an additional slot. 6085 */ 6086 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--) 6087 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1]; 6088 6089 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name); 6090 kstrlist.sl_nmods++; 6091 6092 /* 6093 * Copy all of the entries out to ustrlist.
6094 */ 6095 kmlistp = kstrlist.sl_modlist; 6096 umlistp = STRUCT_FGETP(ustrlist, sl_modlist); 6097 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) { 6098 error = so_copyout(kmlistp++, umlistp++, 6099 sizeof (struct str_mlist), mode & FKIOCTL); 6100 if (error != 0) 6101 goto done; 6102 } 6103 6104 error = so_copyout(&i, (void *)arg, sizeof (int32_t), 6105 mode & FKIOCTL); 6106 if (error == 0) 6107 *rvalp = 0; 6108 done: 6109 kmem_free(kstrlist.sl_modlist, kstrlistsize); 6110 return (error); 6111 } 6112 case I_LOOK: 6113 if (sti->sti_pushcnt == 0) { 6114 return (so_copyout(sockmod_name, (void *)arg, 6115 sizeof (sockmod_name), mode & FKIOCTL)); 6116 } 6117 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6118 6119 case I_FIND: 6120 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6121 if (error && error != EINVAL) 6122 return (error); 6123 6124 /* If not found and the string was sockmod, return 1 */ 6125 if (*rvalp == 0 || error == EINVAL) { 6126 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6127 (void *)arg, mname, sizeof (mname), NULL); 6128 if (error == ENAMETOOLONG) 6129 error = EINVAL; 6130 6131 if (error == 0 && strcmp(mname, sockmod_name) == 0) 6132 *rvalp = 1; 6133 } 6134 return (error); 6135 6136 default: 6137 panic("socktpi_plumbioctl: unknown ioctl %d", cmd); 6138 break; 6139 } 6140 6141 return (0); 6142 } 6143 6144 /* 6145 * Wrapper around the streams poll routine that implements socket poll 6146 * semantics. 6147 * Sockfs never calls pollwakeup itself - the stream head takes care 6148 * of all pollwakeups. Since sockfs never holds so_lock when calling the 6149 * stream head there can never be a deadlock due to holding so_lock across 6150 * pollwakeup and acquiring so_lock in this routine. 6151 * 6152 * However, since the performance of VOP_POLL is critical we avoid 6153 * acquiring so_lock here. This is based on two assumptions: 6154 * - The poll implementation holds locks to serialize the VOP_POLL call 6155 * and a pollwakeup for the same pollhead. This ensures that, should 6156 * e.g. so_state change during a socktpi_poll call, the pollwakeup 6157 * (which strsock_* and strrput conspire to issue) is issued after 6158 * the state change. Thus the pollwakeup will block until VOP_POLL has 6159 * returned and then wake up poll and have it call VOP_POLL again. 6160 * - The reading of so_state without holding so_lock does not result in 6161 * stale data that is older than the latest state change that has dropped 6162 * so_lock. This is ensured by the mutex_exit issuing the appropriate 6163 * memory barrier to force the data into the coherency domain.
6164 */ 6165 static int 6166 sotpi_poll( 6167 struct sonode *so, 6168 short events, 6169 int anyyet, 6170 short *reventsp, 6171 struct pollhead **phpp) 6172 { 6173 short origevents = events; 6174 struct vnode *vp = SOTOV(so); 6175 int error; 6176 int so_state = so->so_state; /* snapshot */ 6177 sotpi_info_t *sti = SOTOTPI(so); 6178 6179 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n", 6180 (void *)vp, pr_state(so_state, so->so_mode), so->so_error)); 6181 6182 ASSERT(vp->v_type == VSOCK); 6183 ASSERT(vp->v_stream != NULL); 6184 6185 if (so->so_version == SOV_STREAM) { 6186 /* The imaginary "sockmod" has been popped - act as a stream */ 6187 return (strpoll(vp->v_stream, events, anyyet, 6188 reventsp, phpp)); 6189 } 6190 6191 if (!(so_state & SS_ISCONNECTED) && 6192 (so->so_mode & SM_CONNREQUIRED)) { 6193 /* Not connected yet - turn off write side events */ 6194 events &= ~(POLLOUT|POLLWRBAND); 6195 } 6196 /* 6197 * Check for errors without calling strpoll if the caller wants them. 6198 * In sockets the errors are represented as input/output events 6199 * and there is no need to ask the stream head for this information. 6200 */ 6201 if (so->so_error != 0 && 6202 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) { 6203 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; 6204 return (0); 6205 } 6206 /* 6207 * Ignore M_PROTO-only messages such as the T_EXDATA_IND messages. 6208 * These messages with only an M_PROTO/M_PCPROTO part and no M_DATA 6209 * will not trigger a POLLIN event with POLLRDDATA set. 6210 * The handling of urgent data (causing POLLRDBAND) is done by 6211 * inspecting SS_OOBPEND below. 6212 */ 6213 events |= POLLRDDATA; 6214 6215 /* 6216 * After shutdown(output) a stream head write error is set. 6217 * However, we should not return output events. 6218 */ 6219 events |= POLLNOERR; 6220 error = strpoll(vp->v_stream, events, anyyet, 6221 reventsp, phpp); 6222 if (error) 6223 return (error); 6224 6225 ASSERT(!(*reventsp & POLLERR)); 6226 6227 /* 6228 * Notes on T_CONN_IND handling for sockets. 6229 * 6230 * If strpoll() returned without events, SR_POLLIN is guaranteed 6231 * to be set, ensuring any subsequent strrput() runs pollwakeup(). 6232 * 6233 * Since the so_lock is not held, soqueueconnind() may have run 6234 * and a T_CONN_IND may be waiting. We now check for any queued 6235 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events 6236 * to ensure poll returns. 6237 * 6238 * However: 6239 * If the T_CONN_IND hasn't arrived by the time strpoll() returns, 6240 * when strrput() does run for an arriving M_PROTO with T_CONN_IND 6241 * the following actions will occur; taken together they ensure the 6242 * syscall will return. 6243 * 6244 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND, but if 6245 * the accept() was run on a non-blocking socket, sowaitconnind() 6246 * may have already returned EWOULDBLOCK and so may not be waiting to 6247 * process the message. Additionally, socktpi_poll() has probably 6248 * proceeded past the sti_conn_ind_head check below. 6249 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake 6250 * this thread, however that could occur before poll_common() 6251 * has entered cv_wait. 6252 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock. 6253 * 6254 * Before proceeding to cv_wait() in poll_common() for an event, 6255 * poll_common() atomically checks for T_POLLWAKE under the pc_lock, 6256 * and if set, re-calls strpoll() to ensure the late arriving 6257 * T_CONN_IND is recognized, and pollsys() returns.
6258 */ 6259 6260 if (sti->sti_conn_ind_head != NULL) 6261 *reventsp |= (POLLIN|POLLRDNORM) & events; 6262 6263 if (so->so_state & SS_OOBPEND) 6264 *reventsp |= POLLRDBAND & events; 6265 6266 if (sti->sti_nl7c_rcv_mp != NULL) { 6267 *reventsp |= (POLLIN|POLLRDNORM) & events; 6268 } 6269 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 6270 ((POLLIN|POLLRDNORM) & *reventsp)) { 6271 sti->sti_nl7c_flags |= NL7C_POLLIN; 6272 } 6273 6274 return (0); 6275 } 6276 6277 /*ARGSUSED*/ 6278 static int 6279 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6280 { 6281 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6282 int error = 0; 6283 6284 error = sonode_constructor(buf, cdrarg, kmflags); 6285 if (error != 0) 6286 return (error); 6287 6288 error = i_sotpi_info_constructor(&st->st_info); 6289 if (error != 0) 6290 sonode_destructor(buf, cdrarg); 6291 6292 st->st_sonode.so_priv = &st->st_info; 6293 6294 return (error); 6295 } 6296 6297 /*ARGSUSED1*/ 6298 static void 6299 socktpi_destructor(void *buf, void *cdrarg) 6300 { 6301 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6302 6303 ASSERT(st->st_sonode.so_priv == &st->st_info); 6304 st->st_sonode.so_priv = NULL; 6305 6306 i_sotpi_info_destructor(&st->st_info); 6307 sonode_destructor(buf, cdrarg); 6308 } 6309 6310 static int 6311 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 6312 { 6313 int retval; 6314 6315 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6316 struct sonode *so = (struct sonode *)buf; 6317 sotpi_info_t *sti = SOTOTPI(so); 6318 6319 mutex_enter(&socklist.sl_lock); 6320 6321 sti->sti_next_so = socklist.sl_list; 6322 sti->sti_prev_so = NULL; 6323 if (sti->sti_next_so != NULL) 6324 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6325 socklist.sl_list = so; 6326 6327 mutex_exit(&socklist.sl_lock); 6328 6329 } 6330 return (retval); 6331 } 6332 6333 static void 6334 socktpi_unix_destructor(void *buf, void *cdrarg) 6335 { 6336 struct sonode *so = (struct sonode *)buf; 6337 sotpi_info_t *sti = SOTOTPI(so); 6338 6339 mutex_enter(&socklist.sl_lock); 6340 6341 if (sti->sti_next_so != NULL) 6342 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6343 if (sti->sti_prev_so != NULL) 6344 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6345 else 6346 socklist.sl_list = sti->sti_next_so; 6347 6348 mutex_exit(&socklist.sl_lock); 6349 6350 socktpi_destructor(buf, cdrarg); 6351 } 6352 6353 int 6354 socktpi_init(void) 6355 { 6356 /* 6357 * Create sonode caches. We create a special one for AF_UNIX so 6358 * that we can track them for netstat(1m). 6359 */ 6360 socktpi_cache = kmem_cache_create("socktpi_cache", 6361 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6362 socktpi_destructor, NULL, NULL, NULL, 0); 6363 6364 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6365 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6366 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6367 6368 return (0); 6369 } 6370 6371 /* 6372 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6373 * 6374 * Caller must still update state and mode using sotpi_update_state(). 
6375 */ 6376 int 6377 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 6378 boolean_t *direct, queue_t **qp, struct cred *cr) 6379 { 6380 sotpi_info_t *sti; 6381 struct sockparams *origsp = so->so_sockparams; 6382 sock_lower_handle_t handle = so->so_proto_handle; 6383 struct stdata *stp; 6384 struct vnode *vp; 6385 queue_t *q; 6386 int error = 0; 6387 6388 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6389 SS_FALLBACK_PENDING); 6390 ASSERT(SOCK_IS_NONSTR(so)); 6391 6392 *qp = NULL; 6393 *direct = B_FALSE; 6394 so->so_sockparams = newsp; 6395 /* 6396 * Allocate and initialize fields required by TPI. 6397 */ 6398 (void) sotpi_info_create(so, KM_SLEEP); 6399 sotpi_info_init(so); 6400 6401 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) { 6402 sotpi_info_fini(so); 6403 sotpi_info_destroy(so); 6404 return (error); 6405 } 6406 ASSERT(handle == so->so_proto_handle); 6407 sti = SOTOTPI(so); 6408 if (sti->sti_direct != 0) 6409 *direct = B_TRUE; 6410 6411 /* 6412 * Keep the original sp around so we can properly dispose of the 6413 * sonode when the socket is being closed. 6414 */ 6415 sti->sti_orig_sp = origsp; 6416 6417 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 6418 so_alloc_addr(so, so->so_max_addr_len); 6419 6420 /* 6421 * If the application has done a SIOCSPGRP, make sure the 6422 * stream head is aware. This needs to take place before 6423 * the protocol starts sending up messages; otherwise we 6424 * might fail to generate SIGPOLL. 6425 * 6426 * It is possible that the application will receive duplicate 6427 * signals if some were already generated for either data or 6428 * connection indications. 6429 */ 6430 if (so->so_pgrp != 0) { 6431 if (so_set_events(so, so->so_vnode, cr) != 0) 6432 so->so_pgrp = 0; 6433 } 6434 6435 /* 6436 * Determine which queue to use. 6437 */ 6438 vp = SOTOV(so); 6439 stp = vp->v_stream; 6440 ASSERT(stp != NULL); 6441 q = stp->sd_wrq->q_next; 6442 6443 /* 6444 * Skip any modules that may have been autopushed when the device 6445 * was opened. 6446 */ 6447 while (q->q_next != NULL) 6448 q = q->q_next; 6449 *qp = _RD(q); 6450 6451 /* This is now a STREAMS socket */ 6452 so->so_not_str = B_FALSE; 6453 6454 return (error); 6455 } 6456 6457 /* 6458 * Revert a TPI sonode. Reverting the sonode is only allowed during 6459 * the fallback process. 6460 */ 6461 void 6462 sotpi_revert_sonode(struct sonode *so, struct cred *cr) 6463 { 6464 vnode_t *vp = SOTOV(so); 6465 6466 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6467 SS_FALLBACK_PENDING); 6468 ASSERT(!SOCK_IS_NONSTR(so)); 6469 ASSERT(vp->v_stream != NULL); 6470 6471 strclean(vp); 6472 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr); 6473 6474 /* 6475 * Restore the original sockparams. The caller is responsible for 6476 * dropping the ref to the new sp.
6477 */ 6478 so->so_sockparams = SOTOTPI(so)->sti_orig_sp; 6479 6480 sotpi_info_fini(so); 6481 sotpi_info_destroy(so); 6482 6483 /* This is no longer a STREAMS socket */ 6484 so->so_not_str = B_TRUE; 6485 } 6486 6487 void 6488 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 6489 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 6490 socklen_t faddrlen, short opts) 6491 { 6492 sotpi_info_t *sti = SOTOTPI(so); 6493 6494 so_proc_tcapability_ack(so, tcap); 6495 6496 so->so_options |= opts; 6497 6498 /* 6499 * Determine whether the foreign and local addresses are valid. 6500 */ 6501 if (laddrlen != 0) { 6502 ASSERT(laddrlen <= sti->sti_laddr_maxlen); 6503 sti->sti_laddr_len = laddrlen; 6504 bcopy(laddr, sti->sti_laddr_sa, laddrlen); 6505 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 6506 } 6507 6508 if (faddrlen != 0) { 6509 ASSERT(faddrlen <= sti->sti_faddr_maxlen); 6510 sti->sti_faddr_len = faddrlen; 6511 bcopy(faddr, sti->sti_faddr_sa, faddrlen); 6512 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 6513 } 6514 6515 } 6516 6517 /* 6518 * Allocate enough space to cache the local and foreign addresses. 6519 */ 6520 void 6521 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6522 { 6523 sotpi_info_t *sti = SOTOTPI(so); 6524 6525 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6526 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6527 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6528 P2ROUNDUP(maxlen, KMEM_ALIGN); 6529 so->so_max_addr_len = sti->sti_laddr_maxlen; 6530 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6531 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6532 + sti->sti_laddr_maxlen); 6533 6534 if (so->so_family == AF_UNIX) { 6535 /* 6536 * Initialize AF_UNIX related fields.
6537 */ 6538 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6539 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6540 } 6541 } 6542 6543 6544 sotpi_info_t * 6545 sotpi_sototpi(struct sonode *so) 6546 { 6547 sotpi_info_t *sti; 6548 6549 ASSERT(so != NULL); 6550 6551 sti = (sotpi_info_t *)so->so_priv; 6552 6553 ASSERT(sti != NULL); 6554 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6555 6556 return (sti); 6557 } 6558 6559 static int 6560 i_sotpi_info_constructor(sotpi_info_t *sti) 6561 { 6562 sti->sti_magic = SOTPI_INFO_MAGIC; 6563 sti->sti_ack_mp = NULL; 6564 sti->sti_discon_ind_mp = NULL; 6565 sti->sti_ux_bound_vp = NULL; 6566 sti->sti_unbind_mp = NULL; 6567 6568 sti->sti_conn_ind_head = NULL; 6569 sti->sti_conn_ind_tail = NULL; 6570 6571 sti->sti_laddr_sa = NULL; 6572 sti->sti_faddr_sa = NULL; 6573 6574 sti->sti_nl7c_flags = 0; 6575 sti->sti_nl7c_uri = NULL; 6576 sti->sti_nl7c_rcv_mp = NULL; 6577 6578 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6579 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6580 6581 return (0); 6582 } 6583 6584 static void 6585 i_sotpi_info_destructor(sotpi_info_t *sti) 6586 { 6587 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6588 ASSERT(sti->sti_ack_mp == NULL); 6589 ASSERT(sti->sti_discon_ind_mp == NULL); 6590 ASSERT(sti->sti_ux_bound_vp == NULL); 6591 ASSERT(sti->sti_unbind_mp == NULL); 6592 6593 ASSERT(sti->sti_conn_ind_head == NULL); 6594 ASSERT(sti->sti_conn_ind_tail == NULL); 6595 6596 ASSERT(sti->sti_laddr_sa == NULL); 6597 ASSERT(sti->sti_faddr_sa == NULL); 6598 6599 ASSERT(sti->sti_nl7c_flags == 0); 6600 ASSERT(sti->sti_nl7c_uri == NULL); 6601 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6602 6603 mutex_destroy(&sti->sti_plumb_lock); 6604 cv_destroy(&sti->sti_ack_cv); 6605 } 6606 6607 /* 6608 * Creates and attaches TPI information to the given sonode 6609 */ 6610 static boolean_t 6611 sotpi_info_create(struct sonode *so, int kmflags) 6612 { 6613 sotpi_info_t *sti; 6614 6615 ASSERT(so->so_priv == NULL); 6616 6617 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6618 return (B_FALSE); 6619 6620 if (i_sotpi_info_constructor(sti) != 0) { 6621 kmem_free(sti, sizeof (*sti)); 6622 return (B_FALSE); 6623 } 6624 6625 so->so_priv = (void *)sti; 6626 return (B_TRUE); 6627 } 6628 6629 /* 6630 * Initializes the TPI information. 
6631 */ 6632 static void 6633 sotpi_info_init(struct sonode *so) 6634 { 6635 struct vnode *vp = SOTOV(so); 6636 sotpi_info_t *sti = SOTOTPI(so); 6637 time_t now; 6638 6639 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6640 vp->v_rdev = sti->sti_dev; 6641 6642 sti->sti_orig_sp = NULL; 6643 6644 sti->sti_pushcnt = 0; 6645 6646 now = gethrestime_sec(); 6647 sti->sti_atime = now; 6648 sti->sti_mtime = now; 6649 sti->sti_ctime = now; 6650 6651 sti->sti_eaddr_mp = NULL; 6652 sti->sti_delayed_error = 0; 6653 6654 sti->sti_provinfo = NULL; 6655 6656 sti->sti_oobcnt = 0; 6657 sti->sti_oobsigcnt = 0; 6658 6659 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6660 6661 sti->sti_laddr_sa = 0; 6662 sti->sti_faddr_sa = 0; 6663 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6664 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6665 6666 sti->sti_laddr_valid = 0; 6667 sti->sti_faddr_valid = 0; 6668 sti->sti_faddr_noxlate = 0; 6669 6670 sti->sti_direct = 0; 6671 6672 ASSERT(sti->sti_ack_mp == NULL); 6673 ASSERT(sti->sti_ux_bound_vp == NULL); 6674 ASSERT(sti->sti_unbind_mp == NULL); 6675 6676 ASSERT(sti->sti_conn_ind_head == NULL); 6677 ASSERT(sti->sti_conn_ind_tail == NULL); 6678 } 6679 6680 /* 6681 * Given a sonode, grab the TPI info and free any data. 6682 */ 6683 static void 6684 sotpi_info_fini(struct sonode *so) 6685 { 6686 sotpi_info_t *sti = SOTOTPI(so); 6687 mblk_t *mp; 6688 6689 ASSERT(sti->sti_discon_ind_mp == NULL); 6690 6691 if ((mp = sti->sti_conn_ind_head) != NULL) { 6692 mblk_t *mp1; 6693 6694 while (mp) { 6695 mp1 = mp->b_next; 6696 mp->b_next = NULL; 6697 freemsg(mp); 6698 mp = mp1; 6699 } 6700 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6701 } 6702 6703 /* 6704 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6705 * indirect them. It also uses so_count as a validity test. 6706 */ 6707 mutex_enter(&so->so_lock); 6708 6709 if (sti->sti_laddr_sa) { 6710 ASSERT((caddr_t)sti->sti_faddr_sa == 6711 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6712 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6713 sti->sti_laddr_valid = 0; 6714 sti->sti_faddr_valid = 0; 6715 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6716 sti->sti_laddr_sa = NULL; 6717 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6718 sti->sti_faddr_sa = NULL; 6719 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6720 } 6721 6722 mutex_exit(&so->so_lock); 6723 6724 if ((mp = sti->sti_eaddr_mp) != NULL) { 6725 freemsg(mp); 6726 sti->sti_eaddr_mp = NULL; 6727 sti->sti_delayed_error = 0; 6728 } 6729 6730 if ((mp = sti->sti_ack_mp) != NULL) { 6731 freemsg(mp); 6732 sti->sti_ack_mp = NULL; 6733 } 6734 6735 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 6736 sti->sti_nl7c_rcv_mp = NULL; 6737 freemsg(mp); 6738 } 6739 sti->sti_nl7c_rcv_rval = 0; 6740 if (sti->sti_nl7c_uri != NULL) { 6741 nl7c_urifree(so); 6742 /* urifree() cleared nl7c_uri */ 6743 } 6744 if (sti->sti_nl7c_flags) { 6745 sti->sti_nl7c_flags = 0; 6746 } 6747 6748 ASSERT(sti->sti_ux_bound_vp == NULL); 6749 if ((mp = sti->sti_unbind_mp) != NULL) { 6750 freemsg(mp); 6751 sti->sti_unbind_mp = NULL; 6752 } 6753 } 6754 6755 /* 6756 * Destroys the TPI information attached to a sonode. 6757 */ 6758 static void 6759 sotpi_info_destroy(struct sonode *so) 6760 { 6761 sotpi_info_t *sti = SOTOTPI(so); 6762 6763 i_sotpi_info_destructor(sti); 6764 kmem_free(sti, sizeof (*sti)); 6765 6766 so->so_priv = NULL; 6767 } 6768 6769 /* 6770 * Create the global sotpi socket module entry. It will never be freed. 
6771 */ 6772 smod_info_t * 6773 sotpi_smod_create(void) 6774 { 6775 smod_info_t *smodp; 6776 6777 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 6778 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 6779 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 6780 /* 6781 * Initialize the smod_refcnt to 1 so it will never be freed. 6782 */ 6783 smodp->smod_refcnt = 1; 6784 smodp->smod_uc_version = SOCK_UC_VERSION; 6785 smodp->smod_dc_version = SOCK_DC_VERSION; 6786 smodp->smod_sock_create_func = &sotpi_create; 6787 smodp->smod_sock_destroy_func = &sotpi_destroy; 6788 return (smodp); 6789 } 6790
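/*
 * Editor's note: the block below is an illustrative, untested userland
 * sketch, not part of this file or of any build. It shows how the ioctl
 * paths implemented above (FIOSETOWN/FIOASYNC, SIOCATMARK, and the
 * emulated "sockmod" handled by socktpi_plumbioctl()) might be exercised
 * on a socket served by this TPI implementation, e.g. an AF_UNIX socket.
 * Error handling is omitted and the module name buffer size (32) is
 * simply assumed to be at least FMNAMESZ + 1.
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <sys/sockio.h>		SIOCATMARK
 *	#include <sys/filio.h>		FIOASYNC, FIOSETOWN
 *	#include <stropts.h>		I_PUSH, I_POP, I_LOOK
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		int s = socket(AF_UNIX, SOCK_STREAM, 0);
 *		int on = 1, atmark;
 *		pid_t me = getpid();
 *		char mod[32];
 *
 *		FIOSETOWN and FIOASYNC go through so_set_siggrp() and
 *		so_flip_async() in the socket ioctl path above:
 *		(void) ioctl(s, FIOSETOWN, &me);
 *		(void) ioctl(s, FIOASYNC, &on);
 *
 *		SIOCATMARK loops in sockfs until the mark state is known:
 *		(void) ioctl(s, SIOCATMARK, &atmark);
 *
 *		With no modules pushed, I_LOOK reports the emulated
 *		"sockmod" as the topmost module:
 *		if (ioctl(s, I_LOOK, mod) == 0)
 *			(void) printf("top module: %s\n", mod);
 *
 *		Popping "sockmod" switches the sonode to SOV_STREAM via
 *		so_sock2stream(); pushing it back restores socket
 *		semantics via so_stream2sock():
 *		(void) ioctl(s, I_POP, 0);
 *		(void) ioctl(s, I_PUSH, "sockmod");
 *
 *		(void) close(s);
 *		return (0);
 *	}
 */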