1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2015, Joyent, Inc. 25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
26 */ 27 28 #include <sys/types.h> 29 #include <sys/t_lock.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/buf.h> 33 #include <sys/conf.h> 34 #include <sys/cred.h> 35 #include <sys/kmem.h> 36 #include <sys/kmem_impl.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/suntpi.h> 51 #include <sys/ddi.h> 52 #include <sys/esunddi.h> 53 #include <sys/flock.h> 54 #include <sys/modctl.h> 55 #include <sys/vtrace.h> 56 #include <sys/cmn_err.h> 57 #include <sys/pathname.h> 58 59 #include <sys/socket.h> 60 #include <sys/socketvar.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <sys/un.h> 64 #include <sys/strsun.h> 65 66 #include <sys/tiuser.h> 67 #define _SUN_TPI_VERSION 2 68 #include <sys/tihdr.h> 69 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 70 71 #include <c2/audit.h> 72 73 #include <inet/common.h> 74 #include <inet/ip.h> 75 #include <inet/ip6.h> 76 #include <inet/tcp.h> 77 #include <inet/udp_impl.h> 78 79 #include <sys/zone.h> 80 81 #include <fs/sockfs/nl7c.h> 82 #include <fs/sockfs/nl7curi.h> 83 84 #include <fs/sockfs/sockcommon.h> 85 #include <fs/sockfs/socktpi.h> 86 #include <fs/sockfs/socktpi_impl.h> 87 88 /* 89 * Possible failures when memory can't be allocated. 
The documented behavior:
 *
 *              5.5:                    4.X:            XNET:
 * accept:      ENOMEM/ENOSR/EINTR      - (EINTR)       ENOMEM/ENOBUFS/ENOSR/
 *                                                      EINTR
 *      (4.X does not document EINTR but returns it)
 * bind:        ENOSR                   -               ENOBUFS/ENOSR
 * connect:     EINTR                   EINTR           ENOBUFS/ENOSR/EINTR
 * getpeername: ENOMEM/ENOSR            ENOBUFS (-)     ENOBUFS/ENOSR
 * getsockname: ENOMEM/ENOSR            ENOBUFS (-)     ENOBUFS/ENOSR
 *      (4.X getpeername and getsockname do not fail in practice)
 * getsockopt:  ENOMEM/ENOSR            -               ENOBUFS/ENOSR
 * listen:      -                       -               ENOBUFS
 * recv:        ENOMEM/ENOSR/EINTR      EINTR           ENOBUFS/ENOMEM/ENOSR/
 *                                                      EINTR
 * send:        ENOMEM/ENOSR/EINTR      ENOBUFS/EINTR   ENOBUFS/ENOMEM/ENOSR/
 *                                                      EINTR
 * setsockopt:  ENOMEM/ENOSR            -               ENOBUFS/ENOMEM/ENOSR
 * shutdown:    ENOMEM/ENOSR            -               ENOBUFS/ENOSR
 * socket:      ENOMEM/ENOSR            ENOBUFS         ENOBUFS/ENOMEM/ENOSR
 * socketpair:  ENOMEM/ENOSR            -               ENOBUFS/ENOMEM/ENOSR
 *
 * Resolution. When allocation fails:
 *      recv: return EINTR
 *      send: return EINTR
 *      connect, accept: EINTR
 *      bind, listen, shutdown (unbind, unix_close, disconnect): sleep
 *      socket, socketpair: ENOBUFS
 *      getpeername, getsockname: sleep
 *      getsockopt, setsockopt: sleep
 */

#ifdef SOCK_TEST
/*
 * Variables that make sockfs do something other than the standard TPI
 * for the AF_INET transports.
 *
 * solisten_tpi_tcp:
 *      TCP can handle a O_T_BIND_REQ with an increased backlog even though
 *      the transport is already bound. This is needed to avoid losing the
 *      port number should listen() do a T_UNBIND_REQ followed by a
 *      O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *      UDP and ICMP can handle a T_CONN_REQ.
 *      This is needed to make the sequence of connect(), getsockname()
 *      return the local IP address used to send packets to the
 *      connected-to destination.
 *
 * soconnect_tpi_tcp:
 *      TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
140 * Set this to non-zero to send TPI conformant messages to TCP in this 141 * respect. This is a performance optimization. 142 * 143 * soaccept_tpi_tcp: 144 * TCP can handle a T_CONN_REQ without the acceptor being bound. 145 * This is a performance optimization that has been picked up in XTI. 146 * 147 * soaccept_tpi_multioptions: 148 * When inheriting SOL_SOCKET options from the listener to the accepting 149 * socket send them as a single message for AF_INET{,6}. 150 */ 151 int solisten_tpi_tcp = 0; 152 int soconnect_tpi_udp = 0; 153 int soconnect_tpi_tcp = 0; 154 int soaccept_tpi_tcp = 0; 155 int soaccept_tpi_multioptions = 1; 156 #else /* SOCK_TEST */ 157 #define soconnect_tpi_tcp 0 158 #define soconnect_tpi_udp 0 159 #define solisten_tpi_tcp 0 160 #define soaccept_tpi_tcp 0 161 #define soaccept_tpi_multioptions 1 162 #endif /* SOCK_TEST */ 163 164 #ifdef SOCK_TEST 165 extern int do_useracc; 166 extern clock_t sock_test_timelimit; 167 #endif /* SOCK_TEST */ 168 169 extern uint32_t ucredsize; 170 171 /* 172 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 173 * applications working. Turn on this flag to disable these checks. 174 */ 175 int xnet_skip_checks = 0; 176 int xnet_check_print = 0; 177 int xnet_truncate_print = 0; 178 179 static void sotpi_destroy(struct sonode *); 180 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int, 181 int, int *, cred_t *cr); 182 183 static boolean_t sotpi_info_create(struct sonode *, int); 184 static void sotpi_info_init(struct sonode *); 185 static void sotpi_info_fini(struct sonode *); 186 static void sotpi_info_destroy(struct sonode *); 187 188 /* 189 * Do direct function call to the transport layer below; this would 190 * also allow the transport to utilize read-side synchronous stream 191 * interface if necessary. This is a /etc/system tunable that must 192 * not be modified on a running system. 
By default this is enabled 193 * for performance reasons and may be disabled for debugging purposes. 194 */ 195 boolean_t socktpi_direct = B_TRUE; 196 197 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 198 199 extern void sigintr(k_sigset_t *, int); 200 extern void sigunintr(k_sigset_t *); 201 202 static int sotpi_unbind(struct sonode *, int); 203 204 /* TPI sockfs sonode operations */ 205 int sotpi_init(struct sonode *, struct sonode *, struct cred *, 206 int); 207 static int sotpi_accept(struct sonode *, int, struct cred *, 208 struct sonode **); 209 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 210 int, struct cred *); 211 static int sotpi_listen(struct sonode *, int, struct cred *); 212 static int sotpi_connect(struct sonode *, struct sockaddr *, 213 socklen_t, int, int, struct cred *); 214 extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 215 struct uio *, struct cred *); 216 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 217 struct uio *, struct cred *); 218 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 219 struct cred *, mblk_t **); 220 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 221 struct uio *, void *, t_uscalar_t, int); 222 static int sodgram_direct(struct sonode *, struct sockaddr *, 223 socklen_t, struct uio *, int); 224 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 225 socklen_t *, boolean_t, struct cred *); 226 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 227 socklen_t *, struct cred *); 228 static int sotpi_shutdown(struct sonode *, int, struct cred *); 229 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 230 socklen_t *, int, struct cred *); 231 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 232 socklen_t, struct cred *); 233 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 234 int32_t *); 235 static int socktpi_plumbioctl(struct vnode *, 
int, intptr_t, int, 236 struct cred *, int32_t *); 237 static int sotpi_poll(struct sonode *, short, int, short *, 238 struct pollhead **); 239 static int sotpi_close(struct sonode *, int, struct cred *); 240 241 static int i_sotpi_info_constructor(sotpi_info_t *); 242 static void i_sotpi_info_destructor(sotpi_info_t *); 243 244 sonodeops_t sotpi_sonodeops = { 245 sotpi_init, /* sop_init */ 246 sotpi_accept, /* sop_accept */ 247 sotpi_bind, /* sop_bind */ 248 sotpi_listen, /* sop_listen */ 249 sotpi_connect, /* sop_connect */ 250 sotpi_recvmsg, /* sop_recvmsg */ 251 sotpi_sendmsg, /* sop_sendmsg */ 252 sotpi_sendmblk, /* sop_sendmblk */ 253 sotpi_getpeername, /* sop_getpeername */ 254 sotpi_getsockname, /* sop_getsockname */ 255 sotpi_shutdown, /* sop_shutdown */ 256 sotpi_getsockopt, /* sop_getsockopt */ 257 sotpi_setsockopt, /* sop_setsockopt */ 258 sotpi_ioctl, /* sop_ioctl */ 259 sotpi_poll, /* sop_poll */ 260 sotpi_close, /* sop_close */ 261 }; 262 263 /* 264 * Return a TPI socket vnode. 265 * 266 * Note that sockets assume that the driver will clone (either itself 267 * or by using the clone driver) i.e. a socket() call will always 268 * result in a new vnode being created. 269 */ 270 271 /* 272 * Common create code for socket and accept. If tso is set the values 273 * from that node is used instead of issuing a T_INFO_REQ. 274 */ 275 276 /* ARGSUSED */ 277 static struct sonode * 278 sotpi_create(struct sockparams *sp, int family, int type, int protocol, 279 int version, int sflags, int *errorp, cred_t *cr) 280 { 281 struct sonode *so; 282 kmem_cache_t *cp; 283 int sfamily = family; 284 285 ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 286 287 if (family == AF_NCA) { 288 /* 289 * The request is for an NCA socket so for NL7C use the 290 * INET domain instead and mark NL7C_AF_NCA below. 291 */ 292 family = AF_INET; 293 /* 294 * NL7C is not supported in the non-global zone, 295 * we enforce this restriction here. 
296 */ 297 if (getzoneid() != GLOBAL_ZONEID) { 298 *errorp = ENOTSUP; 299 return (NULL); 300 } 301 } 302 303 /* 304 * to be compatible with old tpi socket implementation ignore 305 * sleep flag (sflags) passed in 306 */ 307 cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 308 so = kmem_cache_alloc(cp, KM_SLEEP); 309 if (so == NULL) { 310 *errorp = ENOMEM; 311 return (NULL); 312 } 313 314 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 315 sotpi_info_init(so); 316 317 if (sfamily == AF_NCA) { 318 SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA; 319 } 320 321 if (version == SOV_DEFAULT) 322 version = so_default_version; 323 324 so->so_version = (short)version; 325 *errorp = 0; 326 327 return (so); 328 } 329 330 static void 331 sotpi_destroy(struct sonode *so) 332 { 333 kmem_cache_t *cp; 334 struct sockparams *origsp; 335 336 /* 337 * If there is a new dealloc function (ie. smod_destroy_func), 338 * then it should check the correctness of the ops. 339 */ 340 341 ASSERT(so->so_ops == &sotpi_sonodeops); 342 343 origsp = SOTOTPI(so)->sti_orig_sp; 344 345 sotpi_info_fini(so); 346 347 if (so->so_state & SS_FALLBACK_COMP) { 348 /* 349 * A fallback happend, which means that a sotpi_info_t struct 350 * was allocated (as opposed to being allocated from the TPI 351 * sonode cache. Therefore we explicitly free the struct 352 * here. 353 */ 354 sotpi_info_destroy(so); 355 ASSERT(origsp != NULL); 356 357 origsp->sp_smod_info->smod_sock_destroy_func(so); 358 SOCKPARAMS_DEC_REF(origsp); 359 } else { 360 sonode_fini(so); 361 cp = (so->so_family == AF_UNIX) ? 
socktpi_unix_cache : 362 socktpi_cache; 363 kmem_cache_free(cp, so); 364 } 365 } 366 367 /* ARGSUSED1 */ 368 int 369 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 370 { 371 major_t maj; 372 dev_t newdev; 373 struct vnode *vp; 374 int error = 0; 375 struct stdata *stp; 376 377 sotpi_info_t *sti = SOTOTPI(so); 378 379 dprint(1, ("sotpi_init()\n")); 380 381 /* 382 * over write the sleep flag passed in but that is ok 383 * as tpi socket does not honor sleep flag. 384 */ 385 flags |= FREAD|FWRITE; 386 387 /* 388 * Record in so_flag that it is a clone. 389 */ 390 if (getmajor(sti->sti_dev) == clone_major) 391 so->so_flag |= SOCLONE; 392 393 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 394 (so->so_family == AF_INET || so->so_family == AF_INET6) && 395 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 396 so->so_protocol == IPPROTO_IP)) { 397 /* Tell tcp or udp that it's talking to sockets */ 398 flags |= SO_SOCKSTR; 399 400 /* 401 * Here we indicate to socktpi_open() our attempt to 402 * make direct calls between sockfs and transport. 403 * The final decision is left to socktpi_open(). 404 */ 405 sti->sti_direct = 1; 406 407 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 408 if (so->so_type == SOCK_STREAM && tso != NULL) { 409 if (SOTOTPI(tso)->sti_direct) { 410 /* 411 * Inherit sti_direct from listener and pass 412 * SO_ACCEPTOR open flag to tcp, indicating 413 * that this is an accept fast-path instance. 414 */ 415 flags |= SO_ACCEPTOR; 416 } else { 417 /* 418 * sti_direct is not set on listener, meaning 419 * that the listener has been converted from 420 * a socket to a stream. Ensure that the 421 * acceptor inherits these settings. 422 */ 423 sti->sti_direct = 0; 424 flags &= ~SO_SOCKSTR; 425 } 426 } 427 } 428 429 /* 430 * Tell local transport that it is talking to sockets. 
431 */ 432 if (so->so_family == AF_UNIX) { 433 flags |= SO_SOCKSTR; 434 } 435 436 vp = SOTOV(so); 437 newdev = vp->v_rdev; 438 maj = getmajor(newdev); 439 ASSERT(STREAMSTAB(maj)); 440 441 error = stropen(vp, &newdev, flags, cr); 442 443 stp = vp->v_stream; 444 if (error == 0) { 445 if (so->so_flag & SOCLONE) 446 ASSERT(newdev != vp->v_rdev); 447 mutex_enter(&so->so_lock); 448 sti->sti_dev = newdev; 449 vp->v_rdev = newdev; 450 mutex_exit(&so->so_lock); 451 452 if (stp->sd_flag & STRISTTY) { 453 /* 454 * this is a post SVR4 tty driver - a socket can not 455 * be a controlling terminal. Fail the open. 456 */ 457 (void) sotpi_close(so, flags, cr); 458 return (ENOTTY); /* XXX */ 459 } 460 461 ASSERT(stp->sd_wrq != NULL); 462 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 463 464 /* 465 * If caller is interested in doing direct function call 466 * interface to/from transport module, probe the module 467 * directly beneath the streamhead to see if it qualifies. 468 * 469 * We turn off the direct interface when qualifications fail. 470 * In the acceptor case, we simply turn off the sti_direct 471 * flag on the socket. We do the fallback after the accept 472 * has completed, before the new socket is returned to the 473 * application. 474 */ 475 if (sti->sti_direct) { 476 queue_t *tq = stp->sd_wrq->q_next; 477 478 /* 479 * sti_direct is currently supported and tested 480 * only for tcp/udp; this is the main reason to 481 * have the following assertions. 482 */ 483 ASSERT(so->so_family == AF_INET || 484 so->so_family == AF_INET6); 485 ASSERT(so->so_protocol == IPPROTO_UDP || 486 so->so_protocol == IPPROTO_TCP || 487 so->so_protocol == IPPROTO_IP); 488 ASSERT(so->so_type == SOCK_DGRAM || 489 so->so_type == SOCK_STREAM); 490 491 /* 492 * Abort direct call interface if the module directly 493 * underneath the stream head is not defined with the 494 * _D_DIRECT flag. 
This could happen in the tcp or 495 * udp case, when some other module is autopushed 496 * above it, or for some reasons the expected module 497 * isn't purely D_MP (which is the main requirement). 498 */ 499 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 500 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 501 int rval; 502 503 /* Continue on without direct calls */ 504 sti->sti_direct = 0; 505 506 /* 507 * Cannot issue ioctl on fallback socket since 508 * there is no conn associated with the queue. 509 * The fallback downcall will notify the proto 510 * of the change. 511 */ 512 if (!(flags & SO_ACCEPTOR) && 513 !(flags & SO_FALLBACK)) { 514 if ((error = strioctl(vp, 515 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 516 cr, &rval)) != 0) { 517 (void) sotpi_close(so, flags, 518 cr); 519 return (error); 520 } 521 } 522 } 523 } 524 525 if (flags & SO_FALLBACK) { 526 /* 527 * The stream created does not have a conn. 528 * do stream set up after conn has been assigned 529 */ 530 return (error); 531 } 532 if (error = so_strinit(so, tso)) { 533 (void) sotpi_close(so, flags, cr); 534 return (error); 535 } 536 537 /* Enable sendfile() on AF_UNIX streams */ 538 if (so->so_family == AF_UNIX && so->so_type == SOCK_STREAM) { 539 mutex_enter(&so->so_lock); 540 so->so_mode |= SM_SENDFILESUPP; 541 mutex_exit(&so->so_lock); 542 } 543 544 /* Wildcard */ 545 if (so->so_protocol != so->so_sockparams->sp_protocol) { 546 int protocol = so->so_protocol; 547 /* 548 * Issue SO_PROTOTYPE setsockopt. 549 */ 550 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 551 &protocol, (t_uscalar_t)sizeof (protocol), cr); 552 if (error != 0) { 553 (void) sotpi_close(so, flags, cr); 554 /* 555 * Setsockopt often fails with ENOPROTOOPT but 556 * socket() should fail with 557 * EPROTONOSUPPORT/EPROTOTYPE. 558 */ 559 return (EPROTONOSUPPORT); 560 } 561 } 562 563 } else { 564 /* 565 * While the same socket can not be reopened (unlike specfs) 566 * the stream head sets STREOPENFAIL when the autopush fails. 
567 */ 568 if ((stp != NULL) && 569 (stp->sd_flag & STREOPENFAIL)) { 570 /* 571 * Open failed part way through. 572 */ 573 mutex_enter(&stp->sd_lock); 574 stp->sd_flag &= ~STREOPENFAIL; 575 mutex_exit(&stp->sd_lock); 576 (void) sotpi_close(so, flags, cr); 577 return (error); 578 /*NOTREACHED*/ 579 } 580 ASSERT(stp == NULL); 581 } 582 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 583 "sockfs open:maj %d vp %p so %p error %d", 584 maj, vp, so, error); 585 return (error); 586 } 587 588 /* 589 * Bind the socket to an unspecified address in sockfs only. 590 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 591 * required in all cases. 592 */ 593 static void 594 so_automatic_bind(struct sonode *so) 595 { 596 sotpi_info_t *sti = SOTOTPI(so); 597 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 598 599 ASSERT(MUTEX_HELD(&so->so_lock)); 600 ASSERT(!(so->so_state & SS_ISBOUND)); 601 ASSERT(sti->sti_unbind_mp); 602 603 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 604 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 605 sti->sti_laddr_sa->sa_family = so->so_family; 606 so->so_state |= SS_ISBOUND; 607 } 608 609 610 /* 611 * bind the socket. 612 * 613 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 614 * are passed in we allow rebinding. Note that for backwards compatibility 615 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 616 * Thus the rebinding code is currently not executed. 617 * 618 * The constraints for rebinding are: 619 * - it is a SOCK_DGRAM, or 620 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 621 * and no listen() has been done. 622 * This rebinding code was added based on some language in the XNET book 623 * about not returning EINVAL it the protocol allows rebinding. However, 624 * this language is not present in the Posix socket draft. Thus maybe the 625 * rebinding logic should be deleted from the source. 
626 * 627 * A null "name" can be used to unbind the socket if: 628 * - it is a SOCK_DGRAM, or 629 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 630 * and no listen() has been done. 631 */ 632 /* ARGSUSED */ 633 static int 634 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 635 socklen_t namelen, int backlog, int flags, struct cred *cr) 636 { 637 struct T_bind_req bind_req; 638 struct T_bind_ack *bind_ack; 639 int error = 0; 640 mblk_t *mp; 641 void *addr; 642 t_uscalar_t addrlen; 643 int unbind_on_err = 1; 644 boolean_t clear_acceptconn_on_err = B_FALSE; 645 boolean_t restore_backlog_on_err = B_FALSE; 646 int save_so_backlog; 647 t_scalar_t PRIM_type = O_T_BIND_REQ; 648 boolean_t tcp_udp_xport; 649 void *nl7c = NULL; 650 sotpi_info_t *sti = SOTOTPI(so); 651 652 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 653 (void *)so, (void *)name, namelen, backlog, flags, 654 pr_state(so->so_state, so->so_mode))); 655 656 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 657 658 if (!(flags & _SOBIND_LOCK_HELD)) { 659 mutex_enter(&so->so_lock); 660 so_lock_single(so); /* Set SOLOCKED */ 661 } else { 662 ASSERT(MUTEX_HELD(&so->so_lock)); 663 ASSERT(so->so_flag & SOLOCKED); 664 } 665 666 /* 667 * Make sure that there is a preallocated unbind_req message 668 * before binding. This message allocated when the socket is 669 * created but it might be have been consumed. 670 */ 671 if (sti->sti_unbind_mp == NULL) { 672 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 673 /* NOTE: holding so_lock while sleeping */ 674 sti->sti_unbind_mp = 675 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 676 cr); 677 } 678 679 if (flags & _SOBIND_REBIND) { 680 /* 681 * Called from solisten after doing an sotpi_unbind() or 682 * potentially without the unbind (latter for AF_INET{,6}). 
683 */ 684 ASSERT(name == NULL && namelen == 0); 685 686 if (so->so_family == AF_UNIX) { 687 ASSERT(sti->sti_ux_bound_vp); 688 addr = &sti->sti_ux_laddr; 689 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 690 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 691 "addr 0x%p, vp %p\n", 692 addrlen, 693 (void *)((struct so_ux_addr *)addr)->soua_vp, 694 (void *)sti->sti_ux_bound_vp)); 695 } else { 696 addr = sti->sti_laddr_sa; 697 addrlen = (t_uscalar_t)sti->sti_laddr_len; 698 } 699 } else if (flags & _SOBIND_UNSPEC) { 700 ASSERT(name == NULL && namelen == 0); 701 702 /* 703 * The caller checked SS_ISBOUND but not necessarily 704 * under so_lock 705 */ 706 if (so->so_state & SS_ISBOUND) { 707 /* No error */ 708 goto done; 709 } 710 711 /* Set an initial local address */ 712 switch (so->so_family) { 713 case AF_UNIX: 714 /* 715 * Use an address with same size as struct sockaddr 716 * just like BSD. 717 */ 718 sti->sti_laddr_len = 719 (socklen_t)sizeof (struct sockaddr); 720 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 721 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 722 sti->sti_laddr_sa->sa_family = so->so_family; 723 724 /* 725 * Pass down an address with the implicit bind 726 * magic number and the rest all zeros. 727 * The transport will return a unique address. 728 */ 729 sti->sti_ux_laddr.soua_vp = NULL; 730 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 731 addr = &sti->sti_ux_laddr; 732 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 733 break; 734 735 case AF_INET: 736 case AF_INET6: 737 /* 738 * An unspecified bind in TPI has a NULL address. 739 * Set the address in sockfs to have the sa_family. 740 */ 741 sti->sti_laddr_len = (so->so_family == AF_INET) ? 
742 (socklen_t)sizeof (sin_t) : 743 (socklen_t)sizeof (sin6_t); 744 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 745 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 746 sti->sti_laddr_sa->sa_family = so->so_family; 747 addr = NULL; 748 addrlen = 0; 749 break; 750 751 default: 752 /* 753 * An unspecified bind in TPI has a NULL address. 754 * Set the address in sockfs to be zero length. 755 * 756 * Can not assume there is a sa_family for all 757 * protocol families. For example, AF_X25 does not 758 * have a family field. 759 */ 760 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 761 sti->sti_laddr_len = 0; /* XXX correct? */ 762 addr = NULL; 763 addrlen = 0; 764 break; 765 } 766 767 } else { 768 if (so->so_state & SS_ISBOUND) { 769 /* 770 * If it is ok to rebind the socket, first unbind 771 * with the transport. A rebind to the NULL address 772 * is interpreted as an unbind. 773 * Note that a bind to NULL in BSD does unbind the 774 * socket but it fails with EINVAL. 775 * Note that regular sockets set SOV_SOCKBSD i.e. 776 * _SOBIND_SOCKBSD gets set here hence no type of 777 * socket does currently allow rebinding. 778 * 779 * If the name is NULL just do an unbind. 
780 */ 781 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 782 name != NULL) { 783 error = EINVAL; 784 unbind_on_err = 0; 785 eprintsoline(so, error); 786 goto done; 787 } 788 if ((so->so_mode & SM_CONNREQUIRED) && 789 (so->so_state & SS_CANTREBIND)) { 790 error = EINVAL; 791 unbind_on_err = 0; 792 eprintsoline(so, error); 793 goto done; 794 } 795 error = sotpi_unbind(so, 0); 796 if (error) { 797 eprintsoline(so, error); 798 goto done; 799 } 800 ASSERT(!(so->so_state & SS_ISBOUND)); 801 if (name == NULL) { 802 so->so_state &= 803 ~(SS_ISCONNECTED|SS_ISCONNECTING); 804 goto done; 805 } 806 } 807 808 /* X/Open requires this check */ 809 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 810 if (xnet_check_print) { 811 printf("sockfs: X/Open bind state check " 812 "caused EINVAL\n"); 813 } 814 error = EINVAL; 815 goto done; 816 } 817 818 switch (so->so_family) { 819 case AF_UNIX: 820 /* 821 * All AF_UNIX addresses are nul terminated 822 * when copied (copyin_name) in so the minimum 823 * length is 3 bytes. 824 */ 825 if (name == NULL || 826 (ssize_t)namelen <= sizeof (short) + 1) { 827 error = EISDIR; 828 eprintsoline(so, error); 829 goto done; 830 } 831 /* 832 * Verify so_family matches the bound family. 833 * BSD does not check this for AF_UNIX resulting 834 * in funny mknods. 835 */ 836 if (name->sa_family != so->so_family) { 837 error = EAFNOSUPPORT; 838 goto done; 839 } 840 break; 841 case AF_INET: 842 if (name == NULL) { 843 error = EINVAL; 844 eprintsoline(so, error); 845 goto done; 846 } 847 if ((size_t)namelen != sizeof (sin_t)) { 848 error = name->sa_family != so->so_family ? 849 EAFNOSUPPORT : EINVAL; 850 eprintsoline(so, error); 851 goto done; 852 } 853 if ((flags & _SOBIND_XPG4_2) && 854 (name->sa_family != so->so_family)) { 855 /* 856 * This check has to be made for X/Open 857 * sockets however application failures have 858 * been observed when it is applied to 859 * all sockets. 
860 */ 861 error = EAFNOSUPPORT; 862 eprintsoline(so, error); 863 goto done; 864 } 865 /* 866 * Force a zero sa_family to match so_family. 867 * 868 * Some programs like inetd(1M) don't set the 869 * family field. Other programs leave 870 * sin_family set to garbage - SunOS 4.X does 871 * not check the family field on a bind. 872 * We use the family field that 873 * was passed in to the socket() call. 874 */ 875 name->sa_family = so->so_family; 876 break; 877 878 case AF_INET6: { 879 #ifdef DEBUG 880 sin6_t *sin6 = (sin6_t *)name; 881 #endif /* DEBUG */ 882 883 if (name == NULL) { 884 error = EINVAL; 885 eprintsoline(so, error); 886 goto done; 887 } 888 if ((size_t)namelen != sizeof (sin6_t)) { 889 error = name->sa_family != so->so_family ? 890 EAFNOSUPPORT : EINVAL; 891 eprintsoline(so, error); 892 goto done; 893 } 894 if (name->sa_family != so->so_family) { 895 /* 896 * With IPv6 we require the family to match 897 * unlike in IPv4. 898 */ 899 error = EAFNOSUPPORT; 900 eprintsoline(so, error); 901 goto done; 902 } 903 #ifdef DEBUG 904 /* 905 * Verify that apps don't forget to clear 906 * sin6_scope_id etc 907 */ 908 if (sin6->sin6_scope_id != 0 && 909 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 910 zcmn_err(getzoneid(), CE_WARN, 911 "bind with uninitialized sin6_scope_id " 912 "(%d) on socket. Pid = %d\n", 913 (int)sin6->sin6_scope_id, 914 (int)curproc->p_pid); 915 } 916 if (sin6->__sin6_src_id != 0) { 917 zcmn_err(getzoneid(), CE_WARN, 918 "bind with uninitialized __sin6_src_id " 919 "(%d) on socket. Pid = %d\n", 920 (int)sin6->__sin6_src_id, 921 (int)curproc->p_pid); 922 } 923 #endif /* DEBUG */ 924 break; 925 } 926 default: 927 /* 928 * Don't do any length or sa_family check to allow 929 * non-sockaddr style addresses. 
930 */ 931 if (name == NULL) { 932 error = EINVAL; 933 eprintsoline(so, error); 934 goto done; 935 } 936 break; 937 } 938 939 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 940 error = ENAMETOOLONG; 941 eprintsoline(so, error); 942 goto done; 943 } 944 /* 945 * Save local address. 946 */ 947 sti->sti_laddr_len = (socklen_t)namelen; 948 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 949 bcopy(name, sti->sti_laddr_sa, namelen); 950 951 addr = sti->sti_laddr_sa; 952 addrlen = (t_uscalar_t)sti->sti_laddr_len; 953 switch (so->so_family) { 954 case AF_INET6: 955 case AF_INET: 956 break; 957 case AF_UNIX: { 958 struct sockaddr_un *soun = 959 (struct sockaddr_un *)sti->sti_laddr_sa; 960 struct vnode *vp, *rvp; 961 struct vattr vattr; 962 963 ASSERT(sti->sti_ux_bound_vp == NULL); 964 /* 965 * Create vnode for the specified path name. 966 * Keep vnode held with a reference in sti_ux_bound_vp. 967 * Use the vnode pointer as the address used in the 968 * bind with the transport. 969 * 970 * Use the same mode as in BSD. In particular this does 971 * not observe the umask. 972 */ 973 /* MAXPATHLEN + soun_family + nul termination */ 974 if (sti->sti_laddr_len > 975 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 976 error = ENAMETOOLONG; 977 eprintsoline(so, error); 978 goto done; 979 } 980 vattr.va_type = VSOCK; 981 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 982 vattr.va_mask = AT_TYPE|AT_MODE; 983 /* NOTE: holding so_lock */ 984 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 985 EXCL, 0, &vp, CRMKNOD, 0, 0); 986 if (error) { 987 if (error == EEXIST) 988 error = EADDRINUSE; 989 eprintsoline(so, error); 990 goto done; 991 } 992 /* 993 * Establish pointer from the underlying filesystem 994 * vnode to the socket node. 995 * sti_ux_bound_vp and v_stream->sd_vnode form the 996 * cross-linkage between the underlying filesystem 997 * node and the socket node. 
998 */ 999 1000 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 1001 VN_HOLD(rvp); 1002 VN_RELE(vp); 1003 vp = rvp; 1004 } 1005 1006 ASSERT(SOTOV(so)->v_stream); 1007 mutex_enter(&vp->v_lock); 1008 vp->v_stream = SOTOV(so)->v_stream; 1009 sti->sti_ux_bound_vp = vp; 1010 mutex_exit(&vp->v_lock); 1011 1012 /* 1013 * Use the vnode pointer value as a unique address 1014 * (together with the magic number to avoid conflicts 1015 * with implicit binds) in the transport provider. 1016 */ 1017 sti->sti_ux_laddr.soua_vp = 1018 (void *)sti->sti_ux_bound_vp; 1019 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1020 addr = &sti->sti_ux_laddr; 1021 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1022 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1023 addrlen, 1024 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1025 break; 1026 } 1027 } /* end switch (so->so_family) */ 1028 } 1029 1030 /* 1031 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1032 * the transport can start passing up T_CONN_IND messages 1033 * as soon as it receives the bind req and strsock_proto() 1034 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1035 */ 1036 if (flags & _SOBIND_LISTEN) { 1037 if ((so->so_state & SS_ACCEPTCONN) == 0) 1038 clear_acceptconn_on_err = B_TRUE; 1039 save_so_backlog = so->so_backlog; 1040 restore_backlog_on_err = B_TRUE; 1041 so->so_state |= SS_ACCEPTCONN; 1042 so->so_backlog = backlog; 1043 } 1044 1045 /* 1046 * If NL7C addr(s) have been configured check for addr/port match, 1047 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 1048 * 1049 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 1050 * family sockets only. If match mark as such. 
1051 */ 1052 if (nl7c_enabled && ((addr != NULL && 1053 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1054 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 1055 sti->sti_nl7c_flags == NL7C_AF_NCA)) { 1056 /* 1057 * NL7C is not supported in non-global zones, 1058 * we enforce this restriction here. 1059 */ 1060 if (so->so_zoneid == GLOBAL_ZONEID) { 1061 /* An NL7C socket, mark it */ 1062 sti->sti_nl7c_flags |= NL7C_ENABLED; 1063 if (nl7c == NULL) { 1064 /* 1065 * Was an AF_NCA bind() so add it to the 1066 * addr list for reporting purposes. 1067 */ 1068 nl7c = nl7c_add_addr(addr, addrlen); 1069 } 1070 } else 1071 nl7c = NULL; 1072 } 1073 1074 /* 1075 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1076 * for other transports we will send in a O_T_BIND_REQ. 1077 */ 1078 if (tcp_udp_xport && 1079 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1080 PRIM_type = T_BIND_REQ; 1081 1082 bind_req.PRIM_type = PRIM_type; 1083 bind_req.ADDR_length = addrlen; 1084 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1085 bind_req.CONIND_number = backlog; 1086 /* NOTE: holding so_lock while sleeping */ 1087 mp = soallocproto2(&bind_req, sizeof (bind_req), 1088 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1089 sti->sti_laddr_valid = 0; 1090 1091 /* Done using sti_laddr_sa - can drop the lock */ 1092 mutex_exit(&so->so_lock); 1093 1094 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1095 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1096 if (error) { 1097 eprintsoline(so, error); 1098 mutex_enter(&so->so_lock); 1099 goto done; 1100 } 1101 1102 mutex_enter(&so->so_lock); 1103 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1104 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1105 if (error) { 1106 eprintsoline(so, error); 1107 goto done; 1108 } 1109 ASSERT(mp); 1110 /* 1111 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1112 * strsock_proto while the lock was dropped above, the bind 1113 * is allowed to complete. 1114 */ 1115 1116 /* Mark as bound. 
This will be undone if we detect errors below. */ 1117 if (flags & _SOBIND_NOXLATE) { 1118 ASSERT(so->so_family == AF_UNIX); 1119 sti->sti_faddr_noxlate = 1; 1120 } 1121 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1122 so->so_state |= SS_ISBOUND; 1123 ASSERT(sti->sti_unbind_mp); 1124 1125 /* note that we've already set SS_ACCEPTCONN above */ 1126 1127 /* 1128 * Recompute addrlen - an unspecied bind sent down an 1129 * address of length zero but we expect the appropriate length 1130 * in return. 1131 */ 1132 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 1133 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1134 1135 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1136 /* 1137 * The alignment restriction is really too strict but 1138 * we want enough alignment to inspect the fields of 1139 * a sockaddr_in. 1140 */ 1141 addr = sogetoff(mp, bind_ack->ADDR_offset, 1142 bind_ack->ADDR_length, 1143 __TPI_ALIGN_SIZE); 1144 if (addr == NULL) { 1145 freemsg(mp); 1146 error = EPROTO; 1147 eprintsoline(so, error); 1148 goto done; 1149 } 1150 if (!(flags & _SOBIND_UNSPEC)) { 1151 /* 1152 * Verify that the transport didn't return something we 1153 * did not want e.g. an address other than what we asked for. 1154 * 1155 * NOTE: These checks would go away if/when we switch to 1156 * using the new TPI (in which the transport would fail 1157 * the request instead of assigning a different address). 1158 * 1159 * NOTE2: For protocols that we don't know (i.e. any 1160 * other than AF_INET6, AF_INET and AF_UNIX), we 1161 * cannot know if the transport should be expected to 1162 * return the same address as that requested. 1163 * 1164 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1165 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 
1166 * 1167 * For example, in the case of netatalk it may be 1168 * inappropriate for the transport to return the 1169 * requested address (as it may have allocated a local 1170 * port number in behaviour similar to that of an 1171 * AF_INET bind request with a port number of zero). 1172 * 1173 * Given the definition of O_T_BIND_REQ, where the 1174 * transport may bind to an address other than the 1175 * requested address, it's not possible to determine 1176 * whether a returned address that differs from the 1177 * requested address is a reason to fail (because the 1178 * requested address was not available) or succeed 1179 * (because the transport allocated an appropriate 1180 * address and/or port). 1181 * 1182 * sockfs currently requires that the transport return 1183 * the requested address in the T_BIND_ACK, unless 1184 * there is code here to allow for any discrepancy. 1185 * Such code exists for AF_INET and AF_INET6. 1186 * 1187 * Netatalk chooses to return the requested address 1188 * rather than the (correct) allocated address. This 1189 * means that netatalk violates the TPI specification 1190 * (and would not function correctly if used from a 1191 * TLI application), but it does mean that it works 1192 * with sockfs. 1193 * 1194 * As noted above, using the newer XTI bind primitive 1195 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1196 * allow sockfs to be more sure about whether or not 1197 * the bind request had succeeded (as transports are 1198 * not permitted to bind to a different address than 1199 * that requested - they must return failure). 1200 * Unfortunately, support for T_BIND_REQ may not be 1201 * present in all transport implementations (netatalk, 1202 * for example, doesn't have it), making the 1203 * transition difficult. 
1204 */ 1205 if (bind_ack->ADDR_length != addrlen) { 1206 /* Assumes that the requested address was in use */ 1207 freemsg(mp); 1208 error = EADDRINUSE; 1209 eprintsoline(so, error); 1210 goto done; 1211 } 1212 1213 switch (so->so_family) { 1214 case AF_INET6: 1215 case AF_INET: { 1216 sin_t *rname, *aname; 1217 1218 rname = (sin_t *)addr; 1219 aname = (sin_t *)sti->sti_laddr_sa; 1220 1221 /* 1222 * Take advantage of the alignment 1223 * of sin_port and sin6_port which fall 1224 * in the same place in their data structures. 1225 * Just use sin_port for either address family. 1226 * 1227 * This may become a problem if (heaven forbid) 1228 * there's a separate ipv6port_reserved... :-P 1229 * 1230 * Binding to port 0 has the semantics of letting 1231 * the transport bind to any port. 1232 * 1233 * If the transport is TCP or UDP since we had sent 1234 * a T_BIND_REQ we would not get a port other than 1235 * what we asked for. 1236 */ 1237 if (tcp_udp_xport) { 1238 /* 1239 * Pick up the new port number if we bound to 1240 * port 0. 1241 */ 1242 if (aname->sin_port == 0) 1243 aname->sin_port = rname->sin_port; 1244 sti->sti_laddr_valid = 1; 1245 break; 1246 } 1247 if (aname->sin_port != 0 && 1248 aname->sin_port != rname->sin_port) { 1249 freemsg(mp); 1250 error = EADDRINUSE; 1251 eprintsoline(so, error); 1252 goto done; 1253 } 1254 /* 1255 * Pick up the new port number if we bound to port 0. 1256 */ 1257 aname->sin_port = rname->sin_port; 1258 1259 /* 1260 * Unfortunately, addresses aren't _quite_ the same. 
1261 */ 1262 if (so->so_family == AF_INET) { 1263 if (aname->sin_addr.s_addr != 1264 rname->sin_addr.s_addr) { 1265 freemsg(mp); 1266 error = EADDRNOTAVAIL; 1267 eprintsoline(so, error); 1268 goto done; 1269 } 1270 } else { 1271 sin6_t *rname6 = (sin6_t *)rname; 1272 sin6_t *aname6 = (sin6_t *)aname; 1273 1274 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1275 &rname6->sin6_addr)) { 1276 freemsg(mp); 1277 error = EADDRNOTAVAIL; 1278 eprintsoline(so, error); 1279 goto done; 1280 } 1281 } 1282 break; 1283 } 1284 case AF_UNIX: 1285 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1286 freemsg(mp); 1287 error = EADDRINUSE; 1288 eprintsoline(so, error); 1289 eprintso(so, 1290 ("addrlen %d, addr 0x%x, vp %p\n", 1291 addrlen, *((int *)addr), 1292 (void *)sti->sti_ux_bound_vp)); 1293 goto done; 1294 } 1295 sti->sti_laddr_valid = 1; 1296 break; 1297 default: 1298 /* 1299 * NOTE: This assumes that addresses can be 1300 * byte-compared for equivalence. 1301 */ 1302 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1303 freemsg(mp); 1304 error = EADDRINUSE; 1305 eprintsoline(so, error); 1306 goto done; 1307 } 1308 /* 1309 * Don't mark sti_laddr_valid, as we cannot be 1310 * sure that the returned address is the real 1311 * bound address when talking to an unknown 1312 * transport. 1313 */ 1314 break; 1315 } 1316 } else { 1317 /* 1318 * Save for returned address for getsockname. 1319 * Needed for unspecific bind unless transport supports 1320 * the TI_GETMYNAME ioctl. 1321 * Do this for AF_INET{,6} even though they do, as 1322 * caching info here is much better performance than 1323 * a TPI/STREAMS trip to the transport for getsockname. 1324 * Any which can't for some reason _must_ _not_ set 1325 * sti_laddr_valid here for the caching version of 1326 * getsockname to not break; 1327 */ 1328 switch (so->so_family) { 1329 case AF_UNIX: 1330 /* 1331 * Record the address bound with the transport 1332 * for use by socketpair. 
1333 */ 1334 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1335 sti->sti_laddr_valid = 1; 1336 break; 1337 case AF_INET: 1338 case AF_INET6: 1339 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1340 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1341 sti->sti_laddr_valid = 1; 1342 break; 1343 default: 1344 /* 1345 * Don't mark sti_laddr_valid, as we cannot be 1346 * sure that the returned address is the real 1347 * bound address when talking to an unknown 1348 * transport. 1349 */ 1350 break; 1351 } 1352 } 1353 1354 if (nl7c != NULL) { 1355 /* Register listen()er sonode pointer with NL7C */ 1356 nl7c_listener_addr(nl7c, so); 1357 } 1358 1359 freemsg(mp); 1360 1361 done: 1362 if (error) { 1363 /* reset state & backlog to values held on entry */ 1364 if (clear_acceptconn_on_err == B_TRUE) 1365 so->so_state &= ~SS_ACCEPTCONN; 1366 if (restore_backlog_on_err == B_TRUE) 1367 so->so_backlog = save_so_backlog; 1368 1369 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1370 int err; 1371 1372 err = sotpi_unbind(so, 0); 1373 /* LINTED - statement has no consequent: if */ 1374 if (err) { 1375 eprintsoline(so, error); 1376 } else { 1377 ASSERT(!(so->so_state & SS_ISBOUND)); 1378 } 1379 } 1380 } 1381 if (!(flags & _SOBIND_LOCK_HELD)) { 1382 so_unlock_single(so, SOLOCKED); 1383 mutex_exit(&so->so_lock); 1384 } else { 1385 ASSERT(MUTEX_HELD(&so->so_lock)); 1386 ASSERT(so->so_flag & SOLOCKED); 1387 } 1388 return (error); 1389 } 1390 1391 /* bind the socket */ 1392 static int 1393 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1394 int flags, struct cred *cr) 1395 { 1396 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1397 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1398 1399 flags &= ~_SOBIND_SOCKETPAIR; 1400 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1401 } 1402 1403 /* 1404 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1405 * address, or when listen needs to unbind and bind. 
1406 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1407 * so that a sobind can pick them up. 1408 */ 1409 static int 1410 sotpi_unbind(struct sonode *so, int flags) 1411 { 1412 struct T_unbind_req unbind_req; 1413 int error = 0; 1414 mblk_t *mp; 1415 sotpi_info_t *sti = SOTOTPI(so); 1416 1417 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1418 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1419 1420 ASSERT(MUTEX_HELD(&so->so_lock)); 1421 ASSERT(so->so_flag & SOLOCKED); 1422 1423 if (!(so->so_state & SS_ISBOUND)) { 1424 error = EINVAL; 1425 eprintsoline(so, error); 1426 goto done; 1427 } 1428 1429 mutex_exit(&so->so_lock); 1430 1431 /* 1432 * Flush the read and write side (except stream head read queue) 1433 * and send down T_UNBIND_REQ. 1434 */ 1435 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1436 1437 unbind_req.PRIM_type = T_UNBIND_REQ; 1438 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1439 0, _ALLOC_SLEEP, CRED()); 1440 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1441 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1442 mutex_enter(&so->so_lock); 1443 if (error) { 1444 eprintsoline(so, error); 1445 goto done; 1446 } 1447 1448 error = sowaitokack(so, T_UNBIND_REQ); 1449 if (error) { 1450 eprintsoline(so, error); 1451 goto done; 1452 } 1453 1454 /* 1455 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1456 * strsock_proto while the lock was dropped above, the unbind 1457 * is allowed to complete. 1458 */ 1459 if (!(flags & _SOUNBIND_REBIND)) { 1460 /* 1461 * Clear out bound address. 
1462 */ 1463 vnode_t *vp; 1464 1465 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1466 sti->sti_ux_bound_vp = NULL; 1467 vn_rele_stream(vp); 1468 } 1469 /* Clear out address */ 1470 sti->sti_laddr_len = 0; 1471 } 1472 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1473 sti->sti_laddr_valid = 0; 1474 1475 done: 1476 1477 /* If the caller held the lock don't release it here */ 1478 ASSERT(MUTEX_HELD(&so->so_lock)); 1479 ASSERT(so->so_flag & SOLOCKED); 1480 1481 return (error); 1482 } 1483 1484 /* 1485 * listen on the socket. 1486 * For TPI conforming transports this has to first unbind with the transport 1487 * and then bind again using the new backlog. 1488 */ 1489 /* ARGSUSED */ 1490 int 1491 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1492 { 1493 int error = 0; 1494 sotpi_info_t *sti = SOTOTPI(so); 1495 1496 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1497 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1498 1499 if (sti->sti_serv_type == T_CLTS) 1500 return (EOPNOTSUPP); 1501 1502 /* 1503 * If the socket is ready to accept connections already, then 1504 * return without doing anything. This avoids a problem where 1505 * a second listen() call fails if a connection is pending and 1506 * leaves the socket unbound. Only when we are not unbinding 1507 * with the transport can we safely increase the backlog. 1508 */ 1509 if (so->so_state & SS_ACCEPTCONN && 1510 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1511 /*CONSTCOND*/ 1512 !solisten_tpi_tcp)) 1513 return (0); 1514 1515 if (so->so_state & SS_ISCONNECTED) 1516 return (EINVAL); 1517 1518 mutex_enter(&so->so_lock); 1519 so_lock_single(so); /* Set SOLOCKED */ 1520 1521 /* 1522 * If the listen doesn't change the backlog we do nothing. 1523 * This avoids an EPROTO error from the transport. 
1524 */ 1525 if ((so->so_state & SS_ACCEPTCONN) && 1526 so->so_backlog == backlog) 1527 goto done; 1528 1529 if (!(so->so_state & SS_ISBOUND)) { 1530 /* 1531 * Must have been explicitly bound in the UNIX domain. 1532 */ 1533 if (so->so_family == AF_UNIX) { 1534 error = EINVAL; 1535 goto done; 1536 } 1537 error = sotpi_bindlisten(so, NULL, 0, backlog, 1538 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1539 } else if (backlog > 0) { 1540 /* 1541 * AF_INET{,6} hack to avoid losing the port. 1542 * Assumes that all AF_INET{,6} transports can handle a 1543 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1544 * has already bound thus it is possible to avoid the unbind. 1545 */ 1546 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1547 /*CONSTCOND*/ 1548 !solisten_tpi_tcp)) { 1549 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1550 if (error) 1551 goto done; 1552 } 1553 error = sotpi_bindlisten(so, NULL, 0, backlog, 1554 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1555 } else { 1556 so->so_state |= SS_ACCEPTCONN; 1557 so->so_backlog = backlog; 1558 } 1559 if (error) 1560 goto done; 1561 ASSERT(so->so_state & SS_ACCEPTCONN); 1562 done: 1563 so_unlock_single(so, SOLOCKED); 1564 mutex_exit(&so->so_lock); 1565 return (error); 1566 } 1567 1568 /* 1569 * Disconnect either a specified seqno or all (-1). 1570 * The former is used on listening sockets only. 1571 * 1572 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1573 * the current use of sodisconnect(seqno == -1) is only for shutdown 1574 * so there is no point (and potentially incorrect) to unbind. 
1575 */ 1576 static int 1577 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1578 { 1579 struct T_discon_req discon_req; 1580 int error = 0; 1581 mblk_t *mp; 1582 1583 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1584 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1585 1586 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1587 mutex_enter(&so->so_lock); 1588 so_lock_single(so); /* Set SOLOCKED */ 1589 } else { 1590 ASSERT(MUTEX_HELD(&so->so_lock)); 1591 ASSERT(so->so_flag & SOLOCKED); 1592 } 1593 1594 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1595 error = EINVAL; 1596 eprintsoline(so, error); 1597 goto done; 1598 } 1599 1600 mutex_exit(&so->so_lock); 1601 /* 1602 * Flush the write side (unless this is a listener) 1603 * and then send down a T_DISCON_REQ. 1604 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1605 * and other messages.) 1606 */ 1607 if (!(so->so_state & SS_ACCEPTCONN)) 1608 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1609 1610 discon_req.PRIM_type = T_DISCON_REQ; 1611 discon_req.SEQ_number = seqno; 1612 mp = soallocproto1(&discon_req, sizeof (discon_req), 1613 0, _ALLOC_SLEEP, CRED()); 1614 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1615 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1616 mutex_enter(&so->so_lock); 1617 if (error) { 1618 eprintsoline(so, error); 1619 goto done; 1620 } 1621 1622 error = sowaitokack(so, T_DISCON_REQ); 1623 if (error) { 1624 eprintsoline(so, error); 1625 goto done; 1626 } 1627 /* 1628 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1629 * strsock_proto while the lock was dropped above, the disconnect 1630 * is allowed to complete. However, it is not possible to 1631 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 
1632 */ 1633 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1634 SOTOTPI(so)->sti_laddr_valid = 0; 1635 SOTOTPI(so)->sti_faddr_valid = 0; 1636 done: 1637 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1638 so_unlock_single(so, SOLOCKED); 1639 mutex_exit(&so->so_lock); 1640 } else { 1641 /* If the caller held the lock don't release it here */ 1642 ASSERT(MUTEX_HELD(&so->so_lock)); 1643 ASSERT(so->so_flag & SOLOCKED); 1644 } 1645 return (error); 1646 } 1647 1648 /* ARGSUSED */ 1649 int 1650 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1651 struct sonode **nsop) 1652 { 1653 struct T_conn_ind *conn_ind; 1654 struct T_conn_res *conn_res; 1655 int error = 0; 1656 mblk_t *mp, *ack_mp; 1657 struct sonode *nso; 1658 vnode_t *nvp; 1659 void *src; 1660 t_uscalar_t srclen; 1661 void *opt; 1662 t_uscalar_t optlen; 1663 t_scalar_t PRIM_type; 1664 t_scalar_t SEQ_number; 1665 size_t sinlen; 1666 sotpi_info_t *sti = SOTOTPI(so); 1667 sotpi_info_t *nsti; 1668 1669 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1670 (void *)so, fflag, (void *)nsop, 1671 pr_state(so->so_state, so->so_mode))); 1672 1673 /* 1674 * Defer single-threading the accepting socket until 1675 * the T_CONN_IND has been received and parsed and the 1676 * new sonode has been opened. 1677 */ 1678 1679 /* Check that we are not already connected */ 1680 if ((so->so_state & SS_ACCEPTCONN) == 0) 1681 goto conn_bad; 1682 again: 1683 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1684 goto e_bad; 1685 1686 ASSERT(mp != NULL); 1687 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1688 1689 /* 1690 * Save SEQ_number for error paths. 
1691 */ 1692 SEQ_number = conn_ind->SEQ_number; 1693 1694 srclen = conn_ind->SRC_length; 1695 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1696 if (src == NULL) { 1697 error = EPROTO; 1698 freemsg(mp); 1699 eprintsoline(so, error); 1700 goto disconnect_unlocked; 1701 } 1702 optlen = conn_ind->OPT_length; 1703 switch (so->so_family) { 1704 case AF_INET: 1705 case AF_INET6: 1706 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1707 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1708 &opt, conn_ind->OPT_length); 1709 } else { 1710 /* 1711 * The transport (in this case TCP) hasn't sent up 1712 * a pointer to an instance for the accept fast-path. 1713 * Disable fast-path completely because the call to 1714 * sotpi_create() below would otherwise create an 1715 * incomplete TCP instance, which would lead to 1716 * problems when sockfs sends a normal T_CONN_RES 1717 * message down the new stream. 1718 */ 1719 if (sti->sti_direct) { 1720 int rval; 1721 /* 1722 * For consistency we inform tcp to disable 1723 * direct interface on the listener, though 1724 * we can certainly live without doing this 1725 * because no data will ever travel upstream 1726 * on the listening socket. 1727 */ 1728 sti->sti_direct = 0; 1729 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1730 0, 0, K_TO_K, cr, &rval); 1731 } 1732 opt = NULL; 1733 optlen = 0; 1734 } 1735 break; 1736 case AF_UNIX: 1737 default: 1738 if (optlen != 0) { 1739 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1740 __TPI_ALIGN_SIZE); 1741 if (opt == NULL) { 1742 error = EPROTO; 1743 freemsg(mp); 1744 eprintsoline(so, error); 1745 goto disconnect_unlocked; 1746 } 1747 } 1748 if (so->so_family == AF_UNIX) { 1749 if (!sti->sti_faddr_noxlate) { 1750 src = NULL; 1751 srclen = 0; 1752 } 1753 /* Extract src address from options */ 1754 if (optlen != 0) 1755 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1756 } 1757 break; 1758 } 1759 1760 /* 1761 * Create the new socket. 
1762 */ 1763 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1764 if (nso == NULL) { 1765 ASSERT(error != 0); 1766 /* 1767 * Accept can not fail with ENOBUFS. sotpi_create 1768 * sleeps waiting for memory until a signal is caught 1769 * so return EINTR. 1770 */ 1771 freemsg(mp); 1772 if (error == ENOBUFS) 1773 error = EINTR; 1774 goto e_disc_unl; 1775 } 1776 nvp = SOTOV(nso); 1777 nsti = SOTOTPI(nso); 1778 1779 #ifdef DEBUG 1780 /* 1781 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1782 * it's inherited early to allow debugging of the accept code itself. 1783 */ 1784 nso->so_options |= so->so_options & SO_DEBUG; 1785 #endif /* DEBUG */ 1786 1787 /* 1788 * Save the SRC address from the T_CONN_IND 1789 * for getpeername to work on AF_UNIX and on transports that do not 1790 * support TI_GETPEERNAME. 1791 * 1792 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1793 * copyin_name(). 1794 */ 1795 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1796 error = EINVAL; 1797 freemsg(mp); 1798 eprintsoline(so, error); 1799 goto disconnect_vp_unlocked; 1800 } 1801 nsti->sti_faddr_len = (socklen_t)srclen; 1802 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1803 bcopy(src, nsti->sti_faddr_sa, srclen); 1804 nsti->sti_faddr_valid = 1; 1805 1806 /* 1807 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1808 */ 1809 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1810 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1811 cred_t *cr; 1812 pid_t cpid; 1813 1814 cr = msg_getcred(mp, &cpid); 1815 if (cr != NULL) { 1816 crhold(cr); 1817 nso->so_peercred = cr; 1818 nso->so_cpid = cpid; 1819 } 1820 freemsg(mp); 1821 1822 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1823 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1824 if (mp == NULL) { 1825 /* 1826 * Accept can not fail with ENOBUFS. 1827 * A signal was caught so return EINTR. 
1828 */ 1829 error = EINTR; 1830 eprintsoline(so, error); 1831 goto disconnect_vp_unlocked; 1832 } 1833 conn_res = (struct T_conn_res *)mp->b_rptr; 1834 } else { 1835 /* 1836 * For efficency reasons we use msg_extractcred; no crhold 1837 * needed since db_credp is cleared (i.e., we move the cred 1838 * from the message to so_peercred. 1839 */ 1840 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1841 1842 mp->b_rptr = DB_BASE(mp); 1843 conn_res = (struct T_conn_res *)mp->b_rptr; 1844 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1845 1846 mblk_setcred(mp, cr, curproc->p_pid); 1847 } 1848 1849 /* 1850 * New socket must be bound at least in sockfs and, except for AF_INET, 1851 * (or AF_INET6) it also has to be bound in the transport provider. 1852 * We set the local address in the sonode from the T_OK_ACK of the 1853 * T_CONN_RES. For this reason the address we bind to here isn't 1854 * important. 1855 */ 1856 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1857 /*CONSTCOND*/ 1858 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1859 /* 1860 * Optimization for AF_INET{,6} transports 1861 * that can handle a T_CONN_RES without being bound. 1862 */ 1863 mutex_enter(&nso->so_lock); 1864 so_automatic_bind(nso); 1865 mutex_exit(&nso->so_lock); 1866 } else { 1867 /* Perform NULL bind with the transport provider. */ 1868 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1869 cr)) != 0) { 1870 ASSERT(error != ENOBUFS); 1871 freemsg(mp); 1872 eprintsoline(nso, error); 1873 goto disconnect_vp_unlocked; 1874 } 1875 } 1876 1877 /* 1878 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1879 * so that any data arriving on the new socket will cause the 1880 * appropriate signals to be delivered for the new socket. 1881 * 1882 * No other thread (except strsock_proto and strsock_misc) 1883 * can access the new socket thus we relax the locking. 
1884 */ 1885 nso->so_pgrp = so->so_pgrp; 1886 nso->so_state |= so->so_state & SS_ASYNC; 1887 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1888 1889 if (nso->so_pgrp != 0) { 1890 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1891 eprintsoline(nso, error); 1892 error = 0; 1893 nso->so_pgrp = 0; 1894 } 1895 } 1896 1897 /* 1898 * Make note of the socket level options. TCP and IP level options 1899 * are already inherited. We could do all this after accept is 1900 * successful but doing it here simplifies code and no harm done 1901 * for error case. 1902 */ 1903 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1904 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1905 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1906 nso->so_sndbuf = so->so_sndbuf; 1907 nso->so_rcvbuf = so->so_rcvbuf; 1908 if (nso->so_options & SO_LINGER) 1909 nso->so_linger = so->so_linger; 1910 1911 /* 1912 * Note that the following sti_direct code path should be 1913 * removed once we are confident that the direct sockets 1914 * do not result in any degradation. 
1915 */ 1916 if (sti->sti_direct) { 1917 1918 ASSERT(opt != NULL); 1919 1920 conn_res->OPT_length = optlen; 1921 conn_res->OPT_offset = MBLKL(mp); 1922 bcopy(&opt, mp->b_wptr, optlen); 1923 mp->b_wptr += optlen; 1924 conn_res->PRIM_type = T_CONN_RES; 1925 conn_res->ACCEPTOR_id = 0; 1926 PRIM_type = T_CONN_RES; 1927 1928 /* Send down the T_CONN_RES on acceptor STREAM */ 1929 error = kstrputmsg(SOTOV(nso), mp, NULL, 1930 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1931 if (error) { 1932 mutex_enter(&so->so_lock); 1933 so_lock_single(so); 1934 eprintsoline(so, error); 1935 goto disconnect_vp; 1936 } 1937 mutex_enter(&nso->so_lock); 1938 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1939 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1940 if (error) { 1941 mutex_exit(&nso->so_lock); 1942 mutex_enter(&so->so_lock); 1943 so_lock_single(so); 1944 eprintsoline(so, error); 1945 goto disconnect_vp; 1946 } 1947 if (nso->so_family == AF_INET) { 1948 sin_t *sin; 1949 1950 sin = (sin_t *)(ack_mp->b_rptr + 1951 sizeof (struct T_ok_ack)); 1952 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 1953 nsti->sti_laddr_len = sizeof (sin_t); 1954 } else { 1955 sin6_t *sin6; 1956 1957 sin6 = (sin6_t *)(ack_mp->b_rptr + 1958 sizeof (struct T_ok_ack)); 1959 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 1960 nsti->sti_laddr_len = sizeof (sin6_t); 1961 } 1962 freemsg(ack_mp); 1963 1964 nso->so_state |= SS_ISCONNECTED; 1965 nso->so_proto_handle = (sock_lower_handle_t)opt; 1966 nsti->sti_laddr_valid = 1; 1967 1968 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 1969 /* 1970 * A NL7C marked listen()er so the new socket 1971 * inherits the listen()er's NL7C state, except 1972 * for NL7C_POLLIN. 1973 * 1974 * Only call NL7C to process the new socket if 1975 * the listen socket allows blocking i/o. 
1976 */ 1977 nsti->sti_nl7c_flags = 1978 sti->sti_nl7c_flags & (~NL7C_POLLIN); 1979 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1980 /* 1981 * Nonblocking accept() just make it 1982 * persist to defer processing to the 1983 * read-side syscall (e.g. read). 1984 */ 1985 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 1986 } else if (nl7c_process(nso, B_FALSE)) { 1987 /* 1988 * NL7C has completed processing on the 1989 * socket, close the socket and back to 1990 * the top to await the next T_CONN_IND. 1991 */ 1992 mutex_exit(&nso->so_lock); 1993 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1994 cr, NULL); 1995 VN_RELE(nvp); 1996 goto again; 1997 } 1998 /* Pass the new socket out */ 1999 } 2000 2001 mutex_exit(&nso->so_lock); 2002 2003 /* 2004 * It's possible, through the use of autopush for example, 2005 * that the acceptor stream may not support sti_direct 2006 * semantics. If the new socket does not support sti_direct 2007 * we issue a _SIOCSOCKFALLBACK to inform the transport 2008 * as we would in the I_PUSH case. 2009 */ 2010 if (nsti->sti_direct == 0) { 2011 int rval; 2012 2013 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2014 0, 0, K_TO_K, cr, &rval)) != 0) { 2015 mutex_enter(&so->so_lock); 2016 so_lock_single(so); 2017 eprintsoline(so, error); 2018 goto disconnect_vp; 2019 } 2020 } 2021 2022 /* 2023 * Pass out new socket. 2024 */ 2025 if (nsop != NULL) 2026 *nsop = nso; 2027 2028 return (0); 2029 } 2030 2031 /* 2032 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2033 * which don't support the FireEngine accept fast-path. It is also 2034 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2035 * again. Neither sockfs nor TCP attempt to find out if some other 2036 * random module has been inserted in between (in which case we 2037 * should follow TLI accept behaviour). We blindly assume the worst 2038 * case and revert back to old behaviour i.e. 
TCP will not send us 2039 * any option (eager) and the accept should happen on the listener 2040 * queue. Any queued T_conn_ind have already got their options removed 2041 * by so_sock2_stream() when "sockmod" was I_POP'd. 2042 */ 2043 /* 2044 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2045 */ 2046 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2047 #ifdef _ILP32 2048 queue_t *q; 2049 2050 /* 2051 * Find read queue in driver 2052 * Can safely do this since we "own" nso/nvp. 2053 */ 2054 q = strvp2wq(nvp)->q_next; 2055 while (SAMESTR(q)) 2056 q = q->q_next; 2057 q = RD(q); 2058 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2059 #else 2060 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2061 #endif /* _ILP32 */ 2062 conn_res->PRIM_type = O_T_CONN_RES; 2063 PRIM_type = O_T_CONN_RES; 2064 } else { 2065 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2066 conn_res->PRIM_type = T_CONN_RES; 2067 PRIM_type = T_CONN_RES; 2068 } 2069 conn_res->SEQ_number = SEQ_number; 2070 conn_res->OPT_length = 0; 2071 conn_res->OPT_offset = 0; 2072 2073 mutex_enter(&so->so_lock); 2074 so_lock_single(so); /* Set SOLOCKED */ 2075 mutex_exit(&so->so_lock); 2076 2077 error = kstrputmsg(SOTOV(so), mp, NULL, 2078 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2079 mutex_enter(&so->so_lock); 2080 if (error) { 2081 eprintsoline(so, error); 2082 goto disconnect_vp; 2083 } 2084 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2085 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2086 if (error) { 2087 eprintsoline(so, error); 2088 goto disconnect_vp; 2089 } 2090 mutex_exit(&so->so_lock); 2091 /* 2092 * If there is a sin/sin6 appended onto the T_OK_ACK use 2093 * that to set the local address. If this is not present 2094 * then we zero out the address and don't set the 2095 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2096 * the pathname from the listening socket. 
2097 * In the case where this is TCP or an AF_UNIX socket the 2098 * client side may have queued data or a T_ORDREL in the 2099 * transport. Having now sent the T_CONN_RES we may receive 2100 * those queued messages at any time. Hold the acceptor 2101 * so_lock until its state and laddr are finalized. 2102 */ 2103 mutex_enter(&nso->so_lock); 2104 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 2105 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 2106 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2107 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2108 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2109 nsti->sti_laddr_len = sinlen; 2110 nsti->sti_laddr_valid = 1; 2111 } else if (nso->so_family == AF_UNIX) { 2112 ASSERT(so->so_family == AF_UNIX); 2113 nsti->sti_laddr_len = sti->sti_laddr_len; 2114 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2115 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2116 nsti->sti_laddr_len); 2117 nsti->sti_laddr_valid = 1; 2118 } else { 2119 nsti->sti_laddr_len = sti->sti_laddr_len; 2120 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2121 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2122 nsti->sti_laddr_sa->sa_family = nso->so_family; 2123 } 2124 nso->so_state |= SS_ISCONNECTED; 2125 mutex_exit(&nso->so_lock); 2126 2127 freemsg(ack_mp); 2128 2129 mutex_enter(&so->so_lock); 2130 so_unlock_single(so, SOLOCKED); 2131 mutex_exit(&so->so_lock); 2132 2133 /* 2134 * Pass out new socket. 
2135 */ 2136 if (nsop != NULL) 2137 *nsop = nso; 2138 2139 return (0); 2140 2141 2142 eproto_disc_unl: 2143 error = EPROTO; 2144 e_disc_unl: 2145 eprintsoline(so, error); 2146 goto disconnect_unlocked; 2147 2148 pr_disc_vp_unl: 2149 eprintsoline(so, error); 2150 disconnect_vp_unlocked: 2151 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2152 VN_RELE(nvp); 2153 disconnect_unlocked: 2154 (void) sodisconnect(so, SEQ_number, 0); 2155 return (error); 2156 2157 pr_disc_vp: 2158 eprintsoline(so, error); 2159 disconnect_vp: 2160 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2161 so_unlock_single(so, SOLOCKED); 2162 mutex_exit(&so->so_lock); 2163 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2164 VN_RELE(nvp); 2165 return (error); 2166 2167 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2168 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2169 ? EOPNOTSUPP : EINVAL; 2170 e_bad: 2171 eprintsoline(so, error); 2172 return (error); 2173 } 2174 2175 /* 2176 * connect a socket. 2177 * 2178 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2179 * unconnect (by specifying a null address). 2180 */ 2181 int 2182 sotpi_connect(struct sonode *so, 2183 struct sockaddr *name, 2184 socklen_t namelen, 2185 int fflag, 2186 int flags, 2187 struct cred *cr) 2188 { 2189 struct T_conn_req conn_req; 2190 int error = 0; 2191 mblk_t *mp; 2192 void *src; 2193 socklen_t srclen; 2194 void *addr; 2195 socklen_t addrlen; 2196 boolean_t need_unlock; 2197 sotpi_info_t *sti = SOTOTPI(so); 2198 2199 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2200 (void *)so, (void *)name, namelen, fflag, flags, 2201 pr_state(so->so_state, so->so_mode))); 2202 2203 /* 2204 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2205 * avoid sleeping for memory with SOLOCKED held. 2206 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2207 * + sizeof (struct T_opthdr). 
2208 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2209 * exceed sti_faddr_maxlen). 2210 */ 2211 mp = soallocproto(sizeof (struct T_conn_req) + 2212 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 2213 cr); 2214 if (mp == NULL) { 2215 /* 2216 * Connect can not fail with ENOBUFS. A signal was 2217 * caught so return EINTR. 2218 */ 2219 error = EINTR; 2220 eprintsoline(so, error); 2221 return (error); 2222 } 2223 2224 mutex_enter(&so->so_lock); 2225 /* 2226 * Make sure there is a preallocated T_unbind_req message 2227 * before any binding. This message is allocated when the 2228 * socket is created. Since another thread can consume 2229 * so_unbind_mp by the time we return from so_lock_single(), 2230 * we should check the availability of so_unbind_mp after 2231 * we return from so_lock_single(). 2232 */ 2233 2234 so_lock_single(so); /* Set SOLOCKED */ 2235 need_unlock = B_TRUE; 2236 2237 if (sti->sti_unbind_mp == NULL) { 2238 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2239 /* NOTE: holding so_lock while sleeping */ 2240 sti->sti_unbind_mp = 2241 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2242 if (sti->sti_unbind_mp == NULL) { 2243 error = EINTR; 2244 goto done; 2245 } 2246 } 2247 2248 /* 2249 * Can't have done a listen before connecting. 2250 */ 2251 if (so->so_state & SS_ACCEPTCONN) { 2252 error = EOPNOTSUPP; 2253 goto done; 2254 } 2255 2256 /* 2257 * Must be bound with the transport 2258 */ 2259 if (!(so->so_state & SS_ISBOUND)) { 2260 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2261 /*CONSTCOND*/ 2262 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2263 /* 2264 * Optimization for AF_INET{,6} transports 2265 * that can handle a T_CONN_REQ without being bound. 
2266 */ 2267 so_automatic_bind(so); 2268 } else { 2269 error = sotpi_bind(so, NULL, 0, 2270 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2271 if (error) 2272 goto done; 2273 } 2274 ASSERT(so->so_state & SS_ISBOUND); 2275 flags |= _SOCONNECT_DID_BIND; 2276 } 2277 2278 /* 2279 * Handle a connect to a name parameter of type AF_UNSPEC like a 2280 * connect to a null address. This is the portable method to 2281 * unconnect a socket. 2282 */ 2283 if ((namelen >= sizeof (sa_family_t)) && 2284 (name->sa_family == AF_UNSPEC)) { 2285 name = NULL; 2286 namelen = 0; 2287 } 2288 2289 /* 2290 * Check that we are not already connected. 2291 * A connection-oriented socket cannot be reconnected. 2292 * A connected connection-less socket can be 2293 * - connected to a different address by a subsequent connect 2294 * - "unconnected" by a connect to the NULL address 2295 */ 2296 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2297 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2298 if (so->so_mode & SM_CONNREQUIRED) { 2299 /* Connection-oriented socket */ 2300 error = so->so_state & SS_ISCONNECTED ? 2301 EISCONN : EALREADY; 2302 goto done; 2303 } 2304 /* Connection-less socket */ 2305 if (name == NULL) { 2306 /* 2307 * Remove the connected state and clear SO_DGRAM_ERRIND 2308 * since it was set when the socket was connected. 2309 * If this is UDP also send down a T_DISCON_REQ. 2310 */ 2311 int val; 2312 2313 if ((so->so_family == AF_INET || 2314 so->so_family == AF_INET6) && 2315 (so->so_type == SOCK_DGRAM || 2316 so->so_type == SOCK_RAW) && 2317 /*CONSTCOND*/ 2318 !soconnect_tpi_udp) { 2319 /* XXX What about implicitly unbinding here? 
*/ 2320 error = sodisconnect(so, -1, 2321 _SODISCONNECT_LOCK_HELD); 2322 } else { 2323 so->so_state &= 2324 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2325 sti->sti_faddr_valid = 0; 2326 sti->sti_faddr_len = 0; 2327 } 2328 2329 /* Remove SOLOCKED since setsockopt will grab it */ 2330 so_unlock_single(so, SOLOCKED); 2331 mutex_exit(&so->so_lock); 2332 2333 val = 0; 2334 (void) sotpi_setsockopt(so, SOL_SOCKET, 2335 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2336 cr); 2337 2338 mutex_enter(&so->so_lock); 2339 so_lock_single(so); /* Set SOLOCKED */ 2340 goto done; 2341 } 2342 } 2343 ASSERT(so->so_state & SS_ISBOUND); 2344 2345 if (name == NULL || namelen == 0) { 2346 error = EINVAL; 2347 goto done; 2348 } 2349 /* 2350 * Mark the socket if sti_faddr_sa represents the transport level 2351 * address. 2352 */ 2353 if (flags & _SOCONNECT_NOXLATE) { 2354 struct sockaddr_ux *soaddr_ux; 2355 2356 ASSERT(so->so_family == AF_UNIX); 2357 if (namelen != sizeof (struct sockaddr_ux)) { 2358 error = EINVAL; 2359 goto done; 2360 } 2361 soaddr_ux = (struct sockaddr_ux *)name; 2362 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2363 namelen = sizeof (soaddr_ux->sou_addr); 2364 sti->sti_faddr_noxlate = 1; 2365 } 2366 2367 /* 2368 * Length and family checks. 2369 */ 2370 error = so_addr_verify(so, name, namelen); 2371 if (error) 2372 goto bad; 2373 2374 /* 2375 * Save foreign address. Needed for AF_UNIX as well as 2376 * transport providers that do not support TI_GETPEERNAME. 2377 * Also used for cached foreign address for TCP and UDP. 2378 */ 2379 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2380 error = EINVAL; 2381 goto done; 2382 } 2383 sti->sti_faddr_len = (socklen_t)namelen; 2384 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2385 bcopy(name, sti->sti_faddr_sa, namelen); 2386 sti->sti_faddr_valid = 1; 2387 2388 if (so->so_family == AF_UNIX) { 2389 if (sti->sti_faddr_noxlate) { 2390 /* 2391 * sti_faddr is a transport-level address, so 2392 * don't pass it as an option. 
Do save it in 2393 * sti_ux_faddr, used for connected DG send. 2394 */ 2395 src = NULL; 2396 srclen = 0; 2397 addr = sti->sti_faddr_sa; 2398 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2399 bcopy(addr, &sti->sti_ux_faddr, 2400 sizeof (sti->sti_ux_faddr)); 2401 } else { 2402 /* 2403 * Pass the sockaddr_un source address as an option 2404 * and translate the remote address. 2405 * Holding so_lock thus sti_laddr_sa can not change. 2406 */ 2407 src = sti->sti_laddr_sa; 2408 srclen = (t_uscalar_t)sti->sti_laddr_len; 2409 dprintso(so, 1, 2410 ("sotpi_connect UNIX: srclen %d, src %p\n", 2411 srclen, src)); 2412 /* 2413 * Translate the destination address into our 2414 * internal form, and save it in sti_ux_faddr. 2415 * After this call, addr==&sti->sti_ux_taddr, 2416 * and we copy that to sti->sti_ux_faddr so 2417 * we save the connected peer address. 2418 */ 2419 error = so_ux_addr_xlate(so, 2420 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2421 (flags & _SOCONNECT_XPG4_2), 2422 &addr, &addrlen); 2423 if (error) 2424 goto bad; 2425 bcopy(&sti->sti_ux_taddr, &sti->sti_ux_faddr, 2426 sizeof (sti->sti_ux_faddr)); 2427 } 2428 } else { 2429 addr = sti->sti_faddr_sa; 2430 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2431 src = NULL; 2432 srclen = 0; 2433 } 2434 /* 2435 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2436 * option which asks the transport provider to send T_UDERR_IND 2437 * messages. These T_UDERR_IND messages are used to return connected 2438 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2439 * 2440 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2441 * we send down a T_CONN_REQ. This is needed to let the 2442 * transport assign a local address that is consistent with 2443 * the remote address. Applications depend on a getsockname() 2444 * after a connect() to retrieve the "source" IP address for 2445 * the connected socket. 
Invalidate the cached local address 2446 * to force getsockname() to enquire of the transport. 2447 */ 2448 if (!(so->so_mode & SM_CONNREQUIRED)) { 2449 /* 2450 * Datagram socket. 2451 */ 2452 int32_t val; 2453 2454 so_unlock_single(so, SOLOCKED); 2455 mutex_exit(&so->so_lock); 2456 2457 val = 1; 2458 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2459 &val, (t_uscalar_t)sizeof (val), cr); 2460 2461 mutex_enter(&so->so_lock); 2462 so_lock_single(so); /* Set SOLOCKED */ 2463 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2464 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2465 soconnect_tpi_udp) { 2466 soisconnected(so); 2467 goto done; 2468 } 2469 /* 2470 * Send down T_CONN_REQ etc. 2471 * Clear fflag to avoid returning EWOULDBLOCK. 2472 */ 2473 fflag = 0; 2474 ASSERT(so->so_family != AF_UNIX); 2475 sti->sti_laddr_valid = 0; 2476 } else if (sti->sti_laddr_len != 0) { 2477 /* 2478 * If the local address or port was "any" then it may be 2479 * changed by the transport as a result of the 2480 * connect. Invalidate the cached version if we have one. 2481 */ 2482 switch (so->so_family) { 2483 case AF_INET: 2484 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2485 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2486 INADDR_ANY || 2487 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2488 sti->sti_laddr_valid = 0; 2489 break; 2490 2491 case AF_INET6: 2492 ASSERT(sti->sti_laddr_len == 2493 (socklen_t)sizeof (sin6_t)); 2494 if (IN6_IS_ADDR_UNSPECIFIED( 2495 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2496 IN6_IS_ADDR_V4MAPPED_ANY( 2497 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2498 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2499 sti->sti_laddr_valid = 0; 2500 break; 2501 2502 default: 2503 break; 2504 } 2505 } 2506 2507 /* 2508 * Check for failure of an earlier call 2509 */ 2510 if (so->so_error != 0) 2511 goto so_bad; 2512 2513 /* 2514 * Send down T_CONN_REQ. Message was allocated above. 
2515 */ 2516 conn_req.PRIM_type = T_CONN_REQ; 2517 conn_req.DEST_length = addrlen; 2518 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2519 if (srclen == 0) { 2520 conn_req.OPT_length = 0; 2521 conn_req.OPT_offset = 0; 2522 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2523 soappendmsg(mp, addr, addrlen); 2524 } else { 2525 /* 2526 * There is a AF_UNIX sockaddr_un to include as a source 2527 * address option. 2528 */ 2529 struct T_opthdr toh; 2530 2531 toh.level = SOL_SOCKET; 2532 toh.name = SO_SRCADDR; 2533 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2534 toh.status = 0; 2535 conn_req.OPT_length = 2536 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2537 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2538 _TPI_ALIGN_TOPT(addrlen)); 2539 2540 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2541 soappendmsg(mp, addr, addrlen); 2542 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2543 soappendmsg(mp, &toh, sizeof (toh)); 2544 soappendmsg(mp, src, srclen); 2545 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2546 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2547 } 2548 /* 2549 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2550 * in order to have the right state when the T_CONN_CON shows up. 
2551 */ 2552 soisconnecting(so); 2553 mutex_exit(&so->so_lock); 2554 2555 if (AU_AUDITING()) 2556 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2557 2558 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2559 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2560 mp = NULL; 2561 mutex_enter(&so->so_lock); 2562 if (error != 0) 2563 goto bad; 2564 2565 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2566 goto bad; 2567 2568 /* Allow other threads to access the socket */ 2569 so_unlock_single(so, SOLOCKED); 2570 need_unlock = B_FALSE; 2571 2572 /* 2573 * Wait until we get a T_CONN_CON or an error 2574 */ 2575 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2576 so_lock_single(so); /* Set SOLOCKED */ 2577 need_unlock = B_TRUE; 2578 } 2579 2580 done: 2581 freemsg(mp); 2582 switch (error) { 2583 case EINPROGRESS: 2584 case EALREADY: 2585 case EISCONN: 2586 case EINTR: 2587 /* Non-fatal errors */ 2588 sti->sti_laddr_valid = 0; 2589 /* FALLTHRU */ 2590 case 0: 2591 break; 2592 default: 2593 ASSERT(need_unlock); 2594 /* 2595 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2596 * and invalidate local-address cache 2597 */ 2598 so->so_state &= ~SS_ISCONNECTING; 2599 sti->sti_laddr_valid = 0; 2600 /* A discon_ind might have already unbound us */ 2601 if ((flags & _SOCONNECT_DID_BIND) && 2602 (so->so_state & SS_ISBOUND)) { 2603 int err; 2604 2605 err = sotpi_unbind(so, 0); 2606 /* LINTED - statement has no conseq */ 2607 if (err) { 2608 eprintsoline(so, err); 2609 } 2610 } 2611 break; 2612 } 2613 if (need_unlock) 2614 so_unlock_single(so, SOLOCKED); 2615 mutex_exit(&so->so_lock); 2616 return (error); 2617 2618 so_bad: error = sogeterr(so, B_TRUE); 2619 bad: eprintsoline(so, error); 2620 goto done; 2621 } 2622 2623 /* ARGSUSED */ 2624 int 2625 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2626 { 2627 struct T_ordrel_req ordrel_req; 2628 mblk_t *mp; 2629 uint_t old_state, state_change; 2630 int error = 0; 2631 sotpi_info_t *sti = SOTOTPI(so); 2632 2633 
dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2634 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2635 2636 mutex_enter(&so->so_lock); 2637 so_lock_single(so); /* Set SOLOCKED */ 2638 2639 /* 2640 * SunOS 4.X has no check for datagram sockets. 2641 * 5.X checks that it is connected (ENOTCONN) 2642 * X/Open requires that we check the connected state. 2643 */ 2644 if (!(so->so_state & SS_ISCONNECTED)) { 2645 if (!xnet_skip_checks) { 2646 error = ENOTCONN; 2647 if (xnet_check_print) { 2648 printf("sockfs: X/Open shutdown check " 2649 "caused ENOTCONN\n"); 2650 } 2651 } 2652 goto done; 2653 } 2654 /* 2655 * Record the current state and then perform any state changes. 2656 * Then use the difference between the old and new states to 2657 * determine which messages need to be sent. 2658 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2659 * duplicate calls to shutdown(). 2660 */ 2661 old_state = so->so_state; 2662 2663 switch (how) { 2664 case 0: 2665 socantrcvmore(so); 2666 break; 2667 case 1: 2668 socantsendmore(so); 2669 break; 2670 case 2: 2671 socantsendmore(so); 2672 socantrcvmore(so); 2673 break; 2674 default: 2675 error = EINVAL; 2676 goto done; 2677 } 2678 2679 /* 2680 * Assumes that the SS_CANT* flags are never cleared in the above code. 2681 */ 2682 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2683 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2684 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2685 2686 switch (state_change) { 2687 case 0: 2688 dprintso(so, 1, 2689 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2690 so->so_state)); 2691 goto done; 2692 2693 case SS_CANTRCVMORE: 2694 mutex_exit(&so->so_lock); 2695 strseteof(SOTOV(so), 1); 2696 /* 2697 * strseteof takes care of read side wakeups, 2698 * pollwakeups, and signals. 2699 */ 2700 /* 2701 * Get the read lock before flushing data to avoid problems 2702 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2703 */ 2704 mutex_enter(&so->so_lock); 2705 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2706 mutex_exit(&so->so_lock); 2707 2708 /* Flush read side queue */ 2709 strflushrq(SOTOV(so), FLUSHALL); 2710 2711 mutex_enter(&so->so_lock); 2712 so_unlock_read(so); /* Clear SOREADLOCKED */ 2713 break; 2714 2715 case SS_CANTSENDMORE: 2716 mutex_exit(&so->so_lock); 2717 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2718 mutex_enter(&so->so_lock); 2719 break; 2720 2721 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2722 mutex_exit(&so->so_lock); 2723 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2724 strseteof(SOTOV(so), 1); 2725 /* 2726 * strseteof takes care of read side wakeups, 2727 * pollwakeups, and signals. 2728 */ 2729 /* 2730 * Get the read lock before flushing data to avoid problems 2731 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2732 */ 2733 mutex_enter(&so->so_lock); 2734 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2735 mutex_exit(&so->so_lock); 2736 2737 /* Flush read side queue */ 2738 strflushrq(SOTOV(so), FLUSHALL); 2739 2740 mutex_enter(&so->so_lock); 2741 so_unlock_read(so); /* Clear SOREADLOCKED */ 2742 break; 2743 } 2744 2745 ASSERT(MUTEX_HELD(&so->so_lock)); 2746 2747 /* 2748 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2749 * was set due to this call and the new state has both of them set: 2750 * Send the AF_UNIX close indication 2751 * For T_COTS send a discon_ind 2752 * 2753 * If cantsend was set due to this call: 2754 * For T_COTSORD send an ordrel_ind 2755 * 2756 * Note that for T_CLTS there is no message sent here. 2757 */ 2758 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2759 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2760 /* 2761 * For SunOS 4.X compatibility we tell the other end 2762 * that we are unable to receive at this point. 
2763 */ 2764 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2765 so_unix_close(so); 2766 2767 if (sti->sti_serv_type == T_COTS) 2768 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2769 } 2770 if ((state_change & SS_CANTSENDMORE) && 2771 (sti->sti_serv_type == T_COTS_ORD)) { 2772 /* Send an orderly release */ 2773 ordrel_req.PRIM_type = T_ORDREL_REQ; 2774 2775 mutex_exit(&so->so_lock); 2776 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2777 0, _ALLOC_SLEEP, cr); 2778 /* 2779 * Send down the T_ORDREL_REQ even if there is flow control. 2780 * This prevents shutdown from blocking. 2781 * Note that there is no T_OK_ACK for ordrel_req. 2782 */ 2783 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2784 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2785 mutex_enter(&so->so_lock); 2786 if (error) { 2787 eprintsoline(so, error); 2788 goto done; 2789 } 2790 } 2791 2792 done: 2793 so_unlock_single(so, SOLOCKED); 2794 mutex_exit(&so->so_lock); 2795 return (error); 2796 } 2797 2798 /* 2799 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2800 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2801 * that we have closed. 2802 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2803 * T_UNITDATA_REQ containing the same option. 2804 * 2805 * For SOCK_DGRAM half-connections (somebody connected to this end 2806 * but this end is not connect) we don't know where to send any 2807 * SO_UNIX_CLOSE. 2808 * 2809 * We have to ignore stream head errors just in case there has been 2810 * a shutdown(output). 2811 * Ignore any flow control to try to get the message more quickly to the peer. 2812 * While locally ignoring flow control solves the problem when there 2813 * is only the loopback transport on the stream it would not provide 2814 * the correct AF_UNIX socket semantics when one or more modules have 2815 * been pushed. 
2816 */ 2817 void 2818 so_unix_close(struct sonode *so) 2819 { 2820 struct T_opthdr toh; 2821 mblk_t *mp; 2822 sotpi_info_t *sti = SOTOTPI(so); 2823 2824 ASSERT(MUTEX_HELD(&so->so_lock)); 2825 2826 ASSERT(so->so_family == AF_UNIX); 2827 2828 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2829 (SS_ISCONNECTED|SS_ISBOUND)) 2830 return; 2831 2832 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2833 (void *)so, pr_state(so->so_state, so->so_mode))); 2834 2835 toh.level = SOL_SOCKET; 2836 toh.name = SO_UNIX_CLOSE; 2837 2838 /* zero length + header */ 2839 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2840 toh.status = 0; 2841 2842 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2843 struct T_optdata_req tdr; 2844 2845 tdr.PRIM_type = T_OPTDATA_REQ; 2846 tdr.DATA_flag = 0; 2847 2848 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2849 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2850 2851 /* NOTE: holding so_lock while sleeping */ 2852 mp = soallocproto2(&tdr, sizeof (tdr), 2853 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2854 } else { 2855 struct T_unitdata_req tudr; 2856 void *addr; 2857 socklen_t addrlen; 2858 void *src; 2859 socklen_t srclen; 2860 struct T_opthdr toh2; 2861 t_scalar_t size; 2862 2863 /* 2864 * We know this is an AF_UNIX connected DGRAM socket. 2865 * We therefore already have the destination address 2866 * in the internal form needed for this send. This is 2867 * similar to the sosend_dgram call later in this file 2868 * when there's no user-specified destination address. 2869 */ 2870 if (sti->sti_faddr_noxlate) { 2871 /* 2872 * Already have a transport internal address. Do not 2873 * pass any (transport internal) source address. 2874 */ 2875 addr = sti->sti_faddr_sa; 2876 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2877 src = NULL; 2878 srclen = 0; 2879 } else { 2880 /* 2881 * Pass the sockaddr_un source address as an option 2882 * and translate the remote address. 2883 * Holding so_lock thus sti_laddr_sa can not change. 
2884 */ 2885 src = sti->sti_laddr_sa; 2886 srclen = (socklen_t)sti->sti_laddr_len; 2887 dprintso(so, 1, 2888 ("so_ux_close: srclen %d, src %p\n", 2889 srclen, src)); 2890 /* 2891 * Use the destination address saved in connect. 2892 */ 2893 addr = &sti->sti_ux_faddr; 2894 addrlen = sizeof (sti->sti_ux_faddr); 2895 } 2896 tudr.PRIM_type = T_UNITDATA_REQ; 2897 tudr.DEST_length = addrlen; 2898 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2899 if (srclen == 0) { 2900 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2901 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2902 _TPI_ALIGN_TOPT(addrlen)); 2903 2904 size = tudr.OPT_offset + tudr.OPT_length; 2905 /* NOTE: holding so_lock while sleeping */ 2906 mp = soallocproto2(&tudr, sizeof (tudr), 2907 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2908 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2909 soappendmsg(mp, &toh, sizeof (toh)); 2910 } else { 2911 /* 2912 * There is a AF_UNIX sockaddr_un to include as a 2913 * source address option. 2914 */ 2915 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2916 _TPI_ALIGN_TOPT(srclen)); 2917 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2918 _TPI_ALIGN_TOPT(addrlen)); 2919 2920 toh2.level = SOL_SOCKET; 2921 toh2.name = SO_SRCADDR; 2922 toh2.len = (t_uscalar_t)(srclen + 2923 sizeof (struct T_opthdr)); 2924 toh2.status = 0; 2925 2926 size = tudr.OPT_offset + tudr.OPT_length; 2927 2928 /* NOTE: holding so_lock while sleeping */ 2929 mp = soallocproto2(&tudr, sizeof (tudr), 2930 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2931 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2932 soappendmsg(mp, &toh, sizeof (toh)); 2933 soappendmsg(mp, &toh2, sizeof (toh2)); 2934 soappendmsg(mp, src, srclen); 2935 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2936 } 2937 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2938 } 2939 mutex_exit(&so->so_lock); 2940 (void) kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2941 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2942 mutex_enter(&so->so_lock); 2943 } 
2944 2945 /* 2946 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2947 * In addition, the caller typically verifies that there is some 2948 * potential state to clear by checking 2949 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2950 * before calling this routine. 2951 * Note that such a check can be made without holding so_lock since 2952 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2953 * decrements sti_oobsigcnt. 2954 * 2955 * When data is read *after* the point that all pending 2956 * oob data has been consumed the oob indication is cleared. 2957 * 2958 * This logic keeps select/poll returning POLLRDBAND and 2959 * SIOCATMARK returning true until we have read past 2960 * the mark. 2961 */ 2962 static void 2963 sorecv_update_oobstate(struct sonode *so) 2964 { 2965 sotpi_info_t *sti = SOTOTPI(so); 2966 2967 mutex_enter(&so->so_lock); 2968 ASSERT(so_verify_oobstate(so)); 2969 dprintso(so, 1, 2970 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2971 sti->sti_oobsigcnt, 2972 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 2973 if (sti->sti_oobsigcnt == 0) { 2974 /* No more pending oob indications */ 2975 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2976 freemsg(so->so_oobmsg); 2977 so->so_oobmsg = NULL; 2978 } 2979 ASSERT(so_verify_oobstate(so)); 2980 mutex_exit(&so->so_lock); 2981 } 2982 2983 /* 2984 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2985 */ 2986 static int 2987 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2988 { 2989 sotpi_info_t *sti = SOTOTPI(so); 2990 int error = 0; 2991 mblk_t *tmp = NULL; 2992 mblk_t *pmp = NULL; 2993 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 2994 2995 ASSERT(nmp != NULL); 2996 2997 while (nmp != NULL && uiop->uio_resid > 0) { 2998 ssize_t n; 2999 3000 if (DB_TYPE(nmp) == M_DATA) { 3001 /* 3002 * We have some data, uiomove up to resid bytes. 
3003 */ 3004 n = MIN(MBLKL(nmp), uiop->uio_resid); 3005 if (n > 0) 3006 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 3007 nmp->b_rptr += n; 3008 if (nmp->b_rptr == nmp->b_wptr) { 3009 pmp = nmp; 3010 nmp = nmp->b_cont; 3011 } 3012 if (error) 3013 break; 3014 } else { 3015 /* 3016 * We only handle data, save for caller to handle. 3017 */ 3018 if (pmp != NULL) { 3019 pmp->b_cont = nmp->b_cont; 3020 } 3021 nmp->b_cont = NULL; 3022 if (*rmp == NULL) { 3023 *rmp = nmp; 3024 } else { 3025 tmp->b_cont = nmp; 3026 } 3027 nmp = nmp->b_cont; 3028 tmp = nmp; 3029 } 3030 } 3031 if (pmp != NULL) { 3032 /* Free any mblk_t(s) which we have consumed */ 3033 pmp->b_cont = NULL; 3034 freemsg(sti->sti_nl7c_rcv_mp); 3035 } 3036 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3037 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3038 if (error == 0) { 3039 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3040 3041 error = p->r_v.r_v2; 3042 p->r_v.r_v2 = 0; 3043 } 3044 rp->r_vals = sti->sti_nl7c_rcv_rval; 3045 sti->sti_nl7c_rcv_rval = 0; 3046 } else { 3047 /* More mblk_t(s) to process so no rval to return */ 3048 rp->r_vals = 0; 3049 } 3050 return (error); 3051 } 3052 /* 3053 * Receive the next message on the queue. 3054 * If msg_controllen is non-zero when called the caller is interested in 3055 * any received control info (options). 3056 * If msg_namelen is non-zero when called the caller is interested in 3057 * any received source address. 3058 * The routine returns with msg_control and msg_name pointing to 3059 * kmem_alloc'ed memory which the caller has to free. 
3060 */ 3061 /* ARGSUSED */ 3062 int 3063 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3064 struct cred *cr) 3065 { 3066 union T_primitives *tpr; 3067 mblk_t *mp; 3068 uchar_t pri; 3069 int pflag, opflag; 3070 void *control; 3071 t_uscalar_t controllen; 3072 t_uscalar_t namelen; 3073 int so_state = so->so_state; /* Snapshot */ 3074 ssize_t saved_resid; 3075 rval_t rval; 3076 int flags; 3077 clock_t timout; 3078 int error = 0; 3079 sotpi_info_t *sti = SOTOTPI(so); 3080 3081 flags = msg->msg_flags; 3082 msg->msg_flags = 0; 3083 3084 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3085 (void *)so, (void *)msg, flags, 3086 pr_state(so->so_state, so->so_mode), so->so_error)); 3087 3088 if (so->so_version == SOV_STREAM) { 3089 so_update_attrs(so, SOACC); 3090 /* The imaginary "sockmod" has been popped - act as a stream */ 3091 return (strread(SOTOV(so), uiop, cr)); 3092 } 3093 3094 /* 3095 * If we are not connected because we have never been connected 3096 * we return ENOTCONN. If we have been connected (but are no longer 3097 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3098 * the EOF. 3099 * 3100 * An alternative would be to post an ENOTCONN error in stream head 3101 * (read+write) and clear it when we're connected. However, that error 3102 * would cause incorrect poll/select behavior! 3103 */ 3104 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3105 (so->so_mode & SM_CONNREQUIRED)) { 3106 return (ENOTCONN); 3107 } 3108 3109 /* 3110 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3111 * after checking that the read queue is empty) and returns zero. 3112 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3113 * is zero. 
3114 */ 3115 3116 if (flags & MSG_OOB) { 3117 /* Check that the transport supports OOB */ 3118 if (!(so->so_mode & SM_EXDATA)) 3119 return (EOPNOTSUPP); 3120 so_update_attrs(so, SOACC); 3121 return (sorecvoob(so, msg, uiop, flags, 3122 (so->so_options & SO_OOBINLINE))); 3123 } 3124 3125 so_update_attrs(so, SOACC); 3126 3127 /* 3128 * Set msg_controllen and msg_namelen to zero here to make it 3129 * simpler in the cases that no control or name is returned. 3130 */ 3131 controllen = msg->msg_controllen; 3132 namelen = msg->msg_namelen; 3133 msg->msg_controllen = 0; 3134 msg->msg_namelen = 0; 3135 3136 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3137 namelen, controllen)); 3138 3139 mutex_enter(&so->so_lock); 3140 /* 3141 * If an NL7C enabled socket and not waiting for write data. 3142 */ 3143 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3144 NL7C_ENABLED) { 3145 if (sti->sti_nl7c_uri) { 3146 /* Close uri processing for a previous request */ 3147 nl7c_close(so); 3148 } 3149 if ((so_state & SS_CANTRCVMORE) && 3150 sti->sti_nl7c_rcv_mp == NULL) { 3151 /* Nothing to process, EOF */ 3152 mutex_exit(&so->so_lock); 3153 return (0); 3154 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3155 /* Persistent NL7C socket, try to process request */ 3156 boolean_t ret; 3157 3158 ret = nl7c_process(so, 3159 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3160 rval.r_vals = sti->sti_nl7c_rcv_rval; 3161 error = rval.r_v.r_v2; 3162 if (error) { 3163 /* Error of some sort, return it */ 3164 mutex_exit(&so->so_lock); 3165 return (error); 3166 } 3167 if (sti->sti_nl7c_flags && 3168 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3169 /* 3170 * Still an NL7C socket and no data 3171 * to pass up to the caller. 3172 */ 3173 mutex_exit(&so->so_lock); 3174 if (ret) { 3175 /* EOF */ 3176 return (0); 3177 } else { 3178 /* Need more data */ 3179 return (EAGAIN); 3180 } 3181 } 3182 } else { 3183 /* 3184 * Not persistent so no further NL7C processing. 
3185 */ 3186 sti->sti_nl7c_flags = 0; 3187 } 3188 } 3189 /* 3190 * Only one reader is allowed at any given time. This is needed 3191 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3192 * 3193 * This is slightly different that BSD behavior in that it fails with 3194 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3195 * is single-threaded using sblock(), which is dropped while waiting 3196 * for data to appear. The difference shows up e.g. if one 3197 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3198 * does use nonblocking io and different threads are reading each 3199 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3200 * in this case as long as the read queue doesn't get empty. 3201 * In this implementation the thread using nonblocking io can 3202 * get an EWOULDBLOCK error due to the blocking thread executing 3203 * e.g. in the uiomove in kstrgetmsg. 3204 * This difference is not believed to be significant. 3205 */ 3206 /* Set SOREADLOCKED */ 3207 error = so_lock_read_intr(so, 3208 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3209 mutex_exit(&so->so_lock); 3210 if (error) 3211 return (error); 3212 3213 /* 3214 * Tell kstrgetmsg to not inspect the stream head errors until all 3215 * queued data has been consumed. 3216 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3217 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3218 * 3219 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3220 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3221 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 
3222 */ 3223 pflag = MSG_ANY | MSG_DELAYERROR; 3224 if (flags & MSG_PEEK) { 3225 pflag |= MSG_IPEEK; 3226 flags &= ~MSG_WAITALL; 3227 } 3228 if (so->so_mode & SM_ATOMIC) 3229 pflag |= MSG_DISCARDTAIL; 3230 3231 if (flags & MSG_DONTWAIT) 3232 timout = 0; 3233 else if (so->so_rcvtimeo != 0) 3234 timout = TICK_TO_MSEC(so->so_rcvtimeo); 3235 else 3236 timout = -1; 3237 opflag = pflag; 3238 retry: 3239 saved_resid = uiop->uio_resid; 3240 pri = 0; 3241 mp = NULL; 3242 if (sti->sti_nl7c_rcv_mp != NULL) { 3243 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3244 error = nl7c_sorecv(so, &mp, uiop, &rval); 3245 } else { 3246 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3247 timout, &rval); 3248 } 3249 if (error != 0) { 3250 /* kstrgetmsg returns ETIME when timeout expires */ 3251 if (error == ETIME) 3252 error = EWOULDBLOCK; 3253 goto out; 3254 } 3255 /* 3256 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3257 * For non-datagrams MOREDATA is used to set MSG_EOR. 3258 */ 3259 ASSERT(!(rval.r_val1 & MORECTL)); 3260 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3261 msg->msg_flags |= MSG_TRUNC; 3262 3263 if (mp == NULL) { 3264 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3265 /* 3266 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3267 * The draft Posix socket spec states that the mark should 3268 * not be cleared when peeking. We follow the latter. 3269 */ 3270 if ((so->so_state & 3271 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3272 (uiop->uio_resid != saved_resid) && 3273 !(flags & MSG_PEEK)) { 3274 sorecv_update_oobstate(so); 3275 } 3276 3277 mutex_enter(&so->so_lock); 3278 /* Set MSG_EOR based on MOREDATA */ 3279 if (!(rval.r_val1 & MOREDATA)) { 3280 if (so->so_state & SS_SAVEDEOR) { 3281 msg->msg_flags |= MSG_EOR; 3282 so->so_state &= ~SS_SAVEDEOR; 3283 } 3284 } 3285 /* 3286 * If some data was received (i.e. not EOF) and the 3287 * read/recv* has not been satisfied wait for some more. 
3288 */ 3289 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3290 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3291 mutex_exit(&so->so_lock); 3292 pflag = opflag | MSG_NOMARK; 3293 goto retry; 3294 } 3295 goto out_locked; 3296 } 3297 3298 /* strsock_proto has already verified length and alignment */ 3299 tpr = (union T_primitives *)mp->b_rptr; 3300 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3301 3302 switch (tpr->type) { 3303 case T_DATA_IND: { 3304 if ((so->so_state & 3305 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3306 (uiop->uio_resid != saved_resid) && 3307 !(flags & MSG_PEEK)) { 3308 sorecv_update_oobstate(so); 3309 } 3310 3311 /* 3312 * Set msg_flags to MSG_EOR based on 3313 * MORE_flag and MOREDATA. 3314 */ 3315 mutex_enter(&so->so_lock); 3316 so->so_state &= ~SS_SAVEDEOR; 3317 if (!(tpr->data_ind.MORE_flag & 1)) { 3318 if (!(rval.r_val1 & MOREDATA)) 3319 msg->msg_flags |= MSG_EOR; 3320 else 3321 so->so_state |= SS_SAVEDEOR; 3322 } 3323 freemsg(mp); 3324 /* 3325 * If some data was received (i.e. not EOF) and the 3326 * read/recv* has not been satisfied wait for some more. 
3327 */ 3328 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3329 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3330 mutex_exit(&so->so_lock); 3331 pflag = opflag | MSG_NOMARK; 3332 goto retry; 3333 } 3334 goto out_locked; 3335 } 3336 case T_UNITDATA_IND: { 3337 void *addr; 3338 t_uscalar_t addrlen; 3339 void *abuf; 3340 t_uscalar_t optlen; 3341 void *opt; 3342 3343 if ((so->so_state & 3344 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3345 (uiop->uio_resid != saved_resid) && 3346 !(flags & MSG_PEEK)) { 3347 sorecv_update_oobstate(so); 3348 } 3349 3350 if (namelen != 0) { 3351 /* Caller wants source address */ 3352 addrlen = tpr->unitdata_ind.SRC_length; 3353 addr = sogetoff(mp, 3354 tpr->unitdata_ind.SRC_offset, 3355 addrlen, 1); 3356 if (addr == NULL) { 3357 freemsg(mp); 3358 error = EPROTO; 3359 eprintsoline(so, error); 3360 goto out; 3361 } 3362 if (so->so_family == AF_UNIX) { 3363 /* 3364 * Can not use the transport level address. 3365 * If there is a SO_SRCADDR option carrying 3366 * the socket level address it will be 3367 * extracted below. 3368 */ 3369 addr = NULL; 3370 addrlen = 0; 3371 } 3372 } 3373 optlen = tpr->unitdata_ind.OPT_length; 3374 if (optlen != 0) { 3375 t_uscalar_t ncontrollen; 3376 3377 /* 3378 * Extract any source address option. 3379 * Determine how large cmsg buffer is needed. 3380 */ 3381 opt = sogetoff(mp, 3382 tpr->unitdata_ind.OPT_offset, 3383 optlen, __TPI_ALIGN_SIZE); 3384 3385 if (opt == NULL) { 3386 freemsg(mp); 3387 error = EPROTO; 3388 eprintsoline(so, error); 3389 goto out; 3390 } 3391 if (so->so_family == AF_UNIX) 3392 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3393 ncontrollen = so_cmsglen(mp, opt, optlen, 3394 !(flags & MSG_XPG4_2)); 3395 if (controllen != 0) 3396 controllen = ncontrollen; 3397 else if (ncontrollen != 0) 3398 msg->msg_flags |= MSG_CTRUNC; 3399 } else { 3400 controllen = 0; 3401 } 3402 3403 if (namelen != 0) { 3404 /* 3405 * Return address to caller. 
3406 * Caller handles truncation if length 3407 * exceeds msg_namelen. 3408 * NOTE: AF_UNIX NUL termination is ensured by 3409 * the sender's copyin_name(). 3410 */ 3411 abuf = kmem_alloc(addrlen, KM_SLEEP); 3412 3413 bcopy(addr, abuf, addrlen); 3414 msg->msg_name = abuf; 3415 msg->msg_namelen = addrlen; 3416 } 3417 3418 if (controllen != 0) { 3419 /* 3420 * Return control msg to caller. 3421 * Caller handles truncation if length 3422 * exceeds msg_controllen. 3423 */ 3424 control = kmem_zalloc(controllen, KM_SLEEP); 3425 3426 error = so_opt2cmsg(mp, opt, optlen, 3427 !(flags & MSG_XPG4_2), 3428 control, controllen); 3429 if (error) { 3430 freemsg(mp); 3431 if (msg->msg_namelen != 0) 3432 kmem_free(msg->msg_name, 3433 msg->msg_namelen); 3434 kmem_free(control, controllen); 3435 eprintsoline(so, error); 3436 goto out; 3437 } 3438 msg->msg_control = control; 3439 msg->msg_controllen = controllen; 3440 } 3441 3442 freemsg(mp); 3443 goto out; 3444 } 3445 case T_OPTDATA_IND: { 3446 struct T_optdata_req *tdr; 3447 void *opt; 3448 t_uscalar_t optlen; 3449 3450 if ((so->so_state & 3451 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3452 (uiop->uio_resid != saved_resid) && 3453 !(flags & MSG_PEEK)) { 3454 sorecv_update_oobstate(so); 3455 } 3456 3457 tdr = (struct T_optdata_req *)mp->b_rptr; 3458 optlen = tdr->OPT_length; 3459 if (optlen != 0) { 3460 t_uscalar_t ncontrollen; 3461 /* 3462 * Determine how large cmsg buffer is needed. 3463 */ 3464 opt = sogetoff(mp, 3465 tpr->optdata_ind.OPT_offset, 3466 optlen, __TPI_ALIGN_SIZE); 3467 3468 if (opt == NULL) { 3469 freemsg(mp); 3470 error = EPROTO; 3471 eprintsoline(so, error); 3472 goto out; 3473 } 3474 3475 ncontrollen = so_cmsglen(mp, opt, optlen, 3476 !(flags & MSG_XPG4_2)); 3477 if (controllen != 0) 3478 controllen = ncontrollen; 3479 else if (ncontrollen != 0) 3480 msg->msg_flags |= MSG_CTRUNC; 3481 } else { 3482 controllen = 0; 3483 } 3484 3485 if (controllen != 0) { 3486 /* 3487 * Return control msg to caller. 
3488 * Caller handles truncation if length 3489 * exceeds msg_controllen. 3490 */ 3491 control = kmem_zalloc(controllen, KM_SLEEP); 3492 3493 error = so_opt2cmsg(mp, opt, optlen, 3494 !(flags & MSG_XPG4_2), 3495 control, controllen); 3496 if (error) { 3497 freemsg(mp); 3498 kmem_free(control, controllen); 3499 eprintsoline(so, error); 3500 goto out; 3501 } 3502 msg->msg_control = control; 3503 msg->msg_controllen = controllen; 3504 } 3505 3506 /* 3507 * Set msg_flags to MSG_EOR based on 3508 * DATA_flag and MOREDATA. 3509 */ 3510 mutex_enter(&so->so_lock); 3511 so->so_state &= ~SS_SAVEDEOR; 3512 if (!(tpr->data_ind.MORE_flag & 1)) { 3513 if (!(rval.r_val1 & MOREDATA)) 3514 msg->msg_flags |= MSG_EOR; 3515 else 3516 so->so_state |= SS_SAVEDEOR; 3517 } 3518 freemsg(mp); 3519 /* 3520 * If some data was received (i.e. not EOF) and the 3521 * read/recv* has not been satisfied wait for some more. 3522 * Not possible to wait if control info was received. 3523 */ 3524 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3525 controllen == 0 && 3526 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3527 mutex_exit(&so->so_lock); 3528 pflag = opflag | MSG_NOMARK; 3529 goto retry; 3530 } 3531 goto out_locked; 3532 } 3533 case T_EXDATA_IND: { 3534 dprintso(so, 1, 3535 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3536 "state %s\n", 3537 sti->sti_oobsigcnt, sti->sti_oobcnt, 3538 saved_resid - uiop->uio_resid, 3539 pr_state(so->so_state, so->so_mode))); 3540 /* 3541 * kstrgetmsg handles MSGMARK so there is nothing to 3542 * inspect in the T_EXDATA_IND. 3543 * strsock_proto makes the stream head queue the T_EXDATA_IND 3544 * as a separate message with no M_DATA component. Furthermore, 3545 * the stream head does not consolidate M_DATA messages onto 3546 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3547 * remains a message by itself. This is needed since MSGMARK 3548 * marks both the whole message as well as the last byte 3549 * of the message. 
3550 */ 3551 freemsg(mp); 3552 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3553 if (flags & MSG_PEEK) { 3554 /* 3555 * Even though we are peeking we consume the 3556 * T_EXDATA_IND thereby moving the mark information 3557 * to SS_RCVATMARK. Then the oob code below will 3558 * retry the peeking kstrgetmsg. 3559 * Note that the stream head read queue is 3560 * never flushed without holding SOREADLOCKED 3561 * thus the T_EXDATA_IND can not disappear 3562 * underneath us. 3563 */ 3564 dprintso(so, 1, 3565 ("sotpi_recvmsg: consume EXDATA_IND " 3566 "counts %d/%d state %s\n", 3567 sti->sti_oobsigcnt, 3568 sti->sti_oobcnt, 3569 pr_state(so->so_state, so->so_mode))); 3570 3571 pflag = MSG_ANY | MSG_DELAYERROR; 3572 if (so->so_mode & SM_ATOMIC) 3573 pflag |= MSG_DISCARDTAIL; 3574 3575 pri = 0; 3576 mp = NULL; 3577 3578 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3579 &pri, &pflag, (clock_t)-1, &rval); 3580 ASSERT(uiop->uio_resid == saved_resid); 3581 3582 if (error) { 3583 #ifdef SOCK_DEBUG 3584 if (error != EWOULDBLOCK && error != EINTR) { 3585 eprintsoline(so, error); 3586 } 3587 #endif /* SOCK_DEBUG */ 3588 goto out; 3589 } 3590 ASSERT(mp); 3591 tpr = (union T_primitives *)mp->b_rptr; 3592 ASSERT(tpr->type == T_EXDATA_IND); 3593 freemsg(mp); 3594 } /* end "if (flags & MSG_PEEK)" */ 3595 3596 /* 3597 * Decrement the number of queued and pending oob. 3598 * 3599 * SS_RCVATMARK is cleared when we read past a mark. 3600 * SS_HAVEOOBDATA is cleared when we've read past the 3601 * last mark. 3602 * SS_OOBPEND is cleared if we've read past the last 3603 * mark and no (new) SIGURG has been posted. 
3604 */ 3605 mutex_enter(&so->so_lock); 3606 ASSERT(so_verify_oobstate(so)); 3607 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3608 ASSERT(sti->sti_oobsigcnt > 0); 3609 sti->sti_oobsigcnt--; 3610 ASSERT(sti->sti_oobcnt > 0); 3611 sti->sti_oobcnt--; 3612 /* 3613 * Since the T_EXDATA_IND has been removed from the stream 3614 * head, but we have not read data past the mark, 3615 * sockfs needs to track that the socket is still at the mark. 3616 * 3617 * Since no data was received call kstrgetmsg again to wait 3618 * for data. 3619 */ 3620 so->so_state |= SS_RCVATMARK; 3621 mutex_exit(&so->so_lock); 3622 dprintso(so, 1, 3623 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3624 sti->sti_oobsigcnt, sti->sti_oobcnt, 3625 pr_state(so->so_state, so->so_mode))); 3626 pflag = opflag; 3627 goto retry; 3628 } 3629 default: 3630 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3631 (void *)so, tpr->type, (void *)mp); 3632 ASSERT(0); 3633 freemsg(mp); 3634 error = EPROTO; 3635 eprintsoline(so, error); 3636 goto out; 3637 } 3638 /* NOTREACHED */ 3639 out: 3640 mutex_enter(&so->so_lock); 3641 out_locked: 3642 so_unlock_read(so); /* Clear SOREADLOCKED */ 3643 mutex_exit(&so->so_lock); 3644 return (error); 3645 } 3646 3647 /* 3648 * Sending data with options on a datagram socket. 3649 * Assumes caller has verified that SS_ISBOUND etc. are set. 3650 * 3651 * For AF_UNIX the destination address may be already in 3652 * internal form, as indicated by sti->sti_faddr_noxlate 3653 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to 3654 * translate the destination address to internal form. 3655 * 3656 * The source address is passed as an option. If passing 3657 * file descriptors, those are passed as file pointers in 3658 * another option. 
3659 */ 3660 static int 3661 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3662 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3663 { 3664 struct T_unitdata_req tudr; 3665 mblk_t *mp; 3666 int error; 3667 void *addr; 3668 socklen_t addrlen; 3669 void *src; 3670 socklen_t srclen; 3671 ssize_t len; 3672 int size; 3673 struct T_opthdr toh; 3674 struct fdbuf *fdbuf; 3675 t_uscalar_t optlen; 3676 void *fds; 3677 int fdlen; 3678 sotpi_info_t *sti = SOTOTPI(so); 3679 3680 ASSERT(name && namelen); 3681 ASSERT(control && controllen); 3682 3683 len = uiop->uio_resid; 3684 if (len > (ssize_t)sti->sti_tidu_size) { 3685 return (EMSGSIZE); 3686 } 3687 3688 if (sti->sti_faddr_noxlate == 0 && 3689 (flags & MSG_SENDTO_NOXLATE) == 0) { 3690 /* 3691 * Length and family checks. 3692 * Don't verify internal form. 3693 */ 3694 error = so_addr_verify(so, name, namelen); 3695 if (error) { 3696 eprintsoline(so, error); 3697 return (error); 3698 } 3699 } 3700 3701 if (so->so_family == AF_UNIX) { 3702 if (sti->sti_faddr_noxlate) { 3703 /* 3704 * Already have a transport internal address. Do not 3705 * pass any (transport internal) source address. 3706 */ 3707 addr = name; 3708 addrlen = namelen; 3709 src = NULL; 3710 srclen = 0; 3711 } else if (flags & MSG_SENDTO_NOXLATE) { 3712 /* 3713 * Have an internal form dest. address. 3714 * Pass the source address as usual. 3715 */ 3716 addr = name; 3717 addrlen = namelen; 3718 src = sti->sti_laddr_sa; 3719 srclen = (socklen_t)sti->sti_laddr_len; 3720 } else { 3721 /* 3722 * Pass the sockaddr_un source address as an option 3723 * and translate the remote address. 3724 * 3725 * Note that this code does not prevent sti_laddr_sa 3726 * from changing while it is being used. Thus 3727 * if an unbind+bind occurs concurrently with this 3728 * send the peer might see a partially new and a 3729 * partially old "from" address. 
3730 */ 3731 src = sti->sti_laddr_sa; 3732 srclen = (socklen_t)sti->sti_laddr_len; 3733 dprintso(so, 1, 3734 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3735 srclen, src)); 3736 /* 3737 * The sendmsg caller specified a destination 3738 * address, which we must translate into our 3739 * internal form. addr = &sti->sti_ux_taddr 3740 */ 3741 error = so_ux_addr_xlate(so, name, namelen, 3742 (flags & MSG_XPG4_2), 3743 &addr, &addrlen); 3744 if (error) { 3745 eprintsoline(so, error); 3746 return (error); 3747 } 3748 } 3749 } else { 3750 addr = name; 3751 addrlen = namelen; 3752 src = NULL; 3753 srclen = 0; 3754 } 3755 optlen = so_optlen(control, controllen, 3756 !(flags & MSG_XPG4_2)); 3757 tudr.PRIM_type = T_UNITDATA_REQ; 3758 tudr.DEST_length = addrlen; 3759 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3760 if (srclen != 0) 3761 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3762 _TPI_ALIGN_TOPT(srclen)); 3763 else 3764 tudr.OPT_length = optlen; 3765 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3766 _TPI_ALIGN_TOPT(addrlen)); 3767 3768 size = tudr.OPT_offset + tudr.OPT_length; 3769 3770 /* 3771 * File descriptors only when SM_FDPASSING set. 3772 */ 3773 error = so_getfdopt(control, controllen, 3774 !(flags & MSG_XPG4_2), &fds, &fdlen); 3775 if (error) 3776 return (error); 3777 if (fdlen != -1) { 3778 if (!(so->so_mode & SM_FDPASSING)) 3779 return (EOPNOTSUPP); 3780 3781 error = fdbuf_create(fds, fdlen, &fdbuf); 3782 if (error) 3783 return (error); 3784 3785 /* 3786 * Pre-allocate enough additional space for lower level modules 3787 * to append an option (e.g. see tl_unitdata). The following 3788 * is enough extra space for the largest option we might append. 3789 */ 3790 size += sizeof (struct T_opthdr) + ucredsize; 3791 mp = fdbuf_allocmsg(size, fdbuf); 3792 } else { 3793 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3794 if (mp == NULL) { 3795 /* 3796 * Caught a signal waiting for memory. 3797 * Let send* return EINTR. 
3798 */ 3799 return (EINTR); 3800 } 3801 } 3802 soappendmsg(mp, &tudr, sizeof (tudr)); 3803 soappendmsg(mp, addr, addrlen); 3804 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3805 3806 if (fdlen != -1) { 3807 ASSERT(fdbuf != NULL); 3808 toh.level = SOL_SOCKET; 3809 toh.name = SO_FILEP; 3810 toh.len = fdbuf->fd_size + 3811 (t_uscalar_t)sizeof (struct T_opthdr); 3812 toh.status = 0; 3813 soappendmsg(mp, &toh, sizeof (toh)); 3814 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3815 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3816 } 3817 if (srclen != 0) { 3818 /* 3819 * There is a AF_UNIX sockaddr_un to include as a source 3820 * address option. 3821 */ 3822 toh.level = SOL_SOCKET; 3823 toh.name = SO_SRCADDR; 3824 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3825 toh.status = 0; 3826 soappendmsg(mp, &toh, sizeof (toh)); 3827 soappendmsg(mp, src, srclen); 3828 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3829 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3830 } 3831 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3832 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3833 /* 3834 * Normally at most 3 bytes left in the message, but we might have 3835 * allowed for extra space if we're passing fd's through. 3836 */ 3837 ASSERT(MBLKL(mp) <= (ssize_t)size); 3838 3839 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3840 if (AU_AUDITING()) 3841 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3842 3843 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3844 #ifdef SOCK_DEBUG 3845 if (error) { 3846 eprintsoline(so, error); 3847 } 3848 #endif /* SOCK_DEBUG */ 3849 return (error); 3850 } 3851 3852 /* 3853 * Sending data with options on a connected stream socket. 3854 * Assumes caller has verified that SS_ISCONNECTED is set. 
3855 */ 3856 static int 3857 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3858 t_uscalar_t controllen, int flags) 3859 { 3860 struct T_optdata_req tdr; 3861 mblk_t *mp; 3862 int error; 3863 ssize_t iosize; 3864 int size; 3865 struct fdbuf *fdbuf; 3866 t_uscalar_t optlen; 3867 void *fds; 3868 int fdlen; 3869 struct T_opthdr toh; 3870 sotpi_info_t *sti = SOTOTPI(so); 3871 3872 dprintso(so, 1, 3873 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3874 3875 /* 3876 * Has to be bound and connected. However, since no locks are 3877 * held the state could have changed after sotpi_sendmsg checked it 3878 * thus it is not possible to ASSERT on the state. 3879 */ 3880 3881 /* Options on connection-oriented only when SM_OPTDATA set. */ 3882 if (!(so->so_mode & SM_OPTDATA)) 3883 return (EOPNOTSUPP); 3884 3885 do { 3886 /* 3887 * Set the MORE flag if uio_resid does not fit in this 3888 * message or if the caller passed in "more". 3889 * Error for transports with zero tidu_size. 3890 */ 3891 tdr.PRIM_type = T_OPTDATA_REQ; 3892 iosize = sti->sti_tidu_size; 3893 if (iosize <= 0) 3894 return (EMSGSIZE); 3895 if (uiop->uio_resid > iosize) { 3896 tdr.DATA_flag = 1; 3897 } else { 3898 if (more) 3899 tdr.DATA_flag = 1; 3900 else 3901 tdr.DATA_flag = 0; 3902 iosize = uiop->uio_resid; 3903 } 3904 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3905 tdr.DATA_flag, iosize)); 3906 3907 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3908 tdr.OPT_length = optlen; 3909 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3910 3911 size = (int)sizeof (tdr) + optlen; 3912 /* 3913 * File descriptors only when SM_FDPASSING set. 
3914 */ 3915 error = so_getfdopt(control, controllen, 3916 !(flags & MSG_XPG4_2), &fds, &fdlen); 3917 if (error) 3918 return (error); 3919 if (fdlen != -1) { 3920 if (!(so->so_mode & SM_FDPASSING)) 3921 return (EOPNOTSUPP); 3922 3923 error = fdbuf_create(fds, fdlen, &fdbuf); 3924 if (error) 3925 return (error); 3926 3927 /* 3928 * Pre-allocate enough additional space for lower level 3929 * modules to append an option (e.g. see tl_unitdata). 3930 * The following is enough extra space for the largest 3931 * option we might append. 3932 */ 3933 size += sizeof (struct T_opthdr) + ucredsize; 3934 mp = fdbuf_allocmsg(size, fdbuf); 3935 } else { 3936 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3937 if (mp == NULL) { 3938 /* 3939 * Caught a signal waiting for memory. 3940 * Let send* return EINTR. 3941 */ 3942 return (EINTR); 3943 } 3944 } 3945 soappendmsg(mp, &tdr, sizeof (tdr)); 3946 3947 if (fdlen != -1) { 3948 ASSERT(fdbuf != NULL); 3949 toh.level = SOL_SOCKET; 3950 toh.name = SO_FILEP; 3951 toh.len = fdbuf->fd_size + 3952 (t_uscalar_t)sizeof (struct T_opthdr); 3953 toh.status = 0; 3954 soappendmsg(mp, &toh, sizeof (toh)); 3955 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3956 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3957 } 3958 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3959 /* 3960 * Normally at most 3 bytes left in the message, but we might 3961 * have allowed for extra space if we're passing fd's through. 3962 */ 3963 ASSERT(MBLKL(mp) <= (ssize_t)size); 3964 3965 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3966 3967 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3968 0, MSG_BAND, 0); 3969 if (error) { 3970 eprintsoline(so, error); 3971 return (error); 3972 } 3973 control = NULL; 3974 if (uiop->uio_resid > 0) { 3975 /* 3976 * Recheck for fatal errors. Fail write even though 3977 * some data have been written. This is consistent 3978 * with strwrite semantics and BSD sockets semantics. 
3979 */ 3980 if (so->so_state & SS_CANTSENDMORE) { 3981 eprintsoline(so, error); 3982 return (EPIPE); 3983 } 3984 if (so->so_error != 0) { 3985 mutex_enter(&so->so_lock); 3986 error = sogeterr(so, B_TRUE); 3987 mutex_exit(&so->so_lock); 3988 if (error != 0) { 3989 eprintsoline(so, error); 3990 return (error); 3991 } 3992 } 3993 } 3994 } while (uiop->uio_resid > 0); 3995 return (0); 3996 } 3997 3998 /* 3999 * Sending data on a datagram socket. 4000 * Assumes caller has verified that SS_ISBOUND etc. are set. 4001 * 4002 * For AF_UNIX the destination address may be already in 4003 * internal form, as indicated by sti->sti_faddr_noxlate 4004 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to 4005 * translate the destination address to internal form. 4006 * 4007 * The source address is passed as an option. 4008 */ 4009 int 4010 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 4011 struct uio *uiop, int flags) 4012 { 4013 struct T_unitdata_req tudr; 4014 mblk_t *mp; 4015 int error; 4016 void *addr; 4017 socklen_t addrlen; 4018 void *src; 4019 socklen_t srclen; 4020 ssize_t len; 4021 sotpi_info_t *sti = SOTOTPI(so); 4022 4023 ASSERT(name != NULL && namelen != 0); 4024 4025 len = uiop->uio_resid; 4026 if (len > sti->sti_tidu_size) { 4027 error = EMSGSIZE; 4028 goto done; 4029 } 4030 4031 if (sti->sti_faddr_noxlate == 0 && 4032 (flags & MSG_SENDTO_NOXLATE) == 0) { 4033 /* 4034 * Length and family checks. 4035 * Don't verify internal form. 4036 */ 4037 error = so_addr_verify(so, name, namelen); 4038 if (error != 0) 4039 goto done; 4040 } 4041 4042 if (sti->sti_direct) /* Never on AF_UNIX */ 4043 return (sodgram_direct(so, name, namelen, uiop, flags)); 4044 4045 if (so->so_family == AF_UNIX) { 4046 if (sti->sti_faddr_noxlate) { 4047 /* 4048 * Already have a transport internal address. Do not 4049 * pass any (transport internal) source address. 
4050 */ 4051 addr = name; 4052 addrlen = namelen; 4053 src = NULL; 4054 srclen = 0; 4055 } else if (flags & MSG_SENDTO_NOXLATE) { 4056 /* 4057 * Have an internal form dest. address. 4058 * Pass the source address as usual. 4059 */ 4060 addr = name; 4061 addrlen = namelen; 4062 src = sti->sti_laddr_sa; 4063 srclen = (socklen_t)sti->sti_laddr_len; 4064 } else { 4065 /* 4066 * Pass the sockaddr_un source address as an option 4067 * and translate the remote address. 4068 * 4069 * Note that this code does not prevent sti_laddr_sa 4070 * from changing while it is being used. Thus 4071 * if an unbind+bind occurs concurrently with this 4072 * send the peer might see a partially new and a 4073 * partially old "from" address. 4074 */ 4075 src = sti->sti_laddr_sa; 4076 srclen = (socklen_t)sti->sti_laddr_len; 4077 dprintso(so, 1, 4078 ("sosend_dgram UNIX: srclen %d, src %p\n", 4079 srclen, src)); 4080 /* 4081 * The sendmsg caller specified a destination 4082 * address, which we must translate into our 4083 * internal form. addr = &sti->sti_ux_taddr 4084 */ 4085 error = so_ux_addr_xlate(so, name, namelen, 4086 (flags & MSG_XPG4_2), 4087 &addr, &addrlen); 4088 if (error) { 4089 eprintsoline(so, error); 4090 goto done; 4091 } 4092 } 4093 } else { 4094 addr = name; 4095 addrlen = namelen; 4096 src = NULL; 4097 srclen = 0; 4098 } 4099 tudr.PRIM_type = T_UNITDATA_REQ; 4100 tudr.DEST_length = addrlen; 4101 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4102 if (srclen == 0) { 4103 tudr.OPT_length = 0; 4104 tudr.OPT_offset = 0; 4105 4106 mp = soallocproto2(&tudr, sizeof (tudr), 4107 addr, addrlen, 0, _ALLOC_INTR, CRED()); 4108 if (mp == NULL) { 4109 /* 4110 * Caught a signal waiting for memory. 4111 * Let send* return EINTR. 4112 */ 4113 error = EINTR; 4114 goto done; 4115 } 4116 } else { 4117 /* 4118 * There is a AF_UNIX sockaddr_un to include as a source 4119 * address option. 
4120 */ 4121 struct T_opthdr toh; 4122 ssize_t size; 4123 4124 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4125 _TPI_ALIGN_TOPT(srclen)); 4126 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4127 _TPI_ALIGN_TOPT(addrlen)); 4128 4129 toh.level = SOL_SOCKET; 4130 toh.name = SO_SRCADDR; 4131 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4132 toh.status = 0; 4133 4134 size = tudr.OPT_offset + tudr.OPT_length; 4135 mp = soallocproto2(&tudr, sizeof (tudr), 4136 addr, addrlen, size, _ALLOC_INTR, CRED()); 4137 if (mp == NULL) { 4138 /* 4139 * Caught a signal waiting for memory. 4140 * Let send* return EINTR. 4141 */ 4142 error = EINTR; 4143 goto done; 4144 } 4145 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4146 soappendmsg(mp, &toh, sizeof (toh)); 4147 soappendmsg(mp, src, srclen); 4148 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4149 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4150 } 4151 4152 if (AU_AUDITING()) 4153 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4154 4155 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4156 done: 4157 #ifdef SOCK_DEBUG 4158 if (error) { 4159 eprintsoline(so, error); 4160 } 4161 #endif /* SOCK_DEBUG */ 4162 return (error); 4163 } 4164 4165 /* 4166 * Sending data on a connected stream socket. 4167 * Assumes caller has verified that SS_ISCONNECTED is set. 4168 */ 4169 int 4170 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4171 int sflag) 4172 { 4173 struct T_data_req tdr; 4174 mblk_t *mp; 4175 int error; 4176 ssize_t iosize; 4177 sotpi_info_t *sti = SOTOTPI(so); 4178 4179 dprintso(so, 1, 4180 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4181 (void *)so, uiop->uio_resid, prim, sflag)); 4182 4183 /* 4184 * Has to be bound and connected. However, since no locks are 4185 * held the state could have changed after sotpi_sendmsg checked it 4186 * thus it is not possible to ASSERT on the state. 
4187 */ 4188 4189 do { 4190 /* 4191 * Set the MORE flag if uio_resid does not fit in this 4192 * message or if the caller passed in "more". 4193 * Error for transports with zero tidu_size. 4194 */ 4195 tdr.PRIM_type = prim; 4196 iosize = sti->sti_tidu_size; 4197 if (iosize <= 0) 4198 return (EMSGSIZE); 4199 if (uiop->uio_resid > iosize) { 4200 tdr.MORE_flag = 1; 4201 } else { 4202 if (more) 4203 tdr.MORE_flag = 1; 4204 else 4205 tdr.MORE_flag = 0; 4206 iosize = uiop->uio_resid; 4207 } 4208 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4209 prim, tdr.MORE_flag, iosize)); 4210 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4211 if (mp == NULL) { 4212 /* 4213 * Caught a signal waiting for memory. 4214 * Let send* return EINTR. 4215 */ 4216 return (EINTR); 4217 } 4218 4219 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4220 0, sflag | MSG_BAND, 0); 4221 if (error) { 4222 eprintsoline(so, error); 4223 return (error); 4224 } 4225 if (uiop->uio_resid > 0) { 4226 /* 4227 * Recheck for fatal errors. Fail write even though 4228 * some data have been written. This is consistent 4229 * with strwrite semantics and BSD sockets semantics. 4230 */ 4231 if (so->so_state & SS_CANTSENDMORE) { 4232 eprintsoline(so, error); 4233 return (EPIPE); 4234 } 4235 if (so->so_error != 0) { 4236 mutex_enter(&so->so_lock); 4237 error = sogeterr(so, B_TRUE); 4238 mutex_exit(&so->so_lock); 4239 if (error != 0) { 4240 eprintsoline(so, error); 4241 return (error); 4242 } 4243 } 4244 } 4245 } while (uiop->uio_resid > 0); 4246 return (0); 4247 } 4248 4249 /* 4250 * Check the state for errors and call the appropriate send function. 4251 * 4252 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4253 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4254 * after sending the message. 4255 * 4256 * The caller may optionally specify a destination address, for either 4257 * stream or datagram sockets. 
 * This table summarizes the cases:
 *
 *	Socket type	Dest. given	Connected	Result
 *	-----------	-----------	---------	--------------
 *	Stream		*		Yes		send to conn. addr.
 *	Stream		*		No		error ENOTCONN
 *	Dgram		yes		*		send to given addr.
 *	Dgram		no		yes		send to conn. addr.
 *	Dgram		no		no		error EDESTADDRREQ
 *
 * There are subtleties around the destination address when using
 * AF_UNIX datagram sockets.  When the sendmsg call specifies the
 * destination address, it's in (struct sockaddr_un) form and we
 * need to translate it to our internal form (struct so_ux_addr).
 *
 * When the sendmsg call does not specify a destination address
 * we're using the peer address saved during sotpi_connect, and
 * that address is already in internal form.  In this case, the
 * (internal only) flag MSG_SENDTO_NOXLATE is set in the flags
 * passed to sosend_dgram or sosend_dgramcmsg to indicate that
 * those functions should skip translation to internal form.
 * Avoiding that translation is not only more efficient, but it's
 * also necessary when a process does a connect on an AF_UNIX
 * datagram socket and then drops privileges.  After the process
 * has dropped privileges, it may no longer be able to look up the
 * external name in the filesystem, but it should still be
 * able to send messages on the connected socket by leaving the
 * destination name unspecified.
 *
 * Yet more subtleties arise with sockets connected by socketpair(),
 * which puts internal form addresses in the fields where normally
 * the external form is found, and sets sti_faddr_noxlate=1, which
 * (like flag MSG_SENDTO_NOXLATE) causes the sosend_dgram functions
 * to skip translation of destination addresses to internal form.
 * However, beware that the flag sti_faddr_noxlate=1 also triggers
 * different behaviour almost everywhere AF_UNIX addresses appear.
4293 */ 4294 static int 4295 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4296 struct cred *cr) 4297 { 4298 int so_state; 4299 int so_mode; 4300 int error; 4301 struct sockaddr *name; 4302 t_uscalar_t namelen; 4303 int dontroute; 4304 int flags; 4305 sotpi_info_t *sti = SOTOTPI(so); 4306 4307 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4308 (void *)so, (void *)msg, msg->msg_flags, 4309 pr_state(so->so_state, so->so_mode), so->so_error)); 4310 4311 if (so->so_version == SOV_STREAM) { 4312 /* The imaginary "sockmod" has been popped - act as a stream */ 4313 so_update_attrs(so, SOMOD); 4314 return (strwrite(SOTOV(so), uiop, cr)); 4315 } 4316 4317 mutex_enter(&so->so_lock); 4318 so_state = so->so_state; 4319 4320 if (so_state & SS_CANTSENDMORE) { 4321 mutex_exit(&so->so_lock); 4322 return (EPIPE); 4323 } 4324 4325 if (so->so_error != 0) { 4326 error = sogeterr(so, B_TRUE); 4327 if (error != 0) { 4328 mutex_exit(&so->so_lock); 4329 return (error); 4330 } 4331 } 4332 4333 name = (struct sockaddr *)msg->msg_name; 4334 namelen = msg->msg_namelen; 4335 flags = msg->msg_flags; 4336 4337 /* 4338 * Historically, this function does not validate the flags 4339 * passed in, and any errant bits are ignored. However, 4340 * we would not want any such errant flag bits accidently 4341 * being treated as one of the internal-only flags, so 4342 * clear the internal-only flag bits. 4343 */ 4344 flags &= ~MSG_SENDTO_NOXLATE; 4345 4346 so_mode = so->so_mode; 4347 4348 if (name == NULL) { 4349 if (!(so_state & SS_ISCONNECTED)) { 4350 mutex_exit(&so->so_lock); 4351 if (so_mode & SM_CONNREQUIRED) 4352 return (ENOTCONN); 4353 else 4354 return (EDESTADDRREQ); 4355 } 4356 /* 4357 * This is a connected socket. 4358 */ 4359 if (so_mode & SM_CONNREQUIRED) { 4360 /* 4361 * This is a connected STREAM socket, 4362 * destination not specified. 
4363 */ 4364 name = NULL; 4365 namelen = 0; 4366 } else { 4367 /* 4368 * Datagram send on connected socket with 4369 * the destination name not specified. 4370 * Use the peer address from connect. 4371 */ 4372 if (so->so_family == AF_UNIX) { 4373 /* 4374 * Use the (internal form) address saved 4375 * in sotpi_connect. See above. 4376 */ 4377 name = (void *)&sti->sti_ux_faddr; 4378 namelen = sizeof (sti->sti_ux_faddr); 4379 flags |= MSG_SENDTO_NOXLATE; 4380 } else { 4381 ASSERT(sti->sti_faddr_sa); 4382 name = sti->sti_faddr_sa; 4383 namelen = (t_uscalar_t)sti->sti_faddr_len; 4384 } 4385 } 4386 } else { 4387 /* 4388 * Sendmsg specifies a destination name 4389 */ 4390 if (!(so_state & SS_ISCONNECTED) && 4391 (so_mode & SM_CONNREQUIRED)) { 4392 /* i.e. TCP not connected */ 4393 mutex_exit(&so->so_lock); 4394 return (ENOTCONN); 4395 } 4396 /* 4397 * Ignore the address on connection-oriented sockets. 4398 * Just like BSD this code does not generate an error for 4399 * TCP (a CONNREQUIRED socket) when sending to an address 4400 * passed in with sendto/sendmsg. Instead the data is 4401 * delivered on the connection as if no address had been 4402 * supplied. 4403 */ 4404 if ((so_state & SS_ISCONNECTED) && 4405 !(so_mode & SM_CONNREQUIRED)) { 4406 mutex_exit(&so->so_lock); 4407 return (EISCONN); 4408 } 4409 if (!(so_state & SS_ISBOUND)) { 4410 so_lock_single(so); /* Set SOLOCKED */ 4411 error = sotpi_bind(so, NULL, 0, 4412 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4413 so_unlock_single(so, SOLOCKED); 4414 if (error) { 4415 mutex_exit(&so->so_lock); 4416 eprintsoline(so, error); 4417 return (error); 4418 } 4419 } 4420 /* 4421 * Handle delayed datagram errors. These are only queued 4422 * when the application sets SO_DGRAM_ERRIND. 4423 * Return the error if we are sending to the address 4424 * that was returned in the last T_UDERROR_IND. 4425 * If sending to some other address discard the delayed 4426 * error indication. 
4427 */ 4428 if (sti->sti_delayed_error) { 4429 struct T_uderror_ind *tudi; 4430 void *addr; 4431 t_uscalar_t addrlen; 4432 boolean_t match = B_FALSE; 4433 4434 ASSERT(sti->sti_eaddr_mp); 4435 error = sti->sti_delayed_error; 4436 sti->sti_delayed_error = 0; 4437 tudi = 4438 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4439 addrlen = tudi->DEST_length; 4440 addr = sogetoff(sti->sti_eaddr_mp, 4441 tudi->DEST_offset, addrlen, 1); 4442 ASSERT(addr); /* Checked by strsock_proto */ 4443 switch (so->so_family) { 4444 case AF_INET: { 4445 /* Compare just IP address and port */ 4446 sin_t *sin1 = (sin_t *)name; 4447 sin_t *sin2 = (sin_t *)addr; 4448 4449 if (addrlen == sizeof (sin_t) && 4450 namelen == addrlen && 4451 sin1->sin_port == sin2->sin_port && 4452 sin1->sin_addr.s_addr == 4453 sin2->sin_addr.s_addr) 4454 match = B_TRUE; 4455 break; 4456 } 4457 case AF_INET6: { 4458 /* Compare just IP address and port. Not flow */ 4459 sin6_t *sin1 = (sin6_t *)name; 4460 sin6_t *sin2 = (sin6_t *)addr; 4461 4462 if (addrlen == sizeof (sin6_t) && 4463 namelen == addrlen && 4464 sin1->sin6_port == sin2->sin6_port && 4465 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4466 &sin2->sin6_addr)) 4467 match = B_TRUE; 4468 break; 4469 } 4470 case AF_UNIX: 4471 default: 4472 if (namelen == addrlen && 4473 bcmp(name, addr, namelen) == 0) 4474 match = B_TRUE; 4475 } 4476 if (match) { 4477 freemsg(sti->sti_eaddr_mp); 4478 sti->sti_eaddr_mp = NULL; 4479 mutex_exit(&so->so_lock); 4480 #ifdef DEBUG 4481 dprintso(so, 0, 4482 ("sockfs delayed error %d for %s\n", 4483 error, 4484 pr_addr(so->so_family, name, namelen))); 4485 #endif /* DEBUG */ 4486 return (error); 4487 } 4488 freemsg(sti->sti_eaddr_mp); 4489 sti->sti_eaddr_mp = NULL; 4490 } 4491 } 4492 mutex_exit(&so->so_lock); 4493 4494 dontroute = 0; 4495 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4496 uint32_t val; 4497 4498 val = 1; 4499 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4500 &val, (t_uscalar_t)sizeof 
(val), cr); 4501 if (error) 4502 return (error); 4503 dontroute = 1; 4504 } 4505 4506 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4507 error = EOPNOTSUPP; 4508 goto done; 4509 } 4510 if (msg->msg_controllen != 0) { 4511 if (!(so_mode & SM_CONNREQUIRED)) { 4512 so_update_attrs(so, SOMOD); 4513 error = sosend_dgramcmsg(so, name, namelen, uiop, 4514 msg->msg_control, msg->msg_controllen, flags); 4515 } else { 4516 if (flags & MSG_OOB) { 4517 /* Can't generate T_EXDATA_REQ with options */ 4518 error = EOPNOTSUPP; 4519 goto done; 4520 } 4521 so_update_attrs(so, SOMOD); 4522 error = sosend_svccmsg(so, uiop, 4523 !(flags & MSG_EOR), 4524 msg->msg_control, msg->msg_controllen, 4525 flags); 4526 } 4527 goto done; 4528 } 4529 4530 so_update_attrs(so, SOMOD); 4531 if (!(so_mode & SM_CONNREQUIRED)) { 4532 /* 4533 * If there is no SO_DONTROUTE to turn off return immediately 4534 * from send_dgram. This can allow tail-call optimizations. 4535 */ 4536 if (!dontroute) { 4537 return (sosend_dgram(so, name, namelen, uiop, flags)); 4538 } 4539 error = sosend_dgram(so, name, namelen, uiop, flags); 4540 } else { 4541 t_scalar_t prim; 4542 int sflag; 4543 4544 /* Ignore msg_name in the connected state */ 4545 if (flags & MSG_OOB) { 4546 prim = T_EXDATA_REQ; 4547 /* 4548 * Send down T_EXDATA_REQ even if there is flow 4549 * control for data. 4550 */ 4551 sflag = MSG_IGNFLOW; 4552 } else { 4553 if (so_mode & SM_BYTESTREAM) { 4554 /* Byte stream transport - use write */ 4555 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4556 4557 /* Send M_DATA messages */ 4558 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4559 (error = nl7c_data(so, uiop)) >= 0) { 4560 /* NL7C consumed the data */ 4561 return (error); 4562 } 4563 /* 4564 * If there is no SO_DONTROUTE to turn off, 4565 * sti_direct is on, and there is no flow 4566 * control, we can take the fast path. 
4567 */ 4568 if (!dontroute && sti->sti_direct != 0 && 4569 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4570 return (sostream_direct(so, uiop, 4571 NULL, cr)); 4572 } 4573 error = strwrite(SOTOV(so), uiop, cr); 4574 goto done; 4575 } 4576 prim = T_DATA_REQ; 4577 sflag = 0; 4578 } 4579 /* 4580 * If there is no SO_DONTROUTE to turn off return immediately 4581 * from sosend_svc. This can allow tail-call optimizations. 4582 */ 4583 if (!dontroute) 4584 return (sosend_svc(so, uiop, prim, 4585 !(flags & MSG_EOR), sflag)); 4586 error = sosend_svc(so, uiop, prim, 4587 !(flags & MSG_EOR), sflag); 4588 } 4589 ASSERT(dontroute); 4590 done: 4591 if (dontroute) { 4592 uint32_t val; 4593 4594 val = 0; 4595 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4596 &val, (t_uscalar_t)sizeof (val), cr); 4597 } 4598 return (error); 4599 } 4600 4601 /* 4602 * kstrwritemp() has very similar semantics as that of strwrite(). 4603 * The main difference is it obtains mblks from the caller and also 4604 * does not do any copy as done in strwrite() from user buffers to 4605 * kernel buffers. 4606 * 4607 * Currently, this routine is used by sendfile to send data allocated 4608 * within the kernel without any copying. This interface does not use the 4609 * synchronous stream interface as synch. stream interface implies 4610 * copying. 4611 */ 4612 int 4613 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4614 { 4615 struct stdata *stp; 4616 struct queue *wqp; 4617 mblk_t *newmp; 4618 char waitflag; 4619 int tempmode; 4620 int error = 0; 4621 int done = 0; 4622 struct sonode *so; 4623 boolean_t direct; 4624 4625 ASSERT(vp->v_stream); 4626 stp = vp->v_stream; 4627 4628 so = VTOSO(vp); 4629 direct = _SOTOTPI(so)->sti_direct; 4630 4631 /* 4632 * This is the sockfs direct fast path. canputnext() need 4633 * not be accurate so we don't grab the sd_lock here. If 4634 * we get flow-controlled, we grab sd_lock just before the 4635 * do..while loop below to emulate what strwrite() does. 
4636 */ 4637 wqp = stp->sd_wrq; 4638 if (canputnext(wqp) && direct && 4639 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4640 return (sostream_direct(so, NULL, mp, CRED())); 4641 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4642 /* Fast check of flags before acquiring the lock */ 4643 mutex_enter(&stp->sd_lock); 4644 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4645 mutex_exit(&stp->sd_lock); 4646 if (error != 0) { 4647 if (!(stp->sd_flag & STPLEX) && 4648 (stp->sd_wput_opt & SW_SIGPIPE)) { 4649 error = EPIPE; 4650 } 4651 return (error); 4652 } 4653 } 4654 4655 waitflag = WRITEWAIT; 4656 if (stp->sd_flag & OLDNDELAY) 4657 tempmode = fmode & ~FNDELAY; 4658 else 4659 tempmode = fmode; 4660 4661 mutex_enter(&stp->sd_lock); 4662 do { 4663 if (canputnext(wqp)) { 4664 mutex_exit(&stp->sd_lock); 4665 if (stp->sd_wputdatafunc != NULL) { 4666 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4667 NULL, NULL, NULL); 4668 if (newmp == NULL) { 4669 /* The caller will free mp */ 4670 return (ECOMM); 4671 } 4672 mp = newmp; 4673 } 4674 putnext(wqp, mp); 4675 return (0); 4676 } 4677 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4678 &done); 4679 } while (error == 0 && !done); 4680 4681 mutex_exit(&stp->sd_lock); 4682 /* 4683 * EAGAIN tells the application to try again. ENOMEM 4684 * is returned only if the memory allocation size 4685 * exceeds the physical limits of the system. ENOMEM 4686 * can't be true here. 
4687 */ 4688 if (error == ENOMEM) 4689 error = EAGAIN; 4690 return (error); 4691 } 4692 4693 /* ARGSUSED */ 4694 static int 4695 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4696 struct cred *cr, mblk_t **mpp) 4697 { 4698 int error; 4699 4700 switch (so->so_family) { 4701 case AF_INET: 4702 case AF_INET6: 4703 case AF_UNIX: 4704 break; 4705 default: 4706 return (EAFNOSUPPORT); 4707 4708 } 4709 4710 if (so->so_state & SS_CANTSENDMORE) 4711 return (EPIPE); 4712 4713 if (so->so_type != SOCK_STREAM) 4714 return (EOPNOTSUPP); 4715 4716 if ((so->so_state & SS_ISCONNECTED) == 0) 4717 return (ENOTCONN); 4718 4719 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4720 if (error == 0) 4721 *mpp = NULL; 4722 return (error); 4723 } 4724 4725 /* 4726 * Sending data on a datagram socket. 4727 * Assumes caller has verified that SS_ISBOUND etc. are set. 4728 */ 4729 /* ARGSUSED */ 4730 static int 4731 sodgram_direct(struct sonode *so, struct sockaddr *name, 4732 socklen_t namelen, struct uio *uiop, int flags) 4733 { 4734 struct T_unitdata_req tudr; 4735 mblk_t *mp = NULL; 4736 int error = 0; 4737 void *addr; 4738 socklen_t addrlen; 4739 ssize_t len; 4740 struct stdata *stp = SOTOV(so)->v_stream; 4741 int so_state; 4742 queue_t *udp_wq; 4743 boolean_t connected; 4744 mblk_t *mpdata = NULL; 4745 sotpi_info_t *sti = SOTOTPI(so); 4746 uint32_t auditing = AU_AUDITING(); 4747 4748 ASSERT(name != NULL && namelen != 0); 4749 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4750 ASSERT(!(so->so_mode & SM_EXDATA)); 4751 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4752 ASSERT(SOTOV(so)->v_type == VSOCK); 4753 4754 /* Caller checked for proper length */ 4755 len = uiop->uio_resid; 4756 ASSERT(len <= sti->sti_tidu_size); 4757 4758 /* Length and family checks have been done by caller */ 4759 ASSERT(name->sa_family == so->so_family); 4760 ASSERT(so->so_family == AF_INET || 4761 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4762 ASSERT(so->so_family == 
AF_INET6 || 4763 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4764 4765 addr = name; 4766 addrlen = namelen; 4767 4768 if (stp->sd_sidp != NULL && 4769 (error = straccess(stp, JCWRITE)) != 0) 4770 goto done; 4771 4772 so_state = so->so_state; 4773 4774 connected = so_state & SS_ISCONNECTED; 4775 if (!connected) { 4776 tudr.PRIM_type = T_UNITDATA_REQ; 4777 tudr.DEST_length = addrlen; 4778 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4779 tudr.OPT_length = 0; 4780 tudr.OPT_offset = 0; 4781 4782 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4783 _ALLOC_INTR, CRED()); 4784 if (mp == NULL) { 4785 /* 4786 * Caught a signal waiting for memory. 4787 * Let send* return EINTR. 4788 */ 4789 error = EINTR; 4790 goto done; 4791 } 4792 } 4793 4794 /* 4795 * For UDP we don't break up the copyin into smaller pieces 4796 * as in the TCP case. That means if ENOMEM is returned by 4797 * mcopyinuio() then the uio vector has not been modified at 4798 * all and we fallback to either strwrite() or kstrputmsg() 4799 * below. Note also that we never generate priority messages 4800 * from here. 4801 */ 4802 udp_wq = stp->sd_wrq->q_next; 4803 if (canput(udp_wq) && 4804 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4805 ASSERT(DB_TYPE(mpdata) == M_DATA); 4806 ASSERT(uiop->uio_resid == 0); 4807 if (!connected) 4808 linkb(mp, mpdata); 4809 else 4810 mp = mpdata; 4811 if (auditing) 4812 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4813 4814 udp_wput(udp_wq, mp); 4815 return (0); 4816 } 4817 4818 ASSERT(mpdata == NULL); 4819 if (error != 0 && error != ENOMEM) { 4820 freemsg(mp); 4821 return (error); 4822 } 4823 4824 /* 4825 * For connected, let strwrite() handle the blocking case. 4826 * Otherwise we fall thru and use kstrputmsg(). 
4827 */ 4828 if (connected) 4829 return (strwrite(SOTOV(so), uiop, CRED())); 4830 4831 if (auditing) 4832 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4833 4834 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4835 done: 4836 #ifdef SOCK_DEBUG 4837 if (error != 0) { 4838 eprintsoline(so, error); 4839 } 4840 #endif /* SOCK_DEBUG */ 4841 return (error); 4842 } 4843 4844 int 4845 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4846 { 4847 struct stdata *stp = SOTOV(so)->v_stream; 4848 ssize_t iosize, rmax, maxblk; 4849 queue_t *tcp_wq = stp->sd_wrq->q_next; 4850 mblk_t *newmp; 4851 int error = 0, wflag = 0; 4852 4853 ASSERT(so->so_mode & SM_BYTESTREAM); 4854 ASSERT(SOTOV(so)->v_type == VSOCK); 4855 4856 if (stp->sd_sidp != NULL && 4857 (error = straccess(stp, JCWRITE)) != 0) 4858 return (error); 4859 4860 if (uiop == NULL) { 4861 /* 4862 * kstrwritemp() should have checked sd_flag and 4863 * flow-control before coming here. If we end up 4864 * here it means that we can simply pass down the 4865 * data to tcp. 
4866 */ 4867 ASSERT(mp != NULL); 4868 if (stp->sd_wputdatafunc != NULL) { 4869 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4870 NULL, NULL, NULL); 4871 if (newmp == NULL) { 4872 /* The caller will free mp */ 4873 return (ECOMM); 4874 } 4875 mp = newmp; 4876 } 4877 tcp_wput(tcp_wq, mp); 4878 return (0); 4879 } 4880 4881 /* Fallback to strwrite() to do proper error handling */ 4882 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4883 return (strwrite(SOTOV(so), uiop, cr)); 4884 4885 rmax = stp->sd_qn_maxpsz; 4886 ASSERT(rmax >= 0 || rmax == INFPSZ); 4887 if (rmax == 0 || uiop->uio_resid <= 0) 4888 return (0); 4889 4890 if (rmax == INFPSZ) 4891 rmax = uiop->uio_resid; 4892 4893 maxblk = stp->sd_maxblk; 4894 4895 for (;;) { 4896 iosize = MIN(uiop->uio_resid, rmax); 4897 4898 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4899 if (mp == NULL) { 4900 /* 4901 * Fallback to strwrite() for ENOMEM; if this 4902 * is our first time in this routine and the uio 4903 * vector has not been modified, we will end up 4904 * calling strwrite() without any flag set. 4905 */ 4906 if (error == ENOMEM) 4907 goto slow_send; 4908 else 4909 return (error); 4910 } 4911 ASSERT(uiop->uio_resid >= 0); 4912 /* 4913 * If mp is non-NULL and ENOMEM is set, it means that 4914 * mcopyinuio() was able to break down some of the user 4915 * data into one or more mblks. Send the partial data 4916 * to tcp and let the rest be handled in strwrite(). 
4917 */ 4918 ASSERT(error == 0 || error == ENOMEM); 4919 if (stp->sd_wputdatafunc != NULL) { 4920 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4921 NULL, NULL, NULL); 4922 if (newmp == NULL) { 4923 /* The caller will free mp */ 4924 return (ECOMM); 4925 } 4926 mp = newmp; 4927 } 4928 tcp_wput(tcp_wq, mp); 4929 4930 wflag |= NOINTR; 4931 4932 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4933 ASSERT(error == 0); 4934 break; 4935 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4936 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4937 slow_send: 4938 /* 4939 * We were able to send down partial data using 4940 * the direct call interface, but are now relying 4941 * on strwrite() to handle the non-fastpath cases. 4942 * If the socket is blocking we will sleep in 4943 * strwaitq() until write is permitted, otherwise, 4944 * we will need to return the amount of bytes 4945 * written so far back to the app. This is the 4946 * reason why we pass NOINTR flag to strwrite() 4947 * for non-blocking socket, because we don't want 4948 * to return EAGAIN when portion of the user data 4949 * has actually been sent down. 4950 */ 4951 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4952 } 4953 } 4954 return (0); 4955 } 4956 4957 /* 4958 * Update sti_faddr by asking the transport (unless AF_UNIX). 
4959 */ 4960 /* ARGSUSED */ 4961 int 4962 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4963 boolean_t accept, struct cred *cr) 4964 { 4965 struct strbuf strbuf; 4966 int error = 0, res; 4967 void *addr; 4968 t_uscalar_t addrlen; 4969 k_sigset_t smask; 4970 sotpi_info_t *sti = SOTOTPI(so); 4971 4972 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4973 (void *)so, pr_state(so->so_state, so->so_mode))); 4974 4975 ASSERT(*namelen > 0); 4976 mutex_enter(&so->so_lock); 4977 so_lock_single(so); /* Set SOLOCKED */ 4978 4979 if (accept) { 4980 bcopy(sti->sti_faddr_sa, name, 4981 MIN(*namelen, sti->sti_faddr_len)); 4982 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4983 goto done; 4984 } 4985 4986 if (!(so->so_state & SS_ISCONNECTED)) { 4987 error = ENOTCONN; 4988 goto done; 4989 } 4990 /* Added this check for X/Open */ 4991 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4992 error = EINVAL; 4993 if (xnet_check_print) { 4994 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4995 } 4996 goto done; 4997 } 4998 4999 if (sti->sti_faddr_valid) { 5000 bcopy(sti->sti_faddr_sa, name, 5001 MIN(*namelen, sti->sti_faddr_len)); 5002 *namelen = sti->sti_faddr_noxlate ? 
0: sti->sti_faddr_len; 5003 goto done; 5004 } 5005 5006 #ifdef DEBUG 5007 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 5008 pr_addr(so->so_family, sti->sti_faddr_sa, 5009 (t_uscalar_t)sti->sti_faddr_len))); 5010 #endif /* DEBUG */ 5011 5012 if (so->so_family == AF_UNIX) { 5013 /* Transport has different name space - return local info */ 5014 if (sti->sti_faddr_noxlate) 5015 *namelen = 0; 5016 error = 0; 5017 goto done; 5018 } 5019 5020 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 5021 5022 ASSERT(sti->sti_faddr_sa); 5023 /* Allocate local buffer to use with ioctl */ 5024 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 5025 mutex_exit(&so->so_lock); 5026 addr = kmem_alloc(addrlen, KM_SLEEP); 5027 5028 /* 5029 * Issue TI_GETPEERNAME with signals masked. 5030 * Put the result in sti_faddr_sa so that getpeername works after 5031 * a shutdown(output). 5032 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5033 * back to the socket. 5034 */ 5035 strbuf.buf = addr; 5036 strbuf.maxlen = addrlen; 5037 strbuf.len = 0; 5038 5039 sigintr(&smask, 0); 5040 res = 0; 5041 ASSERT(cr); 5042 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 5043 0, K_TO_K, cr, &res); 5044 sigunintr(&smask); 5045 5046 mutex_enter(&so->so_lock); 5047 /* 5048 * If there is an error record the error in so_error put don't fail 5049 * the getpeername. Instead fallback on the recorded 5050 * sti->sti_faddr_sa. 5051 */ 5052 if (error) { 5053 /* 5054 * Various stream head errors can be returned to the ioctl. 5055 * However, it is impossible to determine which ones of 5056 * these are really socket level errors that were incorrectly 5057 * consumed by the ioctl. Thus this code silently ignores the 5058 * error - to code explicitly does not reinstate the error 5059 * using soseterror(). 5060 * Experiments have shows that at least this set of 5061 * errors are reported and should not be reinstated on the 5062 * socket: 5063 * EINVAL E.g. 
if an I_LINK was in effect when 5064 * getpeername was called. 5065 * EPIPE The ioctl error semantics prefer the write 5066 * side error over the read side error. 5067 * ENOTCONN The transport just got disconnected but 5068 * sockfs had not yet seen the T_DISCON_IND 5069 * when issuing the ioctl. 5070 */ 5071 error = 0; 5072 } else if (res == 0 && strbuf.len > 0 && 5073 (so->so_state & SS_ISCONNECTED)) { 5074 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 5075 sti->sti_faddr_len = (socklen_t)strbuf.len; 5076 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 5077 sti->sti_faddr_valid = 1; 5078 5079 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 5080 *namelen = sti->sti_faddr_len; 5081 } 5082 kmem_free(addr, addrlen); 5083 #ifdef DEBUG 5084 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 5085 pr_addr(so->so_family, sti->sti_faddr_sa, 5086 (t_uscalar_t)sti->sti_faddr_len))); 5087 #endif /* DEBUG */ 5088 done: 5089 so_unlock_single(so, SOLOCKED); 5090 mutex_exit(&so->so_lock); 5091 return (error); 5092 } 5093 5094 /* 5095 * Update sti_laddr by asking the transport (unless AF_UNIX). 
5096 */ 5097 int 5098 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 5099 struct cred *cr) 5100 { 5101 struct strbuf strbuf; 5102 int error = 0, res; 5103 void *addr; 5104 t_uscalar_t addrlen; 5105 k_sigset_t smask; 5106 sotpi_info_t *sti = SOTOTPI(so); 5107 5108 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 5109 (void *)so, pr_state(so->so_state, so->so_mode))); 5110 5111 ASSERT(*namelen > 0); 5112 mutex_enter(&so->so_lock); 5113 so_lock_single(so); /* Set SOLOCKED */ 5114 5115 #ifdef DEBUG 5116 5117 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 5118 pr_addr(so->so_family, sti->sti_laddr_sa, 5119 (t_uscalar_t)sti->sti_laddr_len))); 5120 #endif /* DEBUG */ 5121 if (sti->sti_laddr_valid) { 5122 bcopy(sti->sti_laddr_sa, name, 5123 MIN(*namelen, sti->sti_laddr_len)); 5124 *namelen = sti->sti_laddr_len; 5125 goto done; 5126 } 5127 5128 if (so->so_family == AF_UNIX) { 5129 /* 5130 * Transport has different name space - return local info. If we 5131 * have enough space, let consumers know the family. 5132 */ 5133 if (*namelen >= sizeof (sa_family_t)) { 5134 name->sa_family = AF_UNIX; 5135 *namelen = sizeof (sa_family_t); 5136 } else { 5137 *namelen = 0; 5138 } 5139 error = 0; 5140 goto done; 5141 } 5142 if (!(so->so_state & SS_ISBOUND)) { 5143 /* If not bound, then nothing to return. */ 5144 error = 0; 5145 goto done; 5146 } 5147 5148 /* Allocate local buffer to use with ioctl */ 5149 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 5150 mutex_exit(&so->so_lock); 5151 addr = kmem_alloc(addrlen, KM_SLEEP); 5152 5153 /* 5154 * Issue TI_GETMYNAME with signals masked. 5155 * Put the result in sti_laddr_sa so that getsockname works after 5156 * a shutdown(output). 5157 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5158 * back to the socket. 
5159 */ 5160 strbuf.buf = addr; 5161 strbuf.maxlen = addrlen; 5162 strbuf.len = 0; 5163 5164 sigintr(&smask, 0); 5165 res = 0; 5166 ASSERT(cr); 5167 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5168 0, K_TO_K, cr, &res); 5169 sigunintr(&smask); 5170 5171 mutex_enter(&so->so_lock); 5172 /* 5173 * If there is an error record the error in so_error put don't fail 5174 * the getsockname. Instead fallback on the recorded 5175 * sti->sti_laddr_sa. 5176 */ 5177 if (error) { 5178 /* 5179 * Various stream head errors can be returned to the ioctl. 5180 * However, it is impossible to determine which ones of 5181 * these are really socket level errors that were incorrectly 5182 * consumed by the ioctl. Thus this code silently ignores the 5183 * error - to code explicitly does not reinstate the error 5184 * using soseterror(). 5185 * Experiments have shows that at least this set of 5186 * errors are reported and should not be reinstated on the 5187 * socket: 5188 * EINVAL E.g. if an I_LINK was in effect when 5189 * getsockname was called. 5190 * EPIPE The ioctl error semantics prefer the write 5191 * side error over the read side error. 5192 */ 5193 error = 0; 5194 } else if (res == 0 && strbuf.len > 0 && 5195 (so->so_state & SS_ISBOUND)) { 5196 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5197 sti->sti_laddr_len = (socklen_t)strbuf.len; 5198 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5199 sti->sti_laddr_valid = 1; 5200 5201 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5202 *namelen = sti->sti_laddr_len; 5203 } 5204 kmem_free(addr, addrlen); 5205 #ifdef DEBUG 5206 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5207 pr_addr(so->so_family, sti->sti_laddr_sa, 5208 (t_uscalar_t)sti->sti_laddr_len))); 5209 #endif /* DEBUG */ 5210 done: 5211 so_unlock_single(so, SOLOCKED); 5212 mutex_exit(&so->so_lock); 5213 return (error); 5214 } 5215 5216 /* 5217 * Get socket options. 
For SOL_SOCKET options some options are handled 5218 * by the sockfs while others use the value recorded in the sonode as a 5219 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5220 * 5221 * On the return most *optlenp bytes are copied to optval. 5222 */ 5223 /* ARGSUSED */ 5224 int 5225 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5226 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5227 { 5228 struct T_optmgmt_req optmgmt_req; 5229 struct T_optmgmt_ack *optmgmt_ack; 5230 struct opthdr oh; 5231 struct opthdr *opt_res; 5232 mblk_t *mp = NULL; 5233 int error = 0; 5234 void *option = NULL; /* Set if fallback value */ 5235 t_uscalar_t maxlen = *optlenp; 5236 t_uscalar_t len; 5237 uint32_t value; 5238 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5239 struct timeval32 tmo_val32; 5240 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5241 5242 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5243 (void *)so, level, option_name, optval, (void *)optlenp, 5244 pr_state(so->so_state, so->so_mode))); 5245 5246 mutex_enter(&so->so_lock); 5247 so_lock_single(so); /* Set SOLOCKED */ 5248 5249 /* 5250 * Check for SOL_SOCKET options. 5251 * Certain SOL_SOCKET options are returned directly whereas 5252 * others only provide a default (fallback) value should 5253 * the T_SVR4_OPTMGMT_REQ fail. 
5254 */ 5255 if (level == SOL_SOCKET) { 5256 /* Check parameters */ 5257 switch (option_name) { 5258 case SO_TYPE: 5259 case SO_ERROR: 5260 case SO_DEBUG: 5261 case SO_ACCEPTCONN: 5262 case SO_REUSEADDR: 5263 case SO_KEEPALIVE: 5264 case SO_DONTROUTE: 5265 case SO_BROADCAST: 5266 case SO_USELOOPBACK: 5267 case SO_OOBINLINE: 5268 case SO_SNDBUF: 5269 case SO_RCVBUF: 5270 #ifdef notyet 5271 case SO_SNDLOWAT: 5272 case SO_RCVLOWAT: 5273 #endif /* notyet */ 5274 case SO_DOMAIN: 5275 case SO_DGRAM_ERRIND: 5276 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5277 error = EINVAL; 5278 eprintsoline(so, error); 5279 goto done2; 5280 } 5281 break; 5282 case SO_RCVTIMEO: 5283 case SO_SNDTIMEO: 5284 if (get_udatamodel() == DATAMODEL_NONE || 5285 get_udatamodel() == DATAMODEL_NATIVE) { 5286 if (maxlen < sizeof (struct timeval)) { 5287 error = EINVAL; 5288 eprintsoline(so, error); 5289 goto done2; 5290 } 5291 } else { 5292 if (maxlen < sizeof (struct timeval32)) { 5293 error = EINVAL; 5294 eprintsoline(so, error); 5295 goto done2; 5296 } 5297 5298 } 5299 break; 5300 case SO_LINGER: 5301 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5302 error = EINVAL; 5303 eprintsoline(so, error); 5304 goto done2; 5305 } 5306 break; 5307 case SO_SND_BUFINFO: 5308 if (maxlen < (t_uscalar_t) 5309 sizeof (struct so_snd_bufinfo)) { 5310 error = EINVAL; 5311 eprintsoline(so, error); 5312 goto done2; 5313 } 5314 break; 5315 } 5316 5317 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5318 5319 switch (option_name) { 5320 case SO_TYPE: 5321 value = so->so_type; 5322 option = &value; 5323 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5324 5325 case SO_ERROR: 5326 value = sogeterr(so, B_TRUE); 5327 option = &value; 5328 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5329 5330 case SO_ACCEPTCONN: 5331 if (so->so_state & SS_ACCEPTCONN) 5332 value = SO_ACCEPTCONN; 5333 else 5334 value = 0; 5335 #ifdef DEBUG 5336 if (value) { 5337 dprintso(so, 1, 5338 ("sotpi_getsockopt: 0x%x is 
set\n", 5339 option_name)); 5340 } else { 5341 dprintso(so, 1, 5342 ("sotpi_getsockopt: 0x%x not set\n", 5343 option_name)); 5344 } 5345 #endif /* DEBUG */ 5346 option = &value; 5347 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5348 5349 case SO_DEBUG: 5350 case SO_REUSEADDR: 5351 case SO_KEEPALIVE: 5352 case SO_DONTROUTE: 5353 case SO_BROADCAST: 5354 case SO_USELOOPBACK: 5355 case SO_OOBINLINE: 5356 case SO_DGRAM_ERRIND: 5357 value = (so->so_options & option_name); 5358 #ifdef DEBUG 5359 if (value) { 5360 dprintso(so, 1, 5361 ("sotpi_getsockopt: 0x%x is set\n", 5362 option_name)); 5363 } else { 5364 dprintso(so, 1, 5365 ("sotpi_getsockopt: 0x%x not set\n", 5366 option_name)); 5367 } 5368 #endif /* DEBUG */ 5369 option = &value; 5370 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5371 5372 /* 5373 * The following options are only returned by sockfs when the 5374 * T_SVR4_OPTMGMT_REQ fails. 5375 */ 5376 case SO_LINGER: 5377 option = &so->so_linger; 5378 len = (t_uscalar_t)sizeof (struct linger); 5379 break; 5380 case SO_SNDBUF: { 5381 ssize_t lvalue; 5382 5383 /* 5384 * If the option has not been set then get a default 5385 * value from the read queue. This value is 5386 * returned if the transport fails 5387 * the T_SVR4_OPTMGMT_REQ. 5388 */ 5389 lvalue = so->so_sndbuf; 5390 if (lvalue == 0) { 5391 mutex_exit(&so->so_lock); 5392 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5393 QHIWAT, 0, &lvalue); 5394 mutex_enter(&so->so_lock); 5395 dprintso(so, 1, 5396 ("got SO_SNDBUF %ld from q\n", lvalue)); 5397 } 5398 value = (int)lvalue; 5399 option = &value; 5400 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5401 break; 5402 } 5403 case SO_RCVBUF: { 5404 ssize_t lvalue; 5405 5406 /* 5407 * If the option has not been set then get a default 5408 * value from the read queue. This value is 5409 * returned if the transport fails 5410 * the T_SVR4_OPTMGMT_REQ. 
5411 * 5412 * XXX If SO_RCVBUF has been set and this is an 5413 * XPG 4.2 application then do not ask the transport 5414 * since the transport might adjust the value and not 5415 * return exactly what was set by the application. 5416 * For non-XPG 4.2 application we return the value 5417 * that the transport is actually using. 5418 */ 5419 lvalue = so->so_rcvbuf; 5420 if (lvalue == 0) { 5421 mutex_exit(&so->so_lock); 5422 (void) strqget(RD(strvp2wq(SOTOV(so))), 5423 QHIWAT, 0, &lvalue); 5424 mutex_enter(&so->so_lock); 5425 dprintso(so, 1, 5426 ("got SO_RCVBUF %ld from q\n", lvalue)); 5427 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5428 value = (int)lvalue; 5429 option = &value; 5430 goto copyout; /* skip asking transport */ 5431 } 5432 value = (int)lvalue; 5433 option = &value; 5434 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5435 break; 5436 } 5437 case SO_DOMAIN: 5438 value = so->so_family; 5439 option = &value; 5440 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5441 5442 #ifdef notyet 5443 /* 5444 * We do not implement the semantics of these options 5445 * thus we shouldn't implement the options either. 
5446 */ 5447 case SO_SNDLOWAT: 5448 value = so->so_sndlowat; 5449 option = &value; 5450 break; 5451 case SO_RCVLOWAT: 5452 value = so->so_rcvlowat; 5453 option = &value; 5454 break; 5455 #endif /* notyet */ 5456 case SO_SNDTIMEO: 5457 case SO_RCVTIMEO: { 5458 clock_t val; 5459 5460 if (option_name == SO_RCVTIMEO) 5461 val = drv_hztousec(so->so_rcvtimeo); 5462 else 5463 val = drv_hztousec(so->so_sndtimeo); 5464 tmo_val.tv_sec = val / (1000 * 1000); 5465 tmo_val.tv_usec = val % (1000 * 1000); 5466 if (get_udatamodel() == DATAMODEL_NONE || 5467 get_udatamodel() == DATAMODEL_NATIVE) { 5468 option = &tmo_val; 5469 len = sizeof (struct timeval); 5470 } else { 5471 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5472 option = &tmo_val32; 5473 len = sizeof (struct timeval32); 5474 } 5475 break; 5476 } 5477 case SO_SND_BUFINFO: { 5478 snd_bufinfo.sbi_wroff = 5479 (so->so_proto_props).sopp_wroff; 5480 snd_bufinfo.sbi_maxblk = 5481 (so->so_proto_props).sopp_maxblk; 5482 snd_bufinfo.sbi_maxpsz = 5483 (so->so_proto_props).sopp_maxpsz; 5484 snd_bufinfo.sbi_tail = 5485 (so->so_proto_props).sopp_tail; 5486 option = &snd_bufinfo; 5487 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5488 break; 5489 } 5490 } 5491 } 5492 5493 mutex_exit(&so->so_lock); 5494 5495 /* Send request */ 5496 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5497 optmgmt_req.MGMT_flags = T_CHECK; 5498 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5499 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5500 5501 oh.level = level; 5502 oh.name = option_name; 5503 oh.len = maxlen; 5504 5505 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5506 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5507 /* Let option management work in the presence of data flow control */ 5508 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5509 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5510 mp = NULL; 5511 mutex_enter(&so->so_lock); 5512 if (error) { 5513 eprintsoline(so, error); 5514 goto done2; 
5515 } 5516 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5517 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5518 if (error) { 5519 if (option != NULL) { 5520 /* We have a fallback value */ 5521 error = 0; 5522 goto copyout; 5523 } 5524 eprintsoline(so, error); 5525 goto done2; 5526 } 5527 ASSERT(mp); 5528 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5529 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5530 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5531 if (opt_res == NULL) { 5532 if (option != NULL) { 5533 /* We have a fallback value */ 5534 error = 0; 5535 goto copyout; 5536 } 5537 error = EPROTO; 5538 eprintsoline(so, error); 5539 goto done; 5540 } 5541 option = &opt_res[1]; 5542 5543 /* check to ensure that the option is within bounds */ 5544 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5545 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5546 if (option != NULL) { 5547 /* We have a fallback value */ 5548 error = 0; 5549 goto copyout; 5550 } 5551 error = EPROTO; 5552 eprintsoline(so, error); 5553 goto done; 5554 } 5555 5556 len = opt_res->len; 5557 5558 copyout: { 5559 t_uscalar_t size = MIN(len, maxlen); 5560 bcopy(option, optval, size); 5561 bcopy(&size, optlenp, sizeof (size)); 5562 } 5563 done: 5564 freemsg(mp); 5565 done2: 5566 so_unlock_single(so, SOLOCKED); 5567 mutex_exit(&so->so_lock); 5568 5569 return (error); 5570 } 5571 5572 /* 5573 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5574 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5575 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5576 * setsockopt has to work even if the transport does not support the option. 
5577 */ 5578 /* ARGSUSED */ 5579 int 5580 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5581 const void *optval, t_uscalar_t optlen, struct cred *cr) 5582 { 5583 struct T_optmgmt_req optmgmt_req; 5584 struct opthdr oh; 5585 mblk_t *mp; 5586 int error = 0; 5587 boolean_t handled = B_FALSE; 5588 5589 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5590 (void *)so, level, option_name, optval, optlen, 5591 pr_state(so->so_state, so->so_mode))); 5592 5593 /* X/Open requires this check */ 5594 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5595 if (xnet_check_print) 5596 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5597 return (EINVAL); 5598 } 5599 5600 mutex_enter(&so->so_lock); 5601 so_lock_single(so); /* Set SOLOCKED */ 5602 mutex_exit(&so->so_lock); 5603 5604 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5605 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5606 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5607 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5608 5609 oh.level = level; 5610 oh.name = option_name; 5611 oh.len = optlen; 5612 5613 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5614 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5615 /* Let option management work in the presence of data flow control */ 5616 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5617 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5618 mp = NULL; 5619 mutex_enter(&so->so_lock); 5620 if (error) { 5621 eprintsoline(so, error); 5622 goto done2; 5623 } 5624 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5625 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5626 if (error) { 5627 eprintsoline(so, error); 5628 goto done; 5629 } 5630 ASSERT(mp); 5631 /* No need to verify T_optmgmt_ack */ 5632 freemsg(mp); 5633 done: 5634 /* 5635 * Check for SOL_SOCKET options and record their values. 
5636 * If we know about a SOL_SOCKET parameter and the transport 5637 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5638 * EPROTO) we let the setsockopt succeed. 5639 */ 5640 if (level == SOL_SOCKET) { 5641 /* Check parameters */ 5642 switch (option_name) { 5643 case SO_DEBUG: 5644 case SO_REUSEADDR: 5645 case SO_KEEPALIVE: 5646 case SO_DONTROUTE: 5647 case SO_BROADCAST: 5648 case SO_USELOOPBACK: 5649 case SO_OOBINLINE: 5650 case SO_SNDBUF: 5651 case SO_RCVBUF: 5652 #ifdef notyet 5653 case SO_SNDLOWAT: 5654 case SO_RCVLOWAT: 5655 #endif /* notyet */ 5656 case SO_DGRAM_ERRIND: 5657 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5658 error = EINVAL; 5659 eprintsoline(so, error); 5660 goto done2; 5661 } 5662 ASSERT(optval); 5663 handled = B_TRUE; 5664 break; 5665 case SO_SNDTIMEO: 5666 case SO_RCVTIMEO: 5667 if (get_udatamodel() == DATAMODEL_NONE || 5668 get_udatamodel() == DATAMODEL_NATIVE) { 5669 if (optlen != sizeof (struct timeval)) { 5670 error = EINVAL; 5671 eprintsoline(so, error); 5672 goto done2; 5673 } 5674 } else { 5675 if (optlen != sizeof (struct timeval32)) { 5676 error = EINVAL; 5677 eprintsoline(so, error); 5678 goto done2; 5679 } 5680 } 5681 ASSERT(optval); 5682 handled = B_TRUE; 5683 break; 5684 case SO_LINGER: 5685 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5686 error = EINVAL; 5687 eprintsoline(so, error); 5688 goto done2; 5689 } 5690 ASSERT(optval); 5691 handled = B_TRUE; 5692 break; 5693 } 5694 5695 #define intvalue (*(int32_t *)optval) 5696 5697 switch (option_name) { 5698 case SO_TYPE: 5699 case SO_ERROR: 5700 case SO_ACCEPTCONN: 5701 /* Can't be set */ 5702 error = ENOPROTOOPT; 5703 goto done2; 5704 case SO_LINGER: { 5705 struct linger *l = (struct linger *)optval; 5706 5707 so->so_linger.l_linger = l->l_linger; 5708 if (l->l_onoff) { 5709 so->so_linger.l_onoff = SO_LINGER; 5710 so->so_options |= SO_LINGER; 5711 } else { 5712 so->so_linger.l_onoff = 0; 5713 so->so_options &= ~SO_LINGER; 5714 } 5715 break; 5716 } 5717 
5718 case SO_DEBUG: 5719 #ifdef SOCK_TEST 5720 if (intvalue & 2) 5721 sock_test_timelimit = 10 * hz; 5722 else 5723 sock_test_timelimit = 0; 5724 5725 if (intvalue & 4) 5726 do_useracc = 0; 5727 else 5728 do_useracc = 1; 5729 #endif /* SOCK_TEST */ 5730 /* FALLTHRU */ 5731 case SO_REUSEADDR: 5732 case SO_KEEPALIVE: 5733 case SO_DONTROUTE: 5734 case SO_BROADCAST: 5735 case SO_USELOOPBACK: 5736 case SO_OOBINLINE: 5737 case SO_DGRAM_ERRIND: 5738 if (intvalue != 0) { 5739 dprintso(so, 1, 5740 ("socket_setsockopt: setting 0x%x\n", 5741 option_name)); 5742 so->so_options |= option_name; 5743 } else { 5744 dprintso(so, 1, 5745 ("socket_setsockopt: clearing 0x%x\n", 5746 option_name)); 5747 so->so_options &= ~option_name; 5748 } 5749 break; 5750 /* 5751 * The following options are only returned by us when the 5752 * transport layer fails. 5753 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5754 * since the transport might adjust the value and not 5755 * return exactly what was set by the application. 5756 */ 5757 case SO_SNDBUF: 5758 so->so_sndbuf = intvalue; 5759 break; 5760 case SO_RCVBUF: 5761 so->so_rcvbuf = intvalue; 5762 break; 5763 case SO_RCVPSH: 5764 so->so_rcv_timer_interval = intvalue; 5765 break; 5766 #ifdef notyet 5767 /* 5768 * We do not implement the semantics of these options 5769 * thus we shouldn't implement the options either. 
5770 */ 5771 case SO_SNDLOWAT: 5772 so->so_sndlowat = intvalue; 5773 break; 5774 case SO_RCVLOWAT: 5775 so->so_rcvlowat = intvalue; 5776 break; 5777 #endif /* notyet */ 5778 case SO_SNDTIMEO: 5779 case SO_RCVTIMEO: { 5780 struct timeval tl; 5781 clock_t val; 5782 5783 if (get_udatamodel() == DATAMODEL_NONE || 5784 get_udatamodel() == DATAMODEL_NATIVE) 5785 bcopy(&tl, (struct timeval *)optval, 5786 sizeof (struct timeval)); 5787 else 5788 TIMEVAL32_TO_TIMEVAL(&tl, 5789 (struct timeval32 *)optval); 5790 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5791 if (option_name == SO_RCVTIMEO) 5792 so->so_rcvtimeo = drv_usectohz(val); 5793 else 5794 so->so_sndtimeo = drv_usectohz(val); 5795 break; 5796 } 5797 } 5798 #undef intvalue 5799 5800 if (error) { 5801 if ((error == ENOPROTOOPT || error == EPROTO || 5802 error == EINVAL) && handled) { 5803 dprintso(so, 1, 5804 ("setsockopt: ignoring error %d for 0x%x\n", 5805 error, option_name)); 5806 error = 0; 5807 } 5808 } 5809 } 5810 done2: 5811 so_unlock_single(so, SOLOCKED); 5812 mutex_exit(&so->so_lock); 5813 return (error); 5814 } 5815 5816 /* 5817 * sotpi_close() is called when the last open reference goes away. 
 *
 * Under SOLOCKED it shuts down NL7C if it was enabled, closes the stream
 * (if one is still attached to the vnode), discards any queued
 * T_DISCON_IND and finally drops the driver hold taken for clone opens.
 *
 * Returns the value of strclose() when a stream was closed, otherwise 0.
 */
/* ARGSUSED */
int
sotpi_close(struct sonode *so, int flag, struct cred *cr)
{
	struct vnode *vp = SOTOV(so);
	dev_t dev;
	int error = 0;
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_close(%p, %x) %s\n",
	    (void *)vp, flag, pr_state(so->so_state, so->so_mode)));

	dev = sti->sti_dev;

	ASSERT(STREAMSTAB(getmajor(dev)));

	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	ASSERT(so_verify_oobstate(so));

	if (sti->sti_nl7c_flags & NL7C_ENABLED) {
		sti->sti_nl7c_flags = 0;
		nl7c_close(so);
	}

	if (vp->v_stream != NULL) {
		vnode_t *ux_vp;

		if (so->so_family == AF_UNIX) {
			/* Could avoid this when CANTSENDMORE for !dgram */
			so_unix_close(so);
		}

		/* so_lock is dropped for the duration of the stream close. */
		mutex_exit(&so->so_lock);
		/*
		 * Disassemble the linkage from the AF_UNIX underlying file
		 * system vnode to this socket (by atomically clearing
		 * v_stream in vn_rele_stream) before strclose clears sd_vnode
		 * and frees the stream head.
		 */
		if ((ux_vp = sti->sti_ux_bound_vp) != NULL) {
			ASSERT(ux_vp->v_stream);
			sti->sti_ux_bound_vp = NULL;
			vn_rele_stream(ux_vp);
		}
		error = strclose(vp, flag, cr);
		vp->v_stream = NULL;
		mutex_enter(&so->so_lock);
	}

	/*
	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
	 */
	so_flush_discon_ind(so);

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);

	/*
	 * Needed for STREAMs.
	 * Decrement the device driver's reference count for streams
	 * opened via the clone dip. The driver was held in clone_open().
	 * The absence of clone_close() forces this asymmetry.
	 */
	if (so->so_flag & SOCLONE)
		ddi_rele_driver(getmajor(dev));

	return (error);
}

/*
 * ioctl entry point for TPI sockets.
 *
 * SIOCSQPTR, _I_INSERT and _I_REMOVE are always refused with EOPNOTSUPP.
 * The plumbing ioctls (I_FIND, I_LIST, I_LOOK, I_POP, I_PUSH) are handed
 * to socktpi_plumbioctl() while holding sti_plumb_lock.  When so_version
 * is SOV_STREAM every other command goes straight to strioctl().
 * Otherwise the socket-specific commands in the second switch are handled
 * here and anything unrecognized is passed to strioctl(), except that
 * STREAMS (STR) ioctls are refused on SOV_SOCKBSD sockets.
 *
 * Returns 0 on success or an errno value.
 */
static int
sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	struct vnode *vp = SOTOV(so);
	sotpi_info_t *sti = SOTOTPI(so);
	int error = 0;

	dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n",
	    cmd, arg, pr_state(so->so_state, so->so_mode)));

	switch (cmd) {
	case SIOCSQPTR:
		/*
		 * SIOCSQPTR is valid only when helper stream is created
		 * by the protocol.
		 */
	case _I_INSERT:
	case _I_REMOVE:
		/*
		 * Since there's no compelling reason to support these ioctls
		 * on sockets, and doing so would increase the complexity
		 * markedly, prevent it.
		 */
		return (EOPNOTSUPP);

	case I_FIND:
	case I_LIST:
	case I_LOOK:
	case I_POP:
	case I_PUSH:
		/*
		 * To prevent races and inconsistencies between the actual
		 * state of the stream and the state according to the sonode,
		 * we serialize all operations which modify or operate on the
		 * list of modules on the socket's stream.
		 */
		mutex_enter(&sti->sti_plumb_lock);
		error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp);
		mutex_exit(&sti->sti_plumb_lock);
		return (error);

	default:
		if (so->so_version != SOV_STREAM)
			break;

		/*
		 * The imaginary "sockmod" has been popped; act as a stream.
		 */
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}

	ASSERT(so->so_version != SOV_STREAM);

	/*
	 * Process socket-specific ioctls.
	 */
	switch (cmd) {
	case FIONBIO: {
		int32_t value;

		if (so_copyin((void *)arg, &value, sizeof (int32_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		if (value) {
			so->so_state |= SS_NDELAY;
		} else {
			so->so_state &= ~SS_NDELAY;
		}
		mutex_exit(&so->so_lock);
		return (0);
	}

	case FIOASYNC: {
		int32_t value;

		if (so_copyin((void *)arg, &value, sizeof (int32_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		/*
		 * SS_ASYNC flag not already set correctly?
		 * (!value != !(so->so_state & SS_ASYNC))
		 * but some engineers find that too hard to read.
		 */
		if (value == 0 && (so->so_state & SS_ASYNC) != 0 ||
		    value != 0 && (so->so_state & SS_ASYNC) == 0)
			error = so_flip_async(so, vp, mode, cr);
		mutex_exit(&so->so_lock);
		return (error);
	}

	case SIOCSPGRP:
	case FIOSETOWN: {
		pid_t pgrp;

		if (so_copyin((void *)arg, &pgrp, sizeof (pid_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp));
		/* Any change? */
		if (pgrp != so->so_pgrp)
			error = so_set_siggrp(so, vp, pgrp, mode, cr);
		mutex_exit(&so->so_lock);
		return (error);
	}
	case SIOCGPGRP:
	case FIOGETOWN:
		/* so_pgrp is copied out without taking so_lock. */
		if (so_copyout(&so->so_pgrp, (void *)arg,
		    sizeof (pid_t), (mode & (int)FKIOCTL)))
			return (EFAULT);
		return (0);

	case SIOCATMARK: {
		int retval;
		uint_t so_state;

		/*
		 * strwaitmark has a finite timeout after which it
		 * returns -1 if the mark state is undetermined.
		 * In order to avoid any race between the mark state
		 * in sockfs and the mark state in the stream head this
		 * routine loops until the mark state can be determined
		 * (or the urgent data indication has been removed by some
		 * other thread).
		 */
		do {
			/* Take a snapshot of so_state under so_lock. */
			mutex_enter(&so->so_lock);
			so_state = so->so_state;
			mutex_exit(&so->so_lock);
			if (so_state & SS_RCVATMARK) {
				retval = 1;
			} else if (!(so_state & SS_OOBPEND)) {
				/*
				 * No SIGURG has been generated -- there is no
				 * pending or present urgent data. Thus can't
				 * possibly be at the mark.
				 */
				retval = 0;
			} else {
				/*
				 * Have the stream head wait until there is
				 * either some messages on the read queue, or
				 * STRATMARK or STRNOTATMARK gets set. The
				 * STRNOTATMARK flag is used so that the
				 * transport can send up a MSGNOTMARKNEXT
				 * M_DATA to indicate that it is not
				 * at the mark and additional data is not about
				 * to be send upstream.
				 *
				 * If the mark state is undetermined this will
				 * return -1 and we will loop rechecking the
				 * socket state.
				 */
				retval = strwaitmark(vp);
			}
		} while (retval == -1);

		if (so_copyout(&retval, (void *)arg, sizeof (int),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);
		return (0);
	}

	case I_FDINSERT:
	case I_SENDFD:
	case I_RECVFD:
	case I_ATMARK:
	case _SIOCSOCKFALLBACK:
		/*
		 * These ioctls do not apply to sockets. I_FDINSERT can be
		 * used to send M_PROTO messages without modifying the socket
		 * state. I_SENDFD/RECVFD should not be used for socket file
		 * descriptor passing since they assume a twisted stream.
		 * SIOCATMARK must be used instead of I_ATMARK.
		 *
		 * _SIOCSOCKFALLBACK from an application should never be
		 * processed. It is only generated by socktpi_open() or
		 * in response to I_POP or I_PUSH.
		 */
#ifdef DEBUG
		zcmn_err(getzoneid(), CE_WARN,
		    "Unsupported STREAMS ioctl 0x%x on socket. "
		    "Pid = %d\n", cmd, curproc->p_pid);
#endif /* DEBUG */
		return (EOPNOTSUPP);

	case _I_GETPEERCRED:
		/* Only honoured for in-kernel callers (FKIOCTL set). */
		if ((mode & FKIOCTL) == 0)
			return (EINVAL);

		mutex_enter(&so->so_lock);
		if ((so->so_mode & SM_CONNREQUIRED) == 0) {
			error = ENOTSUP;
		} else if ((so->so_state & SS_ISCONNECTED) == 0) {
			error = ENOTCONN;
		} else if (so->so_peercred != NULL) {
			/*
			 * arg is used directly as a kernel pointer.  A hold
			 * is placed on the credential that is handed back.
			 */
			k_peercred_t *kp = (k_peercred_t *)arg;
			kp->pc_cr = so->so_peercred;
			kp->pc_cpid = so->so_cpid;
			crhold(so->so_peercred);
		} else {
			error = EINVAL;
		}
		mutex_exit(&so->so_lock);
		return (error);

	default:
		/*
		 * Do the higher-order bits of the ioctl cmd indicate
		 * that it is an I_* streams ioctl?
		 */
		if ((cmd & 0xffffff00U) == STR &&
		    so->so_version == SOV_SOCKBSD) {
#ifdef DEBUG
			zcmn_err(getzoneid(), CE_WARN,
			    "Unsupported STREAMS ioctl 0x%x on socket. "
			    "Pid = %d\n", cmd, curproc->p_pid);
#endif /* DEBUG */
			return (EOPNOTSUPP);
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}
}

/*
 * Handle plumbing-related ioctls.
 *
 * The caller must hold sti_plumb_lock.  SOV_SOCKBSD sockets get
 * EOPNOTSUPP.  For SOV_STREAM sockets the request goes to strioctl(),
 * except that an I_PUSH of "sockmod" turns the stream back into a socket.
 * Otherwise an imaginary "sockmod" module is emulated on top of the
 * sti_pushcnt modules that were really pushed; cmd must be one of I_PUSH,
 * I_POP, I_LIST, I_LOOK or I_FIND (anything else panics).
 */
static int
socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	static const char sockmod_name[] = "sockmod";
	struct sonode	*so = VTOSO(vp);
	char		mname[FMNAMESZ + 1];
	int		error;
	sotpi_info_t	*sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));

	if (so->so_version == SOV_SOCKBSD)
		return (EOPNOTSUPP);

	if (so->so_version == SOV_STREAM) {
		/*
		 * The imaginary "sockmod" has been popped - act as a stream.
		 * If this is a push of sockmod then change back to a socket.
		 */
		if (cmd == I_PUSH) {
			/* Kernel callers pass a kernel string pointer. */
			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
			    (void *)arg, mname, sizeof (mname), NULL);

			if (error == 0 && strcmp(mname, sockmod_name) == 0) {
				dprintso(so, 0, ("socktpi_ioctl: going to "
				    "socket version\n"));
				so_stream2sock(so);
				return (0);
			}
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}

	switch (cmd) {
	case I_PUSH:
		if (sti->sti_direct) {
			/*
			 * Issue _SIOCSOCKFALLBACK under SOLOCKED before the
			 * module is pushed; sti_direct is cleared only if
			 * that ioctl succeeds.
			 */
			mutex_enter(&so->so_lock);
			so_lock_single(so);
			mutex_exit(&so->so_lock);

			error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
			    cr, rvalp);

			mutex_enter(&so->so_lock);
			if (error == 0)
				sti->sti_direct = 0;
			so_unlock_single(so, SOLOCKED);
			mutex_exit(&so->so_lock);

			if (error != 0)
				return (error);
		}

		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error == 0)
			sti->sti_pushcnt++;
		return (error);

	case I_POP:
		if (sti->sti_pushcnt == 0) {
			/* Emulate sockmod being popped */
			dprintso(so, 0,
			    ("socktpi_ioctl: going to STREAMS version\n"));
			return (so_sock2stream(so));
		}

		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error == 0)
			sti->sti_pushcnt--;
		return (error);

	case I_LIST: {
		struct str_mlist *kmlistp, *umlistp;
		struct str_list	kstrlist;
		ssize_t		kstrlistsize;
		int		i, nmods;

		STRUCT_DECL(str_list, ustrlist);
		STRUCT_INIT(ustrlist, mode);

		/* No buffer supplied: only the module count is returned. */
		if (arg == NULL) {
			error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
			if (error == 0)
				(*rvalp)++;	/* Add one for sockmod */
			return (error);
		}

		error = so_copyin((void *)arg, STRUCT_BUF(ustrlist),
		    STRUCT_SIZE(ustrlist), mode & FKIOCTL);
		if (error != 0)
			return (error);

		nmods = STRUCT_FGET(ustrlist, sl_nmods);
		if (nmods <= 0)
			return (EINVAL);
		/*
		 * Ceiling nmods at nstrpush to prevent someone from
		 * maliciously consuming lots of kernel memory.
		 */
		nmods = MIN(nmods, nstrpush);

		/* One extra slot is reserved for the emulated sockmod. */
		kstrlistsize = (nmods + 1) * sizeof (struct str_mlist);
		kstrlist.sl_nmods = nmods;
		kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP);

		error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K,
		    cr, rvalp);
		if (error != 0)
			goto done;

		/*
		 * Considering the module list as a 0-based array of sl_nmods
		 * modules, sockmod should conceptually exist at slot
		 * sti_pushcnt.  Insert sockmod at this location by sliding all
		 * of the module names after sti_pushcnt over by one.  We know
		 * that there will be room to do this since we allocated
		 * sl_modlist with an additional slot.
		 */
		for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--)
			kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1];

		(void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name);
		kstrlist.sl_nmods++;

		/*
		 * Copy all of the entries out to ustrlist.
		 */
		kmlistp = kstrlist.sl_modlist;
		umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
		for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
			error = so_copyout(kmlistp++, umlistp++,
			    sizeof (struct str_mlist), mode & FKIOCTL);
			if (error != 0)
				goto done;
		}

		/* Report the number of entries actually copied out. */
		error = so_copyout(&i, (void *)arg, sizeof (int32_t),
		    mode & FKIOCTL);
		if (error == 0)
			*rvalp = 0;
	done:
		kmem_free(kstrlist.sl_modlist, kstrlistsize);
		return (error);
	}
	case I_LOOK:
		/* With nothing pushed the topmost module is sockmod. */
		if (sti->sti_pushcnt == 0) {
			return (so_copyout(sockmod_name, (void *)arg,
			    sizeof (sockmod_name), mode & FKIOCTL));
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));

	case I_FIND:
		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error && error != EINVAL)
			return (error);

		/* if not found and string was sockmod return 1 */
		if (*rvalp == 0 || error == EINVAL) {
			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
			    (void *)arg, mname, sizeof (mname), NULL);
			if (error == ENAMETOOLONG)
				error = EINVAL;

			if (error == 0 && strcmp(mname, sockmod_name) == 0)
				*rvalp = 1;
		}
		return (error);

	default:
		panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
		break;
	}

	return (0);
}

/*
 * Wrapper around the streams poll routine that implements socket poll
 * semantics.
 * The sockfs never calls pollwakeup itself - the stream head takes care
 * of all pollwakeups. Since sockfs never holds so_lock when calling the
 * stream head there can never be a deadlock due to holding so_lock across
 * pollwakeup and acquiring so_lock in this routine.
 *
 * However, since the performance of VOP_POLL is critical we avoid
 * acquiring so_lock here.
 * This is based on two assumptions:
 *  - The poll implementation holds locks to serialize the VOP_POLL call
 *    and a pollwakeup for the same pollhead. This ensures that should
 *    e.g. so_state change during a socktpi_poll call the pollwakeup
 *    (which strsock_* and strrput conspire to issue) is issued after
 *    the state change. Thus the pollwakeup will block until VOP_POLL has
 *    returned and then wake up poll and have it call VOP_POLL again.
 *  - The reading of so_state without holding so_lock does not result in
 *    stale data that is older than the latest state change that has dropped
 *    so_lock. This is ensured by the mutex_exit issuing the appropriate
 *    memory barrier to force the data into the coherency domain.
 *
 * Returns 0 with the ready events stored in *reventsp, or the error
 * reported by strpoll().
 */
static int
sotpi_poll(
	struct sonode	*so,
	short		events,
	int		anyyet,
	short		*reventsp,
	struct pollhead **phpp)
{
	short origevents = events;
	struct vnode *vp = SOTOV(so);
	int error;
	int so_state = so->so_state;	/* snapshot */
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n",
	    (void *)vp, pr_state(so_state, so->so_mode), so->so_error));

	ASSERT(vp->v_type == VSOCK);
	ASSERT(vp->v_stream != NULL);

	if (so->so_version == SOV_STREAM) {
		/* The imaginary "sockmod" has been popped - act as a stream */
		return (strpoll(vp->v_stream, events, anyyet,
		    reventsp, phpp));
	}

	if (!(so_state & SS_ISCONNECTED) &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		/* Not connected yet - turn off write side events */
		events &= ~(POLLOUT|POLLWRBAND);
	}
	/*
	 * Check for errors without calling strpoll if the caller wants them.
	 * In sockets the errors are represented as input/output events
	 * and there is no need to ask the stream head for this information.
	 */
	if (so->so_error != 0 &&
	    ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
		return (0);
	}
	/*
	 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages.
	 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA
	 * will not trigger a POLLIN event with POLLRDDATA set.
	 * The handling of urgent data (causing POLLRDBAND) is done by
	 * inspecting SS_OOBPEND below.
	 */
	events |= POLLRDDATA;

	/*
	 * After shutdown(output) a stream head write error is set.
	 * However, we should not return output events.
	 */
	events |= POLLNOERR;
	error = strpoll(vp->v_stream, events, anyyet,
	    reventsp, phpp);
	if (error)
		return (error);

	ASSERT(!(*reventsp & POLLERR));

	/*
	 * Notes on T_CONN_IND handling for sockets.
	 *
	 * If strpoll() returned without events, SR_POLLIN is guaranteed
	 * to be set, ensuring any subsequent strrput() runs pollwakeup().
	 *
	 * Since the so_lock is not held, soqueueconnind() may have run
	 * and a T_CONN_IND may be waiting. We now check for any queued
	 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events
	 * to ensure poll returns.
	 *
	 * However:
	 * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
	 * when strrput() does run for an arriving M_PROTO with T_CONN_IND
	 * the following actions will occur; taken together they ensure the
	 * syscall will return.
	 *
	 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if
	 *    the accept() was run on a non-blocking socket sowaitconnind()
	 *    may have already returned EWOULDBLOCK, so not be waiting to
	 *    process the message. Additionally socktpi_poll() has probably
	 *    proceeded past the sti_conn_ind_head check below.
	 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
	 *    this thread, however that could occur before poll_common()
	 *    has entered cv_wait.
	 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
	 *
	 * Before proceeding to cv_wait() in poll_common() for an event,
	 * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
	 * and if set, re-calls strpoll() to ensure the late arriving
	 * T_CONN_IND is recognized, and pollsys() returns.
	 */

	if (sti->sti_conn_ind_head != NULL)
		*reventsp |= (POLLIN|POLLRDNORM) & events;

	/*
	 * These tests re-read so->so_state rather than using the snapshot
	 * taken on entry.  POLLHUP is reported whether or not it was
	 * requested, and only once both directions are shut down.
	 */
	if (so->so_state & SS_CANTRCVMORE) {
		*reventsp |= POLLRDHUP & events;

		if (so->so_state & SS_CANTSENDMORE)
			*reventsp |= POLLHUP;
	}

	if (so->so_state & SS_OOBPEND)
		*reventsp |= POLLRDBAND & events;

	/* Data held on sti_nl7c_rcv_mp counts as readable as well. */
	if (sti->sti_nl7c_rcv_mp != NULL) {
		*reventsp |= (POLLIN|POLLRDNORM) & events;
	}
	if ((sti->sti_nl7c_flags & NL7C_ENABLED) &&
	    ((POLLIN|POLLRDNORM) & *reventsp)) {
		sti->sti_nl7c_flags |= NL7C_POLLIN;
	}

	return (0);
}

/*
 * kmem cache constructor for sotpi_sonode_t objects: constructs the
 * embedded sonode, then the embedded TPI info, and points so_priv at the
 * embedded info.  Returns 0 or the error of the failing constructor.
 *
 * NOTE(review): so_priv is assigned even when the info constructor
 * fails and the sonode has already been destructed again; harmless only
 * as long as i_sotpi_info_constructor() cannot fail - confirm.
 */
/*ARGSUSED*/
static int
socktpi_constructor(void *buf, void *cdrarg, int kmflags)
{
	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
	int error = 0;

	error = sonode_constructor(buf, cdrarg, kmflags);
	if (error != 0)
		return (error);

	error = i_sotpi_info_constructor(&st->st_info);
	if (error != 0)
		sonode_destructor(buf, cdrarg);

	st->st_sonode.so_priv = &st->st_info;

	return (error);
}

/*
 * kmem cache destructor matching socktpi_constructor(): detaches so_priv
 * and destructs the TPI info followed by the sonode.
 */
/*ARGSUSED1*/
static void
socktpi_destructor(void *buf, void *cdrarg)
{
	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;

	ASSERT(st->st_sonode.so_priv == &st->st_info);
	st->st_sonode.so_priv = NULL;

	i_sotpi_info_destructor(&st->st_info);
	sonode_destructor(buf, cdrarg);
}

/*
 * Constructor for the AF_UNIX cache.  In addition to the regular
 * construction it inserts the sonode at the head of the doubly linked
 * socklist, under sl_lock.
 */
static int
socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags)
{
	int retval;

	if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) {
		struct sonode *so = (struct sonode *)buf;
		sotpi_info_t *sti = SOTOTPI(so);

		mutex_enter(&socklist.sl_lock);

		sti->sti_next_so = socklist.sl_list;
		sti->sti_prev_so = NULL;
		if (sti->sti_next_so != NULL)
			SOTOTPI(sti->sti_next_so)->sti_prev_so = so;
		socklist.sl_list = so;

		mutex_exit(&socklist.sl_lock);

	}
	return (retval);
}

/*
 * Destructor for the AF_UNIX cache: unlinks the sonode from socklist
 * under sl_lock and then runs the regular destructor.
 */
static void
socktpi_unix_destructor(void *buf, void *cdrarg)
{
	struct sonode	*so = (struct sonode *)buf;
	sotpi_info_t	*sti = SOTOTPI(so);

	mutex_enter(&socklist.sl_lock);

	if (sti->sti_next_so != NULL)
		SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so;
	if (sti->sti_prev_so != NULL)
		SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so;
	else
		/* so was the list head. */
		socklist.sl_list = sti->sti_next_so;

	mutex_exit(&socklist.sl_lock);

	socktpi_destructor(buf, cdrarg);
}

/*
 * Creates the two sonode kmem caches.  Always returns 0.
 */
int
socktpi_init(void)
{
	/*
	 * Create sonode caches.  We create a special one for AF_UNIX so
	 * that we can track them for netstat(1m).
	 */
	socktpi_cache = kmem_cache_create("socktpi_cache",
	    sizeof (struct sotpi_sonode), 0, socktpi_constructor,
	    socktpi_destructor, NULL, NULL, NULL, 0);

	socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache",
	    sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor,
	    socktpi_unix_destructor, NULL, NULL, NULL, 0);

	return (0);
}

/*
 * Given a non-TPI sonode, allocate and prep it to be ready for TPI.
 *
 * Caller must still update state and mode using sotpi_update_state().
6538 */ 6539 int 6540 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 6541 boolean_t *direct, queue_t **qp, struct cred *cr) 6542 { 6543 sotpi_info_t *sti; 6544 struct sockparams *origsp = so->so_sockparams; 6545 sock_lower_handle_t handle = so->so_proto_handle; 6546 struct stdata *stp; 6547 struct vnode *vp; 6548 queue_t *q; 6549 int error = 0; 6550 6551 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6552 SS_FALLBACK_PENDING); 6553 ASSERT(SOCK_IS_NONSTR(so)); 6554 6555 *qp = NULL; 6556 *direct = B_FALSE; 6557 so->so_sockparams = newsp; 6558 /* 6559 * Allocate and initalize fields required by TPI. 6560 */ 6561 (void) sotpi_info_create(so, KM_SLEEP); 6562 sotpi_info_init(so); 6563 6564 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) { 6565 sotpi_info_fini(so); 6566 sotpi_info_destroy(so); 6567 return (error); 6568 } 6569 ASSERT(handle == so->so_proto_handle); 6570 sti = SOTOTPI(so); 6571 if (sti->sti_direct != 0) 6572 *direct = B_TRUE; 6573 6574 /* 6575 * Keep the original sp around so we can properly dispose of the 6576 * sonode when the socket is being closed. 6577 */ 6578 sti->sti_orig_sp = origsp; 6579 6580 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 6581 so_alloc_addr(so, so->so_max_addr_len); 6582 6583 /* 6584 * If the application has done a SIOCSPGRP, make sure the 6585 * STREAM head is aware. This needs to take place before 6586 * the protocol start sending up messages. Otherwise we 6587 * might miss to generate SIGPOLL. 6588 * 6589 * It is possible that the application will receive duplicate 6590 * signals if some were already generated for either data or 6591 * connection indications. 6592 */ 6593 if (so->so_pgrp != 0) { 6594 if (so_set_events(so, so->so_vnode, cr) != 0) 6595 so->so_pgrp = 0; 6596 } 6597 6598 /* 6599 * Determine which queue to use. 
6600 */ 6601 vp = SOTOV(so); 6602 stp = vp->v_stream; 6603 ASSERT(stp != NULL); 6604 q = stp->sd_wrq->q_next; 6605 6606 /* 6607 * Skip any modules that may have been auto pushed when the device 6608 * was opened 6609 */ 6610 while (q->q_next != NULL) 6611 q = q->q_next; 6612 *qp = _RD(q); 6613 6614 /* This is now a STREAMS sockets */ 6615 so->so_not_str = B_FALSE; 6616 6617 return (error); 6618 } 6619 6620 /* 6621 * Revert a TPI sonode. It is only allowed to revert the sonode during 6622 * the fallback process. 6623 */ 6624 void 6625 sotpi_revert_sonode(struct sonode *so, struct cred *cr) 6626 { 6627 vnode_t *vp = SOTOV(so); 6628 6629 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6630 SS_FALLBACK_PENDING); 6631 ASSERT(!SOCK_IS_NONSTR(so)); 6632 ASSERT(vp->v_stream != NULL); 6633 6634 strclean(vp); 6635 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr); 6636 6637 /* 6638 * Restore the original sockparams. The caller is responsible for 6639 * dropping the ref to the new sp. 
6640 */ 6641 so->so_sockparams = SOTOTPI(so)->sti_orig_sp; 6642 6643 sotpi_info_fini(so); 6644 sotpi_info_destroy(so); 6645 6646 /* This is no longer a STREAMS sockets */ 6647 so->so_not_str = B_TRUE; 6648 } 6649 6650 void 6651 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 6652 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 6653 socklen_t faddrlen, short opts) 6654 { 6655 sotpi_info_t *sti = SOTOTPI(so); 6656 6657 so_proc_tcapability_ack(so, tcap); 6658 6659 so->so_options |= opts; 6660 6661 /* 6662 * Determine whether the foreign and local address are valid 6663 */ 6664 if (laddrlen != 0) { 6665 ASSERT(laddrlen <= sti->sti_laddr_maxlen); 6666 sti->sti_laddr_len = laddrlen; 6667 bcopy(laddr, sti->sti_laddr_sa, laddrlen); 6668 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 6669 } 6670 6671 if (faddrlen != 0) { 6672 ASSERT(faddrlen <= sti->sti_faddr_maxlen); 6673 sti->sti_faddr_len = faddrlen; 6674 bcopy(faddr, sti->sti_faddr_sa, faddrlen); 6675 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 6676 } 6677 6678 } 6679 6680 /* 6681 * Allocate enough space to cache the local and foreign addresses. 6682 */ 6683 void 6684 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6685 { 6686 sotpi_info_t *sti = SOTOTPI(so); 6687 6688 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6689 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6690 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6691 P2ROUNDUP(maxlen, KMEM_ALIGN); 6692 so->so_max_addr_len = sti->sti_laddr_maxlen; 6693 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6694 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6695 + sti->sti_laddr_maxlen); 6696 6697 if (so->so_family == AF_UNIX) { 6698 /* 6699 * Initialize AF_UNIX related fields. 
6700 */ 6701 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6702 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6703 } 6704 } 6705 6706 6707 sotpi_info_t * 6708 sotpi_sototpi(struct sonode *so) 6709 { 6710 sotpi_info_t *sti; 6711 6712 ASSERT(so != NULL); 6713 6714 sti = (sotpi_info_t *)so->so_priv; 6715 6716 ASSERT(sti != NULL); 6717 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6718 6719 return (sti); 6720 } 6721 6722 static int 6723 i_sotpi_info_constructor(sotpi_info_t *sti) 6724 { 6725 sti->sti_magic = SOTPI_INFO_MAGIC; 6726 sti->sti_ack_mp = NULL; 6727 sti->sti_discon_ind_mp = NULL; 6728 sti->sti_ux_bound_vp = NULL; 6729 sti->sti_unbind_mp = NULL; 6730 6731 sti->sti_conn_ind_head = NULL; 6732 sti->sti_conn_ind_tail = NULL; 6733 6734 sti->sti_laddr_sa = NULL; 6735 sti->sti_faddr_sa = NULL; 6736 6737 sti->sti_nl7c_flags = 0; 6738 sti->sti_nl7c_uri = NULL; 6739 sti->sti_nl7c_rcv_mp = NULL; 6740 6741 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6742 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6743 6744 return (0); 6745 } 6746 6747 static void 6748 i_sotpi_info_destructor(sotpi_info_t *sti) 6749 { 6750 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6751 ASSERT(sti->sti_ack_mp == NULL); 6752 ASSERT(sti->sti_discon_ind_mp == NULL); 6753 ASSERT(sti->sti_ux_bound_vp == NULL); 6754 ASSERT(sti->sti_unbind_mp == NULL); 6755 6756 ASSERT(sti->sti_conn_ind_head == NULL); 6757 ASSERT(sti->sti_conn_ind_tail == NULL); 6758 6759 ASSERT(sti->sti_laddr_sa == NULL); 6760 ASSERT(sti->sti_faddr_sa == NULL); 6761 6762 ASSERT(sti->sti_nl7c_flags == 0); 6763 ASSERT(sti->sti_nl7c_uri == NULL); 6764 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6765 6766 mutex_destroy(&sti->sti_plumb_lock); 6767 cv_destroy(&sti->sti_ack_cv); 6768 } 6769 6770 /* 6771 * Creates and attaches TPI information to the given sonode 6772 */ 6773 static boolean_t 6774 sotpi_info_create(struct sonode *so, int kmflags) 6775 { 6776 sotpi_info_t *sti; 6777 6778 ASSERT(so->so_priv == NULL); 
6779 6780 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6781 return (B_FALSE); 6782 6783 if (i_sotpi_info_constructor(sti) != 0) { 6784 kmem_free(sti, sizeof (*sti)); 6785 return (B_FALSE); 6786 } 6787 6788 so->so_priv = (void *)sti; 6789 return (B_TRUE); 6790 } 6791 6792 /* 6793 * Initializes the TPI information. 6794 */ 6795 static void 6796 sotpi_info_init(struct sonode *so) 6797 { 6798 struct vnode *vp = SOTOV(so); 6799 sotpi_info_t *sti = SOTOTPI(so); 6800 time_t now; 6801 6802 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6803 vp->v_rdev = sti->sti_dev; 6804 6805 sti->sti_orig_sp = NULL; 6806 6807 sti->sti_pushcnt = 0; 6808 6809 now = gethrestime_sec(); 6810 sti->sti_atime = now; 6811 sti->sti_mtime = now; 6812 sti->sti_ctime = now; 6813 6814 sti->sti_eaddr_mp = NULL; 6815 sti->sti_delayed_error = 0; 6816 6817 sti->sti_provinfo = NULL; 6818 6819 sti->sti_oobcnt = 0; 6820 sti->sti_oobsigcnt = 0; 6821 6822 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6823 6824 sti->sti_laddr_sa = 0; 6825 sti->sti_faddr_sa = 0; 6826 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6827 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6828 6829 sti->sti_laddr_valid = 0; 6830 sti->sti_faddr_valid = 0; 6831 sti->sti_faddr_noxlate = 0; 6832 6833 sti->sti_direct = 0; 6834 6835 ASSERT(sti->sti_ack_mp == NULL); 6836 ASSERT(sti->sti_ux_bound_vp == NULL); 6837 ASSERT(sti->sti_unbind_mp == NULL); 6838 6839 ASSERT(sti->sti_conn_ind_head == NULL); 6840 ASSERT(sti->sti_conn_ind_tail == NULL); 6841 } 6842 6843 /* 6844 * Given a sonode, grab the TPI info and free any data. 
6845 */ 6846 static void 6847 sotpi_info_fini(struct sonode *so) 6848 { 6849 sotpi_info_t *sti = SOTOTPI(so); 6850 mblk_t *mp; 6851 6852 ASSERT(sti->sti_discon_ind_mp == NULL); 6853 6854 if ((mp = sti->sti_conn_ind_head) != NULL) { 6855 mblk_t *mp1; 6856 6857 while (mp) { 6858 mp1 = mp->b_next; 6859 mp->b_next = NULL; 6860 freemsg(mp); 6861 mp = mp1; 6862 } 6863 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6864 } 6865 6866 /* 6867 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6868 * indirect them. It also uses so_count as a validity test. 6869 */ 6870 mutex_enter(&so->so_lock); 6871 6872 if (sti->sti_laddr_sa) { 6873 ASSERT((caddr_t)sti->sti_faddr_sa == 6874 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6875 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6876 sti->sti_laddr_valid = 0; 6877 sti->sti_faddr_valid = 0; 6878 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6879 sti->sti_laddr_sa = NULL; 6880 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6881 sti->sti_faddr_sa = NULL; 6882 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6883 } 6884 6885 mutex_exit(&so->so_lock); 6886 6887 if ((mp = sti->sti_eaddr_mp) != NULL) { 6888 freemsg(mp); 6889 sti->sti_eaddr_mp = NULL; 6890 sti->sti_delayed_error = 0; 6891 } 6892 6893 if ((mp = sti->sti_ack_mp) != NULL) { 6894 freemsg(mp); 6895 sti->sti_ack_mp = NULL; 6896 } 6897 6898 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 6899 sti->sti_nl7c_rcv_mp = NULL; 6900 freemsg(mp); 6901 } 6902 sti->sti_nl7c_rcv_rval = 0; 6903 if (sti->sti_nl7c_uri != NULL) { 6904 nl7c_urifree(so); 6905 /* urifree() cleared nl7c_uri */ 6906 } 6907 if (sti->sti_nl7c_flags) { 6908 sti->sti_nl7c_flags = 0; 6909 } 6910 6911 ASSERT(sti->sti_ux_bound_vp == NULL); 6912 if ((mp = sti->sti_unbind_mp) != NULL) { 6913 freemsg(mp); 6914 sti->sti_unbind_mp = NULL; 6915 } 6916 } 6917 6918 /* 6919 * Destroys the TPI information attached to a sonode. 
6920 */ 6921 static void 6922 sotpi_info_destroy(struct sonode *so) 6923 { 6924 sotpi_info_t *sti = SOTOTPI(so); 6925 6926 i_sotpi_info_destructor(sti); 6927 kmem_free(sti, sizeof (*sti)); 6928 6929 so->so_priv = NULL; 6930 } 6931 6932 /* 6933 * Create the global sotpi socket module entry. It will never be freed. 6934 */ 6935 smod_info_t * 6936 sotpi_smod_create(void) 6937 { 6938 smod_info_t *smodp; 6939 6940 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 6941 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 6942 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 6943 /* 6944 * Initialize the smod_refcnt to 1 so it will never be freed. 6945 */ 6946 smodp->smod_refcnt = 1; 6947 smodp->smod_uc_version = SOCK_UC_VERSION; 6948 smodp->smod_dc_version = SOCK_DC_VERSION; 6949 smodp->smod_sock_create_func = &sotpi_create; 6950 smodp->smod_sock_destroy_func = &sotpi_destroy; 6951 return (smodp); 6952 } 6953