1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2015, Joyent, Inc. 25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 
26 */ 27 28 #include <sys/types.h> 29 #include <sys/t_lock.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/buf.h> 33 #include <sys/conf.h> 34 #include <sys/cred.h> 35 #include <sys/kmem.h> 36 #include <sys/kmem_impl.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/suntpi.h> 51 #include <sys/ddi.h> 52 #include <sys/esunddi.h> 53 #include <sys/flock.h> 54 #include <sys/modctl.h> 55 #include <sys/vtrace.h> 56 #include <sys/cmn_err.h> 57 #include <sys/pathname.h> 58 59 #include <sys/socket.h> 60 #include <sys/socketvar.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <sys/un.h> 64 #include <sys/strsun.h> 65 66 #include <sys/tiuser.h> 67 #define _SUN_TPI_VERSION 2 68 #include <sys/tihdr.h> 69 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 70 71 #include <c2/audit.h> 72 73 #include <inet/common.h> 74 #include <inet/ip.h> 75 #include <inet/ip6.h> 76 #include <inet/tcp.h> 77 #include <inet/udp_impl.h> 78 79 #include <sys/zone.h> 80 81 #include <fs/sockfs/nl7c.h> 82 #include <fs/sockfs/nl7curi.h> 83 84 #include <fs/sockfs/sockcommon.h> 85 #include <fs/sockfs/socktpi.h> 86 #include <fs/sockfs/socktpi_impl.h> 87 88 /* 89 * Possible failures when memory can't be allocated. 
The documented behavior: 90 * 91 * 5.5: 4.X: XNET: 92 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 93 * EINTR 94 * (4.X does not document EINTR but returns it) 95 * bind: ENOSR - ENOBUFS/ENOSR 96 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 97 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 98 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 99 * (4.X getpeername and getsockname do not fail in practice) 100 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 101 * listen: - - ENOBUFS 102 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 103 * EINTR 104 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 105 * EINTR 106 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 107 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 108 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 109 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 110 * 111 * Resolution. When allocation fails: 112 * recv: return EINTR 113 * send: return EINTR 114 * connect, accept: EINTR 115 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 116 * socket, socketpair: ENOBUFS 117 * getpeername, getsockname: sleep 118 * getsockopt, setsockopt: sleep 119 */ 120 121 #ifdef SOCK_TEST 122 /* 123 * Variables that make sockfs do something other than the standard TPI 124 * for the AF_INET transports. 125 * 126 * solisten_tpi_tcp: 127 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 128 * the transport is already bound. This is needed to avoid losing the 129 * port number should listen() do a T_UNBIND_REQ followed by a 130 * O_T_BIND_REQ. 131 * 132 * soconnect_tpi_udp: 133 * UDP and ICMP can handle a T_CONN_REQ. 134 * This is needed to make the sequence of connect(), getsockname() 135 * return the local IP address used to send packets to the connected to 136 * destination. 137 * 138 * soconnect_tpi_tcp: 139 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 
140 * Set this to non-zero to send TPI conformant messages to TCP in this 141 * respect. This is a performance optimization. 142 * 143 * soaccept_tpi_tcp: 144 * TCP can handle a T_CONN_REQ without the acceptor being bound. 145 * This is a performance optimization that has been picked up in XTI. 146 * 147 * soaccept_tpi_multioptions: 148 * When inheriting SOL_SOCKET options from the listener to the accepting 149 * socket send them as a single message for AF_INET{,6}. 150 */ 151 int solisten_tpi_tcp = 0; 152 int soconnect_tpi_udp = 0; 153 int soconnect_tpi_tcp = 0; 154 int soaccept_tpi_tcp = 0; 155 int soaccept_tpi_multioptions = 1; 156 #else /* SOCK_TEST */ 157 #define soconnect_tpi_tcp 0 158 #define soconnect_tpi_udp 0 159 #define solisten_tpi_tcp 0 160 #define soaccept_tpi_tcp 0 161 #define soaccept_tpi_multioptions 1 162 #endif /* SOCK_TEST */ 163 164 #ifdef SOCK_TEST 165 extern int do_useracc; 166 extern clock_t sock_test_timelimit; 167 #endif /* SOCK_TEST */ 168 169 extern uint32_t ucredsize; 170 171 /* 172 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 173 * applications working. Turn on this flag to disable these checks. 174 */ 175 int xnet_skip_checks = 0; 176 int xnet_check_print = 0; 177 int xnet_truncate_print = 0; 178 179 static void sotpi_destroy(struct sonode *); 180 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int, 181 int, int *, cred_t *cr); 182 183 static boolean_t sotpi_info_create(struct sonode *, int); 184 static void sotpi_info_init(struct sonode *); 185 static void sotpi_info_fini(struct sonode *); 186 static void sotpi_info_destroy(struct sonode *); 187 188 /* 189 * Do direct function call to the transport layer below; this would 190 * also allow the transport to utilize read-side synchronous stream 191 * interface if necessary. This is a /etc/system tunable that must 192 * not be modified on a running system. 
By default this is enabled 193 * for performance reasons and may be disabled for debugging purposes. 194 */ 195 boolean_t socktpi_direct = B_TRUE; 196 197 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 198 199 extern void sigintr(k_sigset_t *, int); 200 extern void sigunintr(k_sigset_t *); 201 202 static int sotpi_unbind(struct sonode *, int); 203 204 /* TPI sockfs sonode operations */ 205 int sotpi_init(struct sonode *, struct sonode *, struct cred *, 206 int); 207 static int sotpi_accept(struct sonode *, int, struct cred *, 208 struct sonode **); 209 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 210 int, struct cred *); 211 static int sotpi_listen(struct sonode *, int, struct cred *); 212 static int sotpi_connect(struct sonode *, struct sockaddr *, 213 socklen_t, int, int, struct cred *); 214 extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 215 struct uio *, struct cred *); 216 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 217 struct uio *, struct cred *); 218 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 219 struct cred *, mblk_t **); 220 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 221 struct uio *, void *, t_uscalar_t, int); 222 static int sodgram_direct(struct sonode *, struct sockaddr *, 223 socklen_t, struct uio *, int); 224 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 225 socklen_t *, boolean_t, struct cred *); 226 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 227 socklen_t *, struct cred *); 228 static int sotpi_shutdown(struct sonode *, int, struct cred *); 229 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 230 socklen_t *, int, struct cred *); 231 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 232 socklen_t, struct cred *); 233 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 234 int32_t *); 235 static int socktpi_plumbioctl(struct vnode *, 
int, intptr_t, int, 236 struct cred *, int32_t *); 237 static int sotpi_poll(struct sonode *, short, int, short *, 238 struct pollhead **); 239 static int sotpi_close(struct sonode *, int, struct cred *); 240 241 static int i_sotpi_info_constructor(sotpi_info_t *); 242 static void i_sotpi_info_destructor(sotpi_info_t *); 243 244 sonodeops_t sotpi_sonodeops = { 245 sotpi_init, /* sop_init */ 246 sotpi_accept, /* sop_accept */ 247 sotpi_bind, /* sop_bind */ 248 sotpi_listen, /* sop_listen */ 249 sotpi_connect, /* sop_connect */ 250 sotpi_recvmsg, /* sop_recvmsg */ 251 sotpi_sendmsg, /* sop_sendmsg */ 252 sotpi_sendmblk, /* sop_sendmblk */ 253 sotpi_getpeername, /* sop_getpeername */ 254 sotpi_getsockname, /* sop_getsockname */ 255 sotpi_shutdown, /* sop_shutdown */ 256 sotpi_getsockopt, /* sop_getsockopt */ 257 sotpi_setsockopt, /* sop_setsockopt */ 258 sotpi_ioctl, /* sop_ioctl */ 259 sotpi_poll, /* sop_poll */ 260 sotpi_close, /* sop_close */ 261 }; 262 263 /* 264 * Return a TPI socket vnode. 265 * 266 * Note that sockets assume that the driver will clone (either itself 267 * or by using the clone driver) i.e. a socket() call will always 268 * result in a new vnode being created. 269 */ 270 271 /* 272 * Common create code for socket and accept. If tso is set the values 273 * from that node is used instead of issuing a T_INFO_REQ. 274 */ 275 276 /* ARGSUSED */ 277 static struct sonode * 278 sotpi_create(struct sockparams *sp, int family, int type, int protocol, 279 int version, int sflags, int *errorp, cred_t *cr) 280 { 281 struct sonode *so; 282 kmem_cache_t *cp; 283 int sfamily = family; 284 285 ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 286 287 if (family == AF_NCA) { 288 /* 289 * The request is for an NCA socket so for NL7C use the 290 * INET domain instead and mark NL7C_AF_NCA below. 291 */ 292 family = AF_INET; 293 /* 294 * NL7C is not supported in the non-global zone, 295 * we enforce this restriction here. 
296 */ 297 if (getzoneid() != GLOBAL_ZONEID) { 298 *errorp = ENOTSUP; 299 return (NULL); 300 } 301 } 302 303 /* 304 * to be compatible with old tpi socket implementation ignore 305 * sleep flag (sflags) passed in 306 */ 307 cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 308 so = kmem_cache_alloc(cp, KM_SLEEP); 309 if (so == NULL) { 310 *errorp = ENOMEM; 311 return (NULL); 312 } 313 314 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 315 sotpi_info_init(so); 316 317 if (sfamily == AF_NCA) { 318 SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA; 319 } 320 321 if (version == SOV_DEFAULT) 322 version = so_default_version; 323 324 so->so_version = (short)version; 325 *errorp = 0; 326 327 return (so); 328 } 329 330 static void 331 sotpi_destroy(struct sonode *so) 332 { 333 kmem_cache_t *cp; 334 struct sockparams *origsp; 335 336 /* 337 * If there is a new dealloc function (ie. smod_destroy_func), 338 * then it should check the correctness of the ops. 339 */ 340 341 ASSERT(so->so_ops == &sotpi_sonodeops); 342 343 origsp = SOTOTPI(so)->sti_orig_sp; 344 345 sotpi_info_fini(so); 346 347 if (so->so_state & SS_FALLBACK_COMP) { 348 /* 349 * A fallback happend, which means that a sotpi_info_t struct 350 * was allocated (as opposed to being allocated from the TPI 351 * sonode cache. Therefore we explicitly free the struct 352 * here. 353 */ 354 sotpi_info_destroy(so); 355 ASSERT(origsp != NULL); 356 357 origsp->sp_smod_info->smod_sock_destroy_func(so); 358 SOCKPARAMS_DEC_REF(origsp); 359 } else { 360 sonode_fini(so); 361 cp = (so->so_family == AF_UNIX) ? 
socktpi_unix_cache : 362 socktpi_cache; 363 kmem_cache_free(cp, so); 364 } 365 } 366 367 /* ARGSUSED1 */ 368 int 369 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 370 { 371 major_t maj; 372 dev_t newdev; 373 struct vnode *vp; 374 int error = 0; 375 struct stdata *stp; 376 377 sotpi_info_t *sti = SOTOTPI(so); 378 379 dprint(1, ("sotpi_init()\n")); 380 381 /* 382 * over write the sleep flag passed in but that is ok 383 * as tpi socket does not honor sleep flag. 384 */ 385 flags |= FREAD|FWRITE; 386 387 /* 388 * Record in so_flag that it is a clone. 389 */ 390 if (getmajor(sti->sti_dev) == clone_major) 391 so->so_flag |= SOCLONE; 392 393 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 394 (so->so_family == AF_INET || so->so_family == AF_INET6) && 395 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 396 so->so_protocol == IPPROTO_IP)) { 397 /* Tell tcp or udp that it's talking to sockets */ 398 flags |= SO_SOCKSTR; 399 400 /* 401 * Here we indicate to socktpi_open() our attempt to 402 * make direct calls between sockfs and transport. 403 * The final decision is left to socktpi_open(). 404 */ 405 sti->sti_direct = 1; 406 407 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 408 if (so->so_type == SOCK_STREAM && tso != NULL) { 409 if (SOTOTPI(tso)->sti_direct) { 410 /* 411 * Inherit sti_direct from listener and pass 412 * SO_ACCEPTOR open flag to tcp, indicating 413 * that this is an accept fast-path instance. 414 */ 415 flags |= SO_ACCEPTOR; 416 } else { 417 /* 418 * sti_direct is not set on listener, meaning 419 * that the listener has been converted from 420 * a socket to a stream. Ensure that the 421 * acceptor inherits these settings. 422 */ 423 sti->sti_direct = 0; 424 flags &= ~SO_SOCKSTR; 425 } 426 } 427 } 428 429 /* 430 * Tell local transport that it is talking to sockets. 
431 */ 432 if (so->so_family == AF_UNIX) { 433 flags |= SO_SOCKSTR; 434 } 435 436 vp = SOTOV(so); 437 newdev = vp->v_rdev; 438 maj = getmajor(newdev); 439 ASSERT(STREAMSTAB(maj)); 440 441 error = stropen(vp, &newdev, flags, cr); 442 443 stp = vp->v_stream; 444 if (error == 0) { 445 if (so->so_flag & SOCLONE) 446 ASSERT(newdev != vp->v_rdev); 447 mutex_enter(&so->so_lock); 448 sti->sti_dev = newdev; 449 vp->v_rdev = newdev; 450 mutex_exit(&so->so_lock); 451 452 if (stp->sd_flag & STRISTTY) { 453 /* 454 * this is a post SVR4 tty driver - a socket can not 455 * be a controlling terminal. Fail the open. 456 */ 457 (void) sotpi_close(so, flags, cr); 458 return (ENOTTY); /* XXX */ 459 } 460 461 ASSERT(stp->sd_wrq != NULL); 462 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 463 464 /* 465 * If caller is interested in doing direct function call 466 * interface to/from transport module, probe the module 467 * directly beneath the streamhead to see if it qualifies. 468 * 469 * We turn off the direct interface when qualifications fail. 470 * In the acceptor case, we simply turn off the sti_direct 471 * flag on the socket. We do the fallback after the accept 472 * has completed, before the new socket is returned to the 473 * application. 474 */ 475 if (sti->sti_direct) { 476 queue_t *tq = stp->sd_wrq->q_next; 477 478 /* 479 * sti_direct is currently supported and tested 480 * only for tcp/udp; this is the main reason to 481 * have the following assertions. 482 */ 483 ASSERT(so->so_family == AF_INET || 484 so->so_family == AF_INET6); 485 ASSERT(so->so_protocol == IPPROTO_UDP || 486 so->so_protocol == IPPROTO_TCP || 487 so->so_protocol == IPPROTO_IP); 488 ASSERT(so->so_type == SOCK_DGRAM || 489 so->so_type == SOCK_STREAM); 490 491 /* 492 * Abort direct call interface if the module directly 493 * underneath the stream head is not defined with the 494 * _D_DIRECT flag. 
This could happen in the tcp or 495 * udp case, when some other module is autopushed 496 * above it, or for some reasons the expected module 497 * isn't purely D_MP (which is the main requirement). 498 */ 499 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 500 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 501 int rval; 502 503 /* Continue on without direct calls */ 504 sti->sti_direct = 0; 505 506 /* 507 * Cannot issue ioctl on fallback socket since 508 * there is no conn associated with the queue. 509 * The fallback downcall will notify the proto 510 * of the change. 511 */ 512 if (!(flags & SO_ACCEPTOR) && 513 !(flags & SO_FALLBACK)) { 514 if ((error = strioctl(vp, 515 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 516 cr, &rval)) != 0) { 517 (void) sotpi_close(so, flags, 518 cr); 519 return (error); 520 } 521 } 522 } 523 } 524 525 if (flags & SO_FALLBACK) { 526 /* 527 * The stream created does not have a conn. 528 * do stream set up after conn has been assigned 529 */ 530 return (error); 531 } 532 if (error = so_strinit(so, tso)) { 533 (void) sotpi_close(so, flags, cr); 534 return (error); 535 } 536 537 /* Enable sendfile() on AF_UNIX streams */ 538 if (so->so_family == AF_UNIX && so->so_type == SOCK_STREAM) { 539 mutex_enter(&so->so_lock); 540 so->so_mode |= SM_SENDFILESUPP; 541 mutex_exit(&so->so_lock); 542 } 543 544 /* Wildcard */ 545 if (so->so_protocol != so->so_sockparams->sp_protocol) { 546 int protocol = so->so_protocol; 547 /* 548 * Issue SO_PROTOTYPE setsockopt. 549 */ 550 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 551 &protocol, (t_uscalar_t)sizeof (protocol), cr); 552 if (error != 0) { 553 (void) sotpi_close(so, flags, cr); 554 /* 555 * Setsockopt often fails with ENOPROTOOPT but 556 * socket() should fail with 557 * EPROTONOSUPPORT/EPROTOTYPE. 558 */ 559 return (EPROTONOSUPPORT); 560 } 561 } 562 563 } else { 564 /* 565 * While the same socket can not be reopened (unlike specfs) 566 * the stream head sets STREOPENFAIL when the autopush fails. 
567 */ 568 if ((stp != NULL) && 569 (stp->sd_flag & STREOPENFAIL)) { 570 /* 571 * Open failed part way through. 572 */ 573 mutex_enter(&stp->sd_lock); 574 stp->sd_flag &= ~STREOPENFAIL; 575 mutex_exit(&stp->sd_lock); 576 (void) sotpi_close(so, flags, cr); 577 return (error); 578 /*NOTREACHED*/ 579 } 580 ASSERT(stp == NULL); 581 } 582 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 583 "sockfs open:maj %d vp %p so %p error %d", 584 maj, vp, so, error); 585 return (error); 586 } 587 588 /* 589 * Bind the socket to an unspecified address in sockfs only. 590 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 591 * required in all cases. 592 */ 593 static void 594 so_automatic_bind(struct sonode *so) 595 { 596 sotpi_info_t *sti = SOTOTPI(so); 597 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 598 599 ASSERT(MUTEX_HELD(&so->so_lock)); 600 ASSERT(!(so->so_state & SS_ISBOUND)); 601 ASSERT(sti->sti_unbind_mp); 602 603 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 604 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 605 sti->sti_laddr_sa->sa_family = so->so_family; 606 so->so_state |= SS_ISBOUND; 607 } 608 609 610 /* 611 * bind the socket. 612 * 613 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 614 * are passed in we allow rebinding. Note that for backwards compatibility 615 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 616 * Thus the rebinding code is currently not executed. 617 * 618 * The constraints for rebinding are: 619 * - it is a SOCK_DGRAM, or 620 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 621 * and no listen() has been done. 622 * This rebinding code was added based on some language in the XNET book 623 * about not returning EINVAL it the protocol allows rebinding. However, 624 * this language is not present in the Posix socket draft. Thus maybe the 625 * rebinding logic should be deleted from the source. 
626 * 627 * A null "name" can be used to unbind the socket if: 628 * - it is a SOCK_DGRAM, or 629 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 630 * and no listen() has been done. 631 */ 632 /* ARGSUSED */ 633 static int 634 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 635 socklen_t namelen, int backlog, int flags, struct cred *cr) 636 { 637 struct T_bind_req bind_req; 638 struct T_bind_ack *bind_ack; 639 int error = 0; 640 mblk_t *mp; 641 void *addr; 642 t_uscalar_t addrlen; 643 int unbind_on_err = 1; 644 boolean_t clear_acceptconn_on_err = B_FALSE; 645 boolean_t restore_backlog_on_err = B_FALSE; 646 int save_so_backlog; 647 t_scalar_t PRIM_type = O_T_BIND_REQ; 648 boolean_t tcp_udp_xport; 649 void *nl7c = NULL; 650 sotpi_info_t *sti = SOTOTPI(so); 651 652 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 653 (void *)so, (void *)name, namelen, backlog, flags, 654 pr_state(so->so_state, so->so_mode))); 655 656 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 657 658 if (!(flags & _SOBIND_LOCK_HELD)) { 659 mutex_enter(&so->so_lock); 660 so_lock_single(so); /* Set SOLOCKED */ 661 } else { 662 ASSERT(MUTEX_HELD(&so->so_lock)); 663 ASSERT(so->so_flag & SOLOCKED); 664 } 665 666 /* 667 * Make sure that there is a preallocated unbind_req message 668 * before binding. This message allocated when the socket is 669 * created but it might be have been consumed. 670 */ 671 if (sti->sti_unbind_mp == NULL) { 672 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 673 /* NOTE: holding so_lock while sleeping */ 674 sti->sti_unbind_mp = 675 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 676 cr); 677 } 678 679 if (flags & _SOBIND_REBIND) { 680 /* 681 * Called from solisten after doing an sotpi_unbind() or 682 * potentially without the unbind (latter for AF_INET{,6}). 
683 */ 684 ASSERT(name == NULL && namelen == 0); 685 686 if (so->so_family == AF_UNIX) { 687 ASSERT(sti->sti_ux_bound_vp); 688 addr = &sti->sti_ux_laddr; 689 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 690 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 691 "addr 0x%p, vp %p\n", 692 addrlen, 693 (void *)((struct so_ux_addr *)addr)->soua_vp, 694 (void *)sti->sti_ux_bound_vp)); 695 } else { 696 addr = sti->sti_laddr_sa; 697 addrlen = (t_uscalar_t)sti->sti_laddr_len; 698 } 699 } else if (flags & _SOBIND_UNSPEC) { 700 ASSERT(name == NULL && namelen == 0); 701 702 /* 703 * The caller checked SS_ISBOUND but not necessarily 704 * under so_lock 705 */ 706 if (so->so_state & SS_ISBOUND) { 707 /* No error */ 708 goto done; 709 } 710 711 /* Set an initial local address */ 712 switch (so->so_family) { 713 case AF_UNIX: 714 /* 715 * Use an address with same size as struct sockaddr 716 * just like BSD. 717 */ 718 sti->sti_laddr_len = 719 (socklen_t)sizeof (struct sockaddr); 720 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 721 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 722 sti->sti_laddr_sa->sa_family = so->so_family; 723 724 /* 725 * Pass down an address with the implicit bind 726 * magic number and the rest all zeros. 727 * The transport will return a unique address. 728 */ 729 sti->sti_ux_laddr.soua_vp = NULL; 730 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 731 addr = &sti->sti_ux_laddr; 732 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 733 break; 734 735 case AF_INET: 736 case AF_INET6: 737 /* 738 * An unspecified bind in TPI has a NULL address. 739 * Set the address in sockfs to have the sa_family. 740 */ 741 sti->sti_laddr_len = (so->so_family == AF_INET) ? 
742 (socklen_t)sizeof (sin_t) : 743 (socklen_t)sizeof (sin6_t); 744 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 745 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 746 sti->sti_laddr_sa->sa_family = so->so_family; 747 addr = NULL; 748 addrlen = 0; 749 break; 750 751 default: 752 /* 753 * An unspecified bind in TPI has a NULL address. 754 * Set the address in sockfs to be zero length. 755 * 756 * Can not assume there is a sa_family for all 757 * protocol families. For example, AF_X25 does not 758 * have a family field. 759 */ 760 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 761 sti->sti_laddr_len = 0; /* XXX correct? */ 762 addr = NULL; 763 addrlen = 0; 764 break; 765 } 766 767 } else { 768 if (so->so_state & SS_ISBOUND) { 769 /* 770 * If it is ok to rebind the socket, first unbind 771 * with the transport. A rebind to the NULL address 772 * is interpreted as an unbind. 773 * Note that a bind to NULL in BSD does unbind the 774 * socket but it fails with EINVAL. 775 * Note that regular sockets set SOV_SOCKBSD i.e. 776 * _SOBIND_SOCKBSD gets set here hence no type of 777 * socket does currently allow rebinding. 778 * 779 * If the name is NULL just do an unbind. 
780 */ 781 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 782 name != NULL) { 783 error = EINVAL; 784 unbind_on_err = 0; 785 eprintsoline(so, error); 786 goto done; 787 } 788 if ((so->so_mode & SM_CONNREQUIRED) && 789 (so->so_state & SS_CANTREBIND)) { 790 error = EINVAL; 791 unbind_on_err = 0; 792 eprintsoline(so, error); 793 goto done; 794 } 795 error = sotpi_unbind(so, 0); 796 if (error) { 797 eprintsoline(so, error); 798 goto done; 799 } 800 ASSERT(!(so->so_state & SS_ISBOUND)); 801 if (name == NULL) { 802 so->so_state &= 803 ~(SS_ISCONNECTED|SS_ISCONNECTING); 804 goto done; 805 } 806 } 807 808 /* X/Open requires this check */ 809 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 810 if (xnet_check_print) { 811 printf("sockfs: X/Open bind state check " 812 "caused EINVAL\n"); 813 } 814 error = EINVAL; 815 goto done; 816 } 817 818 switch (so->so_family) { 819 case AF_UNIX: 820 /* 821 * All AF_UNIX addresses are nul terminated 822 * when copied (copyin_name) in so the minimum 823 * length is 3 bytes. 824 */ 825 if (name == NULL || 826 (ssize_t)namelen <= sizeof (short) + 1) { 827 error = EISDIR; 828 eprintsoline(so, error); 829 goto done; 830 } 831 /* 832 * Verify so_family matches the bound family. 833 * BSD does not check this for AF_UNIX resulting 834 * in funny mknods. 835 */ 836 if (name->sa_family != so->so_family) { 837 error = EAFNOSUPPORT; 838 goto done; 839 } 840 break; 841 case AF_INET: 842 if (name == NULL) { 843 error = EINVAL; 844 eprintsoline(so, error); 845 goto done; 846 } 847 if ((size_t)namelen != sizeof (sin_t)) { 848 error = name->sa_family != so->so_family ? 849 EAFNOSUPPORT : EINVAL; 850 eprintsoline(so, error); 851 goto done; 852 } 853 if ((flags & _SOBIND_XPG4_2) && 854 (name->sa_family != so->so_family)) { 855 /* 856 * This check has to be made for X/Open 857 * sockets however application failures have 858 * been observed when it is applied to 859 * all sockets. 
860 */ 861 error = EAFNOSUPPORT; 862 eprintsoline(so, error); 863 goto done; 864 } 865 /* 866 * Force a zero sa_family to match so_family. 867 * 868 * Some programs like inetd(1M) don't set the 869 * family field. Other programs leave 870 * sin_family set to garbage - SunOS 4.X does 871 * not check the family field on a bind. 872 * We use the family field that 873 * was passed in to the socket() call. 874 */ 875 name->sa_family = so->so_family; 876 break; 877 878 case AF_INET6: { 879 #ifdef DEBUG 880 sin6_t *sin6 = (sin6_t *)name; 881 #endif /* DEBUG */ 882 883 if (name == NULL) { 884 error = EINVAL; 885 eprintsoline(so, error); 886 goto done; 887 } 888 if ((size_t)namelen != sizeof (sin6_t)) { 889 error = name->sa_family != so->so_family ? 890 EAFNOSUPPORT : EINVAL; 891 eprintsoline(so, error); 892 goto done; 893 } 894 if (name->sa_family != so->so_family) { 895 /* 896 * With IPv6 we require the family to match 897 * unlike in IPv4. 898 */ 899 error = EAFNOSUPPORT; 900 eprintsoline(so, error); 901 goto done; 902 } 903 #ifdef DEBUG 904 /* 905 * Verify that apps don't forget to clear 906 * sin6_scope_id etc 907 */ 908 if (sin6->sin6_scope_id != 0 && 909 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 910 zcmn_err(getzoneid(), CE_WARN, 911 "bind with uninitialized sin6_scope_id " 912 "(%d) on socket. Pid = %d\n", 913 (int)sin6->sin6_scope_id, 914 (int)curproc->p_pid); 915 } 916 if (sin6->__sin6_src_id != 0) { 917 zcmn_err(getzoneid(), CE_WARN, 918 "bind with uninitialized __sin6_src_id " 919 "(%d) on socket. Pid = %d\n", 920 (int)sin6->__sin6_src_id, 921 (int)curproc->p_pid); 922 } 923 #endif /* DEBUG */ 924 break; 925 } 926 default: 927 /* 928 * Don't do any length or sa_family check to allow 929 * non-sockaddr style addresses. 
930 */ 931 if (name == NULL) { 932 error = EINVAL; 933 eprintsoline(so, error); 934 goto done; 935 } 936 break; 937 } 938 939 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 940 error = ENAMETOOLONG; 941 eprintsoline(so, error); 942 goto done; 943 } 944 /* 945 * Save local address. 946 */ 947 sti->sti_laddr_len = (socklen_t)namelen; 948 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 949 bcopy(name, sti->sti_laddr_sa, namelen); 950 951 addr = sti->sti_laddr_sa; 952 addrlen = (t_uscalar_t)sti->sti_laddr_len; 953 switch (so->so_family) { 954 case AF_INET6: 955 case AF_INET: 956 break; 957 case AF_UNIX: { 958 struct sockaddr_un *soun = 959 (struct sockaddr_un *)sti->sti_laddr_sa; 960 struct vnode *vp, *rvp; 961 struct vattr vattr; 962 963 ASSERT(sti->sti_ux_bound_vp == NULL); 964 /* 965 * Create vnode for the specified path name. 966 * Keep vnode held with a reference in sti_ux_bound_vp. 967 * Use the vnode pointer as the address used in the 968 * bind with the transport. 969 * 970 * Use the same mode as in BSD. In particular this does 971 * not observe the umask. 972 */ 973 /* MAXPATHLEN + soun_family + nul termination */ 974 if (sti->sti_laddr_len > 975 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 976 error = ENAMETOOLONG; 977 eprintsoline(so, error); 978 goto done; 979 } 980 vattr.va_type = VSOCK; 981 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 982 vattr.va_mask = AT_TYPE|AT_MODE; 983 /* NOTE: holding so_lock */ 984 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 985 EXCL, 0, &vp, CRMKNOD, 0, 0); 986 if (error) { 987 if (error == EEXIST) 988 error = EADDRINUSE; 989 eprintsoline(so, error); 990 goto done; 991 } 992 /* 993 * Establish pointer from the underlying filesystem 994 * vnode to the socket node. 995 * sti_ux_bound_vp and v_stream->sd_vnode form the 996 * cross-linkage between the underlying filesystem 997 * node and the socket node. 
998 */ 999 1000 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 1001 VN_HOLD(rvp); 1002 VN_RELE(vp); 1003 vp = rvp; 1004 } 1005 1006 ASSERT(SOTOV(so)->v_stream); 1007 mutex_enter(&vp->v_lock); 1008 vp->v_stream = SOTOV(so)->v_stream; 1009 sti->sti_ux_bound_vp = vp; 1010 mutex_exit(&vp->v_lock); 1011 1012 /* 1013 * Use the vnode pointer value as a unique address 1014 * (together with the magic number to avoid conflicts 1015 * with implicit binds) in the transport provider. 1016 */ 1017 sti->sti_ux_laddr.soua_vp = 1018 (void *)sti->sti_ux_bound_vp; 1019 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1020 addr = &sti->sti_ux_laddr; 1021 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1022 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1023 addrlen, 1024 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1025 break; 1026 } 1027 } /* end switch (so->so_family) */ 1028 } 1029 1030 /* 1031 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1032 * the transport can start passing up T_CONN_IND messages 1033 * as soon as it receives the bind req and strsock_proto() 1034 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1035 */ 1036 if (flags & _SOBIND_LISTEN) { 1037 if ((so->so_state & SS_ACCEPTCONN) == 0) 1038 clear_acceptconn_on_err = B_TRUE; 1039 save_so_backlog = so->so_backlog; 1040 restore_backlog_on_err = B_TRUE; 1041 so->so_state |= SS_ACCEPTCONN; 1042 so->so_backlog = backlog; 1043 } 1044 1045 /* 1046 * If NL7C addr(s) have been configured check for addr/port match, 1047 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 1048 * 1049 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 1050 * family sockets only. If match mark as such. 
1051 */ 1052 if (nl7c_enabled && ((addr != NULL && 1053 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1054 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 1055 sti->sti_nl7c_flags == NL7C_AF_NCA)) { 1056 /* 1057 * NL7C is not supported in non-global zones, 1058 * we enforce this restriction here. 1059 */ 1060 if (so->so_zoneid == GLOBAL_ZONEID) { 1061 /* An NL7C socket, mark it */ 1062 sti->sti_nl7c_flags |= NL7C_ENABLED; 1063 if (nl7c == NULL) { 1064 /* 1065 * Was an AF_NCA bind() so add it to the 1066 * addr list for reporting purposes. 1067 */ 1068 nl7c = nl7c_add_addr(addr, addrlen); 1069 } 1070 } else 1071 nl7c = NULL; 1072 } 1073 1074 /* 1075 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1076 * for other transports we will send in a O_T_BIND_REQ. 1077 */ 1078 if (tcp_udp_xport && 1079 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1080 PRIM_type = T_BIND_REQ; 1081 1082 bind_req.PRIM_type = PRIM_type; 1083 bind_req.ADDR_length = addrlen; 1084 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1085 bind_req.CONIND_number = backlog; 1086 /* NOTE: holding so_lock while sleeping */ 1087 mp = soallocproto2(&bind_req, sizeof (bind_req), 1088 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1089 sti->sti_laddr_valid = 0; 1090 1091 /* Done using sti_laddr_sa - can drop the lock */ 1092 mutex_exit(&so->so_lock); 1093 1094 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1095 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1096 if (error) { 1097 eprintsoline(so, error); 1098 mutex_enter(&so->so_lock); 1099 goto done; 1100 } 1101 1102 mutex_enter(&so->so_lock); 1103 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1104 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1105 if (error) { 1106 eprintsoline(so, error); 1107 goto done; 1108 } 1109 ASSERT(mp); 1110 /* 1111 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1112 * strsock_proto while the lock was dropped above, the bind 1113 * is allowed to complete. 1114 */ 1115 1116 /* Mark as bound. 
This will be undone if we detect errors below. */ 1117 if (flags & _SOBIND_NOXLATE) { 1118 ASSERT(so->so_family == AF_UNIX); 1119 sti->sti_faddr_noxlate = 1; 1120 } 1121 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1122 so->so_state |= SS_ISBOUND; 1123 ASSERT(sti->sti_unbind_mp); 1124 1125 /* note that we've already set SS_ACCEPTCONN above */ 1126 1127 /* 1128 * Recompute addrlen - an unspecied bind sent down an 1129 * address of length zero but we expect the appropriate length 1130 * in return. 1131 */ 1132 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 1133 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1134 1135 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1136 /* 1137 * The alignment restriction is really too strict but 1138 * we want enough alignment to inspect the fields of 1139 * a sockaddr_in. 1140 */ 1141 addr = sogetoff(mp, bind_ack->ADDR_offset, 1142 bind_ack->ADDR_length, 1143 __TPI_ALIGN_SIZE); 1144 if (addr == NULL) { 1145 freemsg(mp); 1146 error = EPROTO; 1147 eprintsoline(so, error); 1148 goto done; 1149 } 1150 if (!(flags & _SOBIND_UNSPEC)) { 1151 /* 1152 * Verify that the transport didn't return something we 1153 * did not want e.g. an address other than what we asked for. 1154 * 1155 * NOTE: These checks would go away if/when we switch to 1156 * using the new TPI (in which the transport would fail 1157 * the request instead of assigning a different address). 1158 * 1159 * NOTE2: For protocols that we don't know (i.e. any 1160 * other than AF_INET6, AF_INET and AF_UNIX), we 1161 * cannot know if the transport should be expected to 1162 * return the same address as that requested. 1163 * 1164 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1165 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 
1166 * 1167 * For example, in the case of netatalk it may be 1168 * inappropriate for the transport to return the 1169 * requested address (as it may have allocated a local 1170 * port number in behaviour similar to that of an 1171 * AF_INET bind request with a port number of zero). 1172 * 1173 * Given the definition of O_T_BIND_REQ, where the 1174 * transport may bind to an address other than the 1175 * requested address, it's not possible to determine 1176 * whether a returned address that differs from the 1177 * requested address is a reason to fail (because the 1178 * requested address was not available) or succeed 1179 * (because the transport allocated an appropriate 1180 * address and/or port). 1181 * 1182 * sockfs currently requires that the transport return 1183 * the requested address in the T_BIND_ACK, unless 1184 * there is code here to allow for any discrepancy. 1185 * Such code exists for AF_INET and AF_INET6. 1186 * 1187 * Netatalk chooses to return the requested address 1188 * rather than the (correct) allocated address. This 1189 * means that netatalk violates the TPI specification 1190 * (and would not function correctly if used from a 1191 * TLI application), but it does mean that it works 1192 * with sockfs. 1193 * 1194 * As noted above, using the newer XTI bind primitive 1195 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1196 * allow sockfs to be more sure about whether or not 1197 * the bind request had succeeded (as transports are 1198 * not permitted to bind to a different address than 1199 * that requested - they must return failure). 1200 * Unfortunately, support for T_BIND_REQ may not be 1201 * present in all transport implementations (netatalk, 1202 * for example, doesn't have it), making the 1203 * transition difficult. 
1204 */ 1205 if (bind_ack->ADDR_length != addrlen) { 1206 /* Assumes that the requested address was in use */ 1207 freemsg(mp); 1208 error = EADDRINUSE; 1209 eprintsoline(so, error); 1210 goto done; 1211 } 1212 1213 switch (so->so_family) { 1214 case AF_INET6: 1215 case AF_INET: { 1216 sin_t *rname, *aname; 1217 1218 rname = (sin_t *)addr; 1219 aname = (sin_t *)sti->sti_laddr_sa; 1220 1221 /* 1222 * Take advantage of the alignment 1223 * of sin_port and sin6_port which fall 1224 * in the same place in their data structures. 1225 * Just use sin_port for either address family. 1226 * 1227 * This may become a problem if (heaven forbid) 1228 * there's a separate ipv6port_reserved... :-P 1229 * 1230 * Binding to port 0 has the semantics of letting 1231 * the transport bind to any port. 1232 * 1233 * If the transport is TCP or UDP since we had sent 1234 * a T_BIND_REQ we would not get a port other than 1235 * what we asked for. 1236 */ 1237 if (tcp_udp_xport) { 1238 /* 1239 * Pick up the new port number if we bound to 1240 * port 0. 1241 */ 1242 if (aname->sin_port == 0) 1243 aname->sin_port = rname->sin_port; 1244 sti->sti_laddr_valid = 1; 1245 break; 1246 } 1247 if (aname->sin_port != 0 && 1248 aname->sin_port != rname->sin_port) { 1249 freemsg(mp); 1250 error = EADDRINUSE; 1251 eprintsoline(so, error); 1252 goto done; 1253 } 1254 /* 1255 * Pick up the new port number if we bound to port 0. 1256 */ 1257 aname->sin_port = rname->sin_port; 1258 1259 /* 1260 * Unfortunately, addresses aren't _quite_ the same. 
1261 */ 1262 if (so->so_family == AF_INET) { 1263 if (aname->sin_addr.s_addr != 1264 rname->sin_addr.s_addr) { 1265 freemsg(mp); 1266 error = EADDRNOTAVAIL; 1267 eprintsoline(so, error); 1268 goto done; 1269 } 1270 } else { 1271 sin6_t *rname6 = (sin6_t *)rname; 1272 sin6_t *aname6 = (sin6_t *)aname; 1273 1274 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1275 &rname6->sin6_addr)) { 1276 freemsg(mp); 1277 error = EADDRNOTAVAIL; 1278 eprintsoline(so, error); 1279 goto done; 1280 } 1281 } 1282 break; 1283 } 1284 case AF_UNIX: 1285 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1286 freemsg(mp); 1287 error = EADDRINUSE; 1288 eprintsoline(so, error); 1289 eprintso(so, 1290 ("addrlen %d, addr 0x%x, vp %p\n", 1291 addrlen, *((int *)addr), 1292 (void *)sti->sti_ux_bound_vp)); 1293 goto done; 1294 } 1295 sti->sti_laddr_valid = 1; 1296 break; 1297 default: 1298 /* 1299 * NOTE: This assumes that addresses can be 1300 * byte-compared for equivalence. 1301 */ 1302 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1303 freemsg(mp); 1304 error = EADDRINUSE; 1305 eprintsoline(so, error); 1306 goto done; 1307 } 1308 /* 1309 * Don't mark sti_laddr_valid, as we cannot be 1310 * sure that the returned address is the real 1311 * bound address when talking to an unknown 1312 * transport. 1313 */ 1314 break; 1315 } 1316 } else { 1317 /* 1318 * Save for returned address for getsockname. 1319 * Needed for unspecific bind unless transport supports 1320 * the TI_GETMYNAME ioctl. 1321 * Do this for AF_INET{,6} even though they do, as 1322 * caching info here is much better performance than 1323 * a TPI/STREAMS trip to the transport for getsockname. 1324 * Any which can't for some reason _must_ _not_ set 1325 * sti_laddr_valid here for the caching version of 1326 * getsockname to not break; 1327 */ 1328 switch (so->so_family) { 1329 case AF_UNIX: 1330 /* 1331 * Record the address bound with the transport 1332 * for use by socketpair. 
1333 */ 1334 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1335 sti->sti_laddr_valid = 1; 1336 break; 1337 case AF_INET: 1338 case AF_INET6: 1339 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1340 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1341 sti->sti_laddr_valid = 1; 1342 break; 1343 default: 1344 /* 1345 * Don't mark sti_laddr_valid, as we cannot be 1346 * sure that the returned address is the real 1347 * bound address when talking to an unknown 1348 * transport. 1349 */ 1350 break; 1351 } 1352 } 1353 1354 if (nl7c != NULL) { 1355 /* Register listen()er sonode pointer with NL7C */ 1356 nl7c_listener_addr(nl7c, so); 1357 } 1358 1359 freemsg(mp); 1360 1361 done: 1362 if (error) { 1363 /* reset state & backlog to values held on entry */ 1364 if (clear_acceptconn_on_err == B_TRUE) 1365 so->so_state &= ~SS_ACCEPTCONN; 1366 if (restore_backlog_on_err == B_TRUE) 1367 so->so_backlog = save_so_backlog; 1368 1369 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1370 int err; 1371 1372 err = sotpi_unbind(so, 0); 1373 /* LINTED - statement has no consequent: if */ 1374 if (err) { 1375 eprintsoline(so, error); 1376 } else { 1377 ASSERT(!(so->so_state & SS_ISBOUND)); 1378 } 1379 } 1380 } 1381 if (!(flags & _SOBIND_LOCK_HELD)) { 1382 so_unlock_single(so, SOLOCKED); 1383 mutex_exit(&so->so_lock); 1384 } else { 1385 ASSERT(MUTEX_HELD(&so->so_lock)); 1386 ASSERT(so->so_flag & SOLOCKED); 1387 } 1388 return (error); 1389 } 1390 1391 /* bind the socket */ 1392 static int 1393 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1394 int flags, struct cred *cr) 1395 { 1396 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1397 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1398 1399 flags &= ~_SOBIND_SOCKETPAIR; 1400 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1401 } 1402 1403 /* 1404 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1405 * address, or when listen needs to unbind and bind. 
1406 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1407 * so that a sobind can pick them up. 1408 */ 1409 static int 1410 sotpi_unbind(struct sonode *so, int flags) 1411 { 1412 struct T_unbind_req unbind_req; 1413 int error = 0; 1414 mblk_t *mp; 1415 sotpi_info_t *sti = SOTOTPI(so); 1416 1417 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1418 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1419 1420 ASSERT(MUTEX_HELD(&so->so_lock)); 1421 ASSERT(so->so_flag & SOLOCKED); 1422 1423 if (!(so->so_state & SS_ISBOUND)) { 1424 error = EINVAL; 1425 eprintsoline(so, error); 1426 goto done; 1427 } 1428 1429 mutex_exit(&so->so_lock); 1430 1431 /* 1432 * Flush the read and write side (except stream head read queue) 1433 * and send down T_UNBIND_REQ. 1434 */ 1435 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1436 1437 unbind_req.PRIM_type = T_UNBIND_REQ; 1438 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1439 0, _ALLOC_SLEEP, CRED()); 1440 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1441 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1442 mutex_enter(&so->so_lock); 1443 if (error) { 1444 eprintsoline(so, error); 1445 goto done; 1446 } 1447 1448 error = sowaitokack(so, T_UNBIND_REQ); 1449 if (error) { 1450 eprintsoline(so, error); 1451 goto done; 1452 } 1453 1454 /* 1455 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1456 * strsock_proto while the lock was dropped above, the unbind 1457 * is allowed to complete. 1458 */ 1459 if (!(flags & _SOUNBIND_REBIND)) { 1460 /* 1461 * Clear out bound address. 
1462 */ 1463 vnode_t *vp; 1464 1465 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1466 sti->sti_ux_bound_vp = NULL; 1467 vn_rele_stream(vp); 1468 } 1469 /* Clear out address */ 1470 sti->sti_laddr_len = 0; 1471 } 1472 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1473 sti->sti_laddr_valid = 0; 1474 1475 done: 1476 1477 /* If the caller held the lock don't release it here */ 1478 ASSERT(MUTEX_HELD(&so->so_lock)); 1479 ASSERT(so->so_flag & SOLOCKED); 1480 1481 return (error); 1482 } 1483 1484 /* 1485 * listen on the socket. 1486 * For TPI conforming transports this has to first unbind with the transport 1487 * and then bind again using the new backlog. 1488 */ 1489 /* ARGSUSED */ 1490 int 1491 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1492 { 1493 int error = 0; 1494 sotpi_info_t *sti = SOTOTPI(so); 1495 1496 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1497 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1498 1499 if (sti->sti_serv_type == T_CLTS) 1500 return (EOPNOTSUPP); 1501 1502 /* 1503 * If the socket is ready to accept connections already, then 1504 * return without doing anything. This avoids a problem where 1505 * a second listen() call fails if a connection is pending and 1506 * leaves the socket unbound. Only when we are not unbinding 1507 * with the transport can we safely increase the backlog. 1508 */ 1509 if (so->so_state & SS_ACCEPTCONN && 1510 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1511 /*CONSTCOND*/ 1512 !solisten_tpi_tcp)) 1513 return (0); 1514 1515 if (so->so_state & SS_ISCONNECTED) 1516 return (EINVAL); 1517 1518 mutex_enter(&so->so_lock); 1519 so_lock_single(so); /* Set SOLOCKED */ 1520 1521 /* 1522 * If the listen doesn't change the backlog we do nothing. 1523 * This avoids an EPROTO error from the transport. 
1524 */ 1525 if ((so->so_state & SS_ACCEPTCONN) && 1526 so->so_backlog == backlog) 1527 goto done; 1528 1529 if (!(so->so_state & SS_ISBOUND)) { 1530 /* 1531 * Must have been explicitly bound in the UNIX domain. 1532 */ 1533 if (so->so_family == AF_UNIX) { 1534 error = EINVAL; 1535 goto done; 1536 } 1537 error = sotpi_bindlisten(so, NULL, 0, backlog, 1538 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1539 } else if (backlog > 0) { 1540 /* 1541 * AF_INET{,6} hack to avoid losing the port. 1542 * Assumes that all AF_INET{,6} transports can handle a 1543 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1544 * has already bound thus it is possible to avoid the unbind. 1545 */ 1546 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1547 /*CONSTCOND*/ 1548 !solisten_tpi_tcp)) { 1549 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1550 if (error) 1551 goto done; 1552 } 1553 error = sotpi_bindlisten(so, NULL, 0, backlog, 1554 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1555 } else { 1556 so->so_state |= SS_ACCEPTCONN; 1557 so->so_backlog = backlog; 1558 } 1559 if (error) 1560 goto done; 1561 ASSERT(so->so_state & SS_ACCEPTCONN); 1562 done: 1563 so_unlock_single(so, SOLOCKED); 1564 mutex_exit(&so->so_lock); 1565 return (error); 1566 } 1567 1568 /* 1569 * Disconnect either a specified seqno or all (-1). 1570 * The former is used on listening sockets only. 1571 * 1572 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1573 * the current use of sodisconnect(seqno == -1) is only for shutdown 1574 * so there is no point (and potentially incorrect) to unbind. 
1575 */ 1576 static int 1577 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1578 { 1579 struct T_discon_req discon_req; 1580 int error = 0; 1581 mblk_t *mp; 1582 1583 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1584 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1585 1586 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1587 mutex_enter(&so->so_lock); 1588 so_lock_single(so); /* Set SOLOCKED */ 1589 } else { 1590 ASSERT(MUTEX_HELD(&so->so_lock)); 1591 ASSERT(so->so_flag & SOLOCKED); 1592 } 1593 1594 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1595 error = EINVAL; 1596 eprintsoline(so, error); 1597 goto done; 1598 } 1599 1600 mutex_exit(&so->so_lock); 1601 /* 1602 * Flush the write side (unless this is a listener) 1603 * and then send down a T_DISCON_REQ. 1604 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1605 * and other messages.) 1606 */ 1607 if (!(so->so_state & SS_ACCEPTCONN)) 1608 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1609 1610 discon_req.PRIM_type = T_DISCON_REQ; 1611 discon_req.SEQ_number = seqno; 1612 mp = soallocproto1(&discon_req, sizeof (discon_req), 1613 0, _ALLOC_SLEEP, CRED()); 1614 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1615 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1616 mutex_enter(&so->so_lock); 1617 if (error) { 1618 eprintsoline(so, error); 1619 goto done; 1620 } 1621 1622 error = sowaitokack(so, T_DISCON_REQ); 1623 if (error) { 1624 eprintsoline(so, error); 1625 goto done; 1626 } 1627 /* 1628 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1629 * strsock_proto while the lock was dropped above, the disconnect 1630 * is allowed to complete. However, it is not possible to 1631 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 
1632 */ 1633 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1634 SOTOTPI(so)->sti_laddr_valid = 0; 1635 SOTOTPI(so)->sti_faddr_valid = 0; 1636 done: 1637 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1638 so_unlock_single(so, SOLOCKED); 1639 mutex_exit(&so->so_lock); 1640 } else { 1641 /* If the caller held the lock don't release it here */ 1642 ASSERT(MUTEX_HELD(&so->so_lock)); 1643 ASSERT(so->so_flag & SOLOCKED); 1644 } 1645 return (error); 1646 } 1647 1648 /* ARGSUSED */ 1649 int 1650 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1651 struct sonode **nsop) 1652 { 1653 struct T_conn_ind *conn_ind; 1654 struct T_conn_res *conn_res; 1655 int error = 0; 1656 mblk_t *mp, *ack_mp; 1657 struct sonode *nso; 1658 vnode_t *nvp; 1659 void *src; 1660 t_uscalar_t srclen; 1661 void *opt; 1662 t_uscalar_t optlen; 1663 t_scalar_t PRIM_type; 1664 t_scalar_t SEQ_number; 1665 size_t sinlen; 1666 sotpi_info_t *sti = SOTOTPI(so); 1667 sotpi_info_t *nsti; 1668 1669 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1670 (void *)so, fflag, (void *)nsop, 1671 pr_state(so->so_state, so->so_mode))); 1672 1673 /* 1674 * Defer single-threading the accepting socket until 1675 * the T_CONN_IND has been received and parsed and the 1676 * new sonode has been opened. 1677 */ 1678 1679 /* Check that we are not already connected */ 1680 if ((so->so_state & SS_ACCEPTCONN) == 0) 1681 goto conn_bad; 1682 again: 1683 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1684 goto e_bad; 1685 1686 ASSERT(mp != NULL); 1687 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1688 1689 /* 1690 * Save SEQ_number for error paths. 
1691 */ 1692 SEQ_number = conn_ind->SEQ_number; 1693 1694 srclen = conn_ind->SRC_length; 1695 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1696 if (src == NULL) { 1697 error = EPROTO; 1698 freemsg(mp); 1699 eprintsoline(so, error); 1700 goto disconnect_unlocked; 1701 } 1702 optlen = conn_ind->OPT_length; 1703 switch (so->so_family) { 1704 case AF_INET: 1705 case AF_INET6: 1706 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1707 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1708 &opt, conn_ind->OPT_length); 1709 } else { 1710 /* 1711 * The transport (in this case TCP) hasn't sent up 1712 * a pointer to an instance for the accept fast-path. 1713 * Disable fast-path completely because the call to 1714 * sotpi_create() below would otherwise create an 1715 * incomplete TCP instance, which would lead to 1716 * problems when sockfs sends a normal T_CONN_RES 1717 * message down the new stream. 1718 */ 1719 if (sti->sti_direct) { 1720 int rval; 1721 /* 1722 * For consistency we inform tcp to disable 1723 * direct interface on the listener, though 1724 * we can certainly live without doing this 1725 * because no data will ever travel upstream 1726 * on the listening socket. 1727 */ 1728 sti->sti_direct = 0; 1729 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1730 0, 0, K_TO_K, cr, &rval); 1731 } 1732 opt = NULL; 1733 optlen = 0; 1734 } 1735 break; 1736 case AF_UNIX: 1737 default: 1738 if (optlen != 0) { 1739 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1740 __TPI_ALIGN_SIZE); 1741 if (opt == NULL) { 1742 error = EPROTO; 1743 freemsg(mp); 1744 eprintsoline(so, error); 1745 goto disconnect_unlocked; 1746 } 1747 } 1748 if (so->so_family == AF_UNIX) { 1749 if (!sti->sti_faddr_noxlate) { 1750 src = NULL; 1751 srclen = 0; 1752 } 1753 /* Extract src address from options */ 1754 if (optlen != 0) 1755 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1756 } 1757 break; 1758 } 1759 1760 /* 1761 * Create the new socket. 
1762 */ 1763 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1764 if (nso == NULL) { 1765 ASSERT(error != 0); 1766 /* 1767 * Accept can not fail with ENOBUFS. sotpi_create 1768 * sleeps waiting for memory until a signal is caught 1769 * so return EINTR. 1770 */ 1771 freemsg(mp); 1772 if (error == ENOBUFS) 1773 error = EINTR; 1774 goto e_disc_unl; 1775 } 1776 nvp = SOTOV(nso); 1777 nsti = SOTOTPI(nso); 1778 1779 #ifdef DEBUG 1780 /* 1781 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1782 * it's inherited early to allow debugging of the accept code itself. 1783 */ 1784 nso->so_options |= so->so_options & SO_DEBUG; 1785 #endif /* DEBUG */ 1786 1787 /* 1788 * Save the SRC address from the T_CONN_IND 1789 * for getpeername to work on AF_UNIX and on transports that do not 1790 * support TI_GETPEERNAME. 1791 * 1792 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1793 * copyin_name(). 1794 */ 1795 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1796 error = EINVAL; 1797 freemsg(mp); 1798 eprintsoline(so, error); 1799 goto disconnect_vp_unlocked; 1800 } 1801 nsti->sti_faddr_len = (socklen_t)srclen; 1802 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1803 bcopy(src, nsti->sti_faddr_sa, srclen); 1804 nsti->sti_faddr_valid = 1; 1805 1806 /* 1807 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1808 */ 1809 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1810 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1811 cred_t *cr; 1812 pid_t cpid; 1813 1814 cr = msg_getcred(mp, &cpid); 1815 if (cr != NULL) { 1816 crhold(cr); 1817 nso->so_peercred = cr; 1818 nso->so_cpid = cpid; 1819 } 1820 freemsg(mp); 1821 1822 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1823 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1824 if (mp == NULL) { 1825 /* 1826 * Accept can not fail with ENOBUFS. 1827 * A signal was caught so return EINTR. 
1828 */ 1829 error = EINTR; 1830 eprintsoline(so, error); 1831 goto disconnect_vp_unlocked; 1832 } 1833 conn_res = (struct T_conn_res *)mp->b_rptr; 1834 } else { 1835 /* 1836 * For efficency reasons we use msg_extractcred; no crhold 1837 * needed since db_credp is cleared (i.e., we move the cred 1838 * from the message to so_peercred. 1839 */ 1840 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1841 1842 mp->b_rptr = DB_BASE(mp); 1843 conn_res = (struct T_conn_res *)mp->b_rptr; 1844 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1845 1846 mblk_setcred(mp, cr, curproc->p_pid); 1847 } 1848 1849 /* 1850 * New socket must be bound at least in sockfs and, except for AF_INET, 1851 * (or AF_INET6) it also has to be bound in the transport provider. 1852 * We set the local address in the sonode from the T_OK_ACK of the 1853 * T_CONN_RES. For this reason the address we bind to here isn't 1854 * important. 1855 */ 1856 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1857 /*CONSTCOND*/ 1858 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1859 /* 1860 * Optimization for AF_INET{,6} transports 1861 * that can handle a T_CONN_RES without being bound. 1862 */ 1863 mutex_enter(&nso->so_lock); 1864 so_automatic_bind(nso); 1865 mutex_exit(&nso->so_lock); 1866 } else { 1867 /* Perform NULL bind with the transport provider. */ 1868 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1869 cr)) != 0) { 1870 ASSERT(error != ENOBUFS); 1871 freemsg(mp); 1872 eprintsoline(nso, error); 1873 goto disconnect_vp_unlocked; 1874 } 1875 } 1876 1877 /* 1878 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1879 * so that any data arriving on the new socket will cause the 1880 * appropriate signals to be delivered for the new socket. 1881 * 1882 * No other thread (except strsock_proto and strsock_misc) 1883 * can access the new socket thus we relax the locking. 
1884 */ 1885 nso->so_pgrp = so->so_pgrp; 1886 nso->so_state |= so->so_state & SS_ASYNC; 1887 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1888 1889 if (nso->so_pgrp != 0) { 1890 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1891 eprintsoline(nso, error); 1892 error = 0; 1893 nso->so_pgrp = 0; 1894 } 1895 } 1896 1897 /* 1898 * Make note of the socket level options. TCP and IP level options 1899 * are already inherited. We could do all this after accept is 1900 * successful but doing it here simplifies code and no harm done 1901 * for error case. 1902 */ 1903 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1904 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1905 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1906 nso->so_sndbuf = so->so_sndbuf; 1907 nso->so_rcvbuf = so->so_rcvbuf; 1908 if (nso->so_options & SO_LINGER) 1909 nso->so_linger = so->so_linger; 1910 1911 /* 1912 * Note that the following sti_direct code path should be 1913 * removed once we are confident that the direct sockets 1914 * do not result in any degradation. 
1915 */ 1916 if (sti->sti_direct) { 1917 1918 ASSERT(opt != NULL); 1919 1920 conn_res->OPT_length = optlen; 1921 conn_res->OPT_offset = MBLKL(mp); 1922 bcopy(&opt, mp->b_wptr, optlen); 1923 mp->b_wptr += optlen; 1924 conn_res->PRIM_type = T_CONN_RES; 1925 conn_res->ACCEPTOR_id = 0; 1926 PRIM_type = T_CONN_RES; 1927 1928 /* Send down the T_CONN_RES on acceptor STREAM */ 1929 error = kstrputmsg(SOTOV(nso), mp, NULL, 1930 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1931 if (error) { 1932 mutex_enter(&so->so_lock); 1933 so_lock_single(so); 1934 eprintsoline(so, error); 1935 goto disconnect_vp; 1936 } 1937 mutex_enter(&nso->so_lock); 1938 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1939 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1940 if (error) { 1941 mutex_exit(&nso->so_lock); 1942 mutex_enter(&so->so_lock); 1943 so_lock_single(so); 1944 eprintsoline(so, error); 1945 goto disconnect_vp; 1946 } 1947 if (nso->so_family == AF_INET) { 1948 sin_t *sin; 1949 1950 sin = (sin_t *)(ack_mp->b_rptr + 1951 sizeof (struct T_ok_ack)); 1952 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 1953 nsti->sti_laddr_len = sizeof (sin_t); 1954 } else { 1955 sin6_t *sin6; 1956 1957 sin6 = (sin6_t *)(ack_mp->b_rptr + 1958 sizeof (struct T_ok_ack)); 1959 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 1960 nsti->sti_laddr_len = sizeof (sin6_t); 1961 } 1962 freemsg(ack_mp); 1963 1964 nso->so_state |= SS_ISCONNECTED; 1965 nso->so_proto_handle = (sock_lower_handle_t)opt; 1966 nsti->sti_laddr_valid = 1; 1967 1968 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 1969 /* 1970 * A NL7C marked listen()er so the new socket 1971 * inherits the listen()er's NL7C state, except 1972 * for NL7C_POLLIN. 1973 * 1974 * Only call NL7C to process the new socket if 1975 * the listen socket allows blocking i/o. 
1976 */ 1977 nsti->sti_nl7c_flags = 1978 sti->sti_nl7c_flags & (~NL7C_POLLIN); 1979 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1980 /* 1981 * Nonblocking accept() just make it 1982 * persist to defer processing to the 1983 * read-side syscall (e.g. read). 1984 */ 1985 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 1986 } else if (nl7c_process(nso, B_FALSE)) { 1987 /* 1988 * NL7C has completed processing on the 1989 * socket, close the socket and back to 1990 * the top to await the next T_CONN_IND. 1991 */ 1992 mutex_exit(&nso->so_lock); 1993 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1994 cr, NULL); 1995 VN_RELE(nvp); 1996 goto again; 1997 } 1998 /* Pass the new socket out */ 1999 } 2000 2001 mutex_exit(&nso->so_lock); 2002 2003 /* 2004 * It's possible, through the use of autopush for example, 2005 * that the acceptor stream may not support sti_direct 2006 * semantics. If the new socket does not support sti_direct 2007 * we issue a _SIOCSOCKFALLBACK to inform the transport 2008 * as we would in the I_PUSH case. 2009 */ 2010 if (nsti->sti_direct == 0) { 2011 int rval; 2012 2013 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2014 0, 0, K_TO_K, cr, &rval)) != 0) { 2015 mutex_enter(&so->so_lock); 2016 so_lock_single(so); 2017 eprintsoline(so, error); 2018 goto disconnect_vp; 2019 } 2020 } 2021 2022 /* 2023 * Pass out new socket. 2024 */ 2025 if (nsop != NULL) 2026 *nsop = nso; 2027 2028 return (0); 2029 } 2030 2031 /* 2032 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2033 * which don't support the FireEngine accept fast-path. It is also 2034 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2035 * again. Neither sockfs nor TCP attempt to find out if some other 2036 * random module has been inserted in between (in which case we 2037 * should follow TLI accept behaviour). We blindly assume the worst 2038 * case and revert back to old behaviour i.e. 
TCP will not send us 2039 * any option (eager) and the accept should happen on the listener 2040 * queue. Any queued T_conn_ind have already got their options removed 2041 * by so_sock2_stream() when "sockmod" was I_POP'd. 2042 */ 2043 /* 2044 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2045 */ 2046 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2047 #ifdef _ILP32 2048 queue_t *q; 2049 2050 /* 2051 * Find read queue in driver 2052 * Can safely do this since we "own" nso/nvp. 2053 */ 2054 q = strvp2wq(nvp)->q_next; 2055 while (SAMESTR(q)) 2056 q = q->q_next; 2057 q = RD(q); 2058 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2059 #else 2060 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2061 #endif /* _ILP32 */ 2062 conn_res->PRIM_type = O_T_CONN_RES; 2063 PRIM_type = O_T_CONN_RES; 2064 } else { 2065 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2066 conn_res->PRIM_type = T_CONN_RES; 2067 PRIM_type = T_CONN_RES; 2068 } 2069 conn_res->SEQ_number = SEQ_number; 2070 conn_res->OPT_length = 0; 2071 conn_res->OPT_offset = 0; 2072 2073 mutex_enter(&so->so_lock); 2074 so_lock_single(so); /* Set SOLOCKED */ 2075 mutex_exit(&so->so_lock); 2076 2077 error = kstrputmsg(SOTOV(so), mp, NULL, 2078 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2079 mutex_enter(&so->so_lock); 2080 if (error) { 2081 eprintsoline(so, error); 2082 goto disconnect_vp; 2083 } 2084 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2085 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2086 if (error) { 2087 eprintsoline(so, error); 2088 goto disconnect_vp; 2089 } 2090 mutex_exit(&so->so_lock); 2091 /* 2092 * If there is a sin/sin6 appended onto the T_OK_ACK use 2093 * that to set the local address. If this is not present 2094 * then we zero out the address and don't set the 2095 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2096 * the pathname from the listening socket. 
2097 * In the case where this is TCP or an AF_UNIX socket the 2098 * client side may have queued data or a T_ORDREL in the 2099 * transport. Having now sent the T_CONN_RES we may receive 2100 * those queued messages at any time. Hold the acceptor 2101 * so_lock until its state and laddr are finalized. 2102 */ 2103 mutex_enter(&nso->so_lock); 2104 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 2105 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 2106 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2107 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2108 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2109 nsti->sti_laddr_len = sinlen; 2110 nsti->sti_laddr_valid = 1; 2111 } else if (nso->so_family == AF_UNIX) { 2112 ASSERT(so->so_family == AF_UNIX); 2113 nsti->sti_laddr_len = sti->sti_laddr_len; 2114 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2115 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2116 nsti->sti_laddr_len); 2117 nsti->sti_laddr_valid = 1; 2118 } else { 2119 nsti->sti_laddr_len = sti->sti_laddr_len; 2120 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2121 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2122 nsti->sti_laddr_sa->sa_family = nso->so_family; 2123 } 2124 nso->so_state |= SS_ISCONNECTED; 2125 mutex_exit(&nso->so_lock); 2126 2127 freemsg(ack_mp); 2128 2129 mutex_enter(&so->so_lock); 2130 so_unlock_single(so, SOLOCKED); 2131 mutex_exit(&so->so_lock); 2132 2133 /* 2134 * Pass out new socket. 
2135 */ 2136 if (nsop != NULL) 2137 *nsop = nso; 2138 2139 return (0); 2140 2141 2142 eproto_disc_unl: 2143 error = EPROTO; 2144 e_disc_unl: 2145 eprintsoline(so, error); 2146 goto disconnect_unlocked; 2147 2148 pr_disc_vp_unl: 2149 eprintsoline(so, error); 2150 disconnect_vp_unlocked: 2151 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2152 VN_RELE(nvp); 2153 disconnect_unlocked: 2154 (void) sodisconnect(so, SEQ_number, 0); 2155 return (error); 2156 2157 pr_disc_vp: 2158 eprintsoline(so, error); 2159 disconnect_vp: 2160 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2161 so_unlock_single(so, SOLOCKED); 2162 mutex_exit(&so->so_lock); 2163 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2164 VN_RELE(nvp); 2165 return (error); 2166 2167 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2168 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2169 ? EOPNOTSUPP : EINVAL; 2170 e_bad: 2171 eprintsoline(so, error); 2172 return (error); 2173 } 2174 2175 /* 2176 * connect a socket. 2177 * 2178 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2179 * unconnect (by specifying a null address). 2180 */ 2181 int 2182 sotpi_connect(struct sonode *so, 2183 struct sockaddr *name, 2184 socklen_t namelen, 2185 int fflag, 2186 int flags, 2187 struct cred *cr) 2188 { 2189 struct T_conn_req conn_req; 2190 int error = 0; 2191 mblk_t *mp; 2192 void *src; 2193 socklen_t srclen; 2194 void *addr; 2195 socklen_t addrlen; 2196 boolean_t need_unlock; 2197 sotpi_info_t *sti = SOTOTPI(so); 2198 2199 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2200 (void *)so, (void *)name, namelen, fflag, flags, 2201 pr_state(so->so_state, so->so_mode))); 2202 2203 /* 2204 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2205 * avoid sleeping for memory with SOLOCKED held. 2206 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2207 * + sizeof (struct T_opthdr). 
2208 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2209 * exceed sti_faddr_maxlen). 2210 */ 2211 mp = soallocproto(sizeof (struct T_conn_req) + 2212 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 2213 cr); 2214 if (mp == NULL) { 2215 /* 2216 * Connect can not fail with ENOBUFS. A signal was 2217 * caught so return EINTR. 2218 */ 2219 error = EINTR; 2220 eprintsoline(so, error); 2221 return (error); 2222 } 2223 2224 mutex_enter(&so->so_lock); 2225 /* 2226 * Make sure there is a preallocated T_unbind_req message 2227 * before any binding. This message is allocated when the 2228 * socket is created. Since another thread can consume 2229 * so_unbind_mp by the time we return from so_lock_single(), 2230 * we should check the availability of so_unbind_mp after 2231 * we return from so_lock_single(). 2232 */ 2233 2234 so_lock_single(so); /* Set SOLOCKED */ 2235 need_unlock = B_TRUE; 2236 2237 if (sti->sti_unbind_mp == NULL) { 2238 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2239 /* NOTE: holding so_lock while sleeping */ 2240 sti->sti_unbind_mp = 2241 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2242 if (sti->sti_unbind_mp == NULL) { 2243 error = EINTR; 2244 goto done; 2245 } 2246 } 2247 2248 /* 2249 * Can't have done a listen before connecting. 2250 */ 2251 if (so->so_state & SS_ACCEPTCONN) { 2252 error = EOPNOTSUPP; 2253 goto done; 2254 } 2255 2256 /* 2257 * Must be bound with the transport 2258 */ 2259 if (!(so->so_state & SS_ISBOUND)) { 2260 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2261 /*CONSTCOND*/ 2262 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2263 /* 2264 * Optimization for AF_INET{,6} transports 2265 * that can handle a T_CONN_REQ without being bound. 
2266 */ 2267 so_automatic_bind(so); 2268 } else { 2269 error = sotpi_bind(so, NULL, 0, 2270 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2271 if (error) 2272 goto done; 2273 } 2274 ASSERT(so->so_state & SS_ISBOUND); 2275 flags |= _SOCONNECT_DID_BIND; 2276 } 2277 2278 /* 2279 * Handle a connect to a name parameter of type AF_UNSPEC like a 2280 * connect to a null address. This is the portable method to 2281 * unconnect a socket. 2282 */ 2283 if ((namelen >= sizeof (sa_family_t)) && 2284 (name->sa_family == AF_UNSPEC)) { 2285 name = NULL; 2286 namelen = 0; 2287 } 2288 2289 /* 2290 * Check that we are not already connected. 2291 * A connection-oriented socket cannot be reconnected. 2292 * A connected connection-less socket can be 2293 * - connected to a different address by a subsequent connect 2294 * - "unconnected" by a connect to the NULL address 2295 */ 2296 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2297 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2298 if (so->so_mode & SM_CONNREQUIRED) { 2299 /* Connection-oriented socket */ 2300 error = so->so_state & SS_ISCONNECTED ? 2301 EISCONN : EALREADY; 2302 goto done; 2303 } 2304 /* Connection-less socket */ 2305 if (name == NULL) { 2306 /* 2307 * Remove the connected state and clear SO_DGRAM_ERRIND 2308 * since it was set when the socket was connected. 2309 * If this is UDP also send down a T_DISCON_REQ. 2310 */ 2311 int val; 2312 2313 if ((so->so_family == AF_INET || 2314 so->so_family == AF_INET6) && 2315 (so->so_type == SOCK_DGRAM || 2316 so->so_type == SOCK_RAW) && 2317 /*CONSTCOND*/ 2318 !soconnect_tpi_udp) { 2319 /* XXX What about implicitly unbinding here? 
*/ 2320 error = sodisconnect(so, -1, 2321 _SODISCONNECT_LOCK_HELD); 2322 } else { 2323 so->so_state &= 2324 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2325 sti->sti_faddr_valid = 0; 2326 sti->sti_faddr_len = 0; 2327 } 2328 2329 /* Remove SOLOCKED since setsockopt will grab it */ 2330 so_unlock_single(so, SOLOCKED); 2331 mutex_exit(&so->so_lock); 2332 2333 val = 0; 2334 (void) sotpi_setsockopt(so, SOL_SOCKET, 2335 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2336 cr); 2337 2338 mutex_enter(&so->so_lock); 2339 so_lock_single(so); /* Set SOLOCKED */ 2340 goto done; 2341 } 2342 } 2343 ASSERT(so->so_state & SS_ISBOUND); 2344 2345 if (name == NULL || namelen == 0) { 2346 error = EINVAL; 2347 goto done; 2348 } 2349 /* 2350 * Mark the socket if sti_faddr_sa represents the transport level 2351 * address. 2352 */ 2353 if (flags & _SOCONNECT_NOXLATE) { 2354 struct sockaddr_ux *soaddr_ux; 2355 2356 ASSERT(so->so_family == AF_UNIX); 2357 if (namelen != sizeof (struct sockaddr_ux)) { 2358 error = EINVAL; 2359 goto done; 2360 } 2361 soaddr_ux = (struct sockaddr_ux *)name; 2362 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2363 namelen = sizeof (soaddr_ux->sou_addr); 2364 sti->sti_faddr_noxlate = 1; 2365 } 2366 2367 /* 2368 * Length and family checks. 2369 */ 2370 error = so_addr_verify(so, name, namelen); 2371 if (error) 2372 goto bad; 2373 2374 /* 2375 * Save foreign address. Needed for AF_UNIX as well as 2376 * transport providers that do not support TI_GETPEERNAME. 2377 * Also used for cached foreign address for TCP and UDP. 2378 */ 2379 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2380 error = EINVAL; 2381 goto done; 2382 } 2383 sti->sti_faddr_len = (socklen_t)namelen; 2384 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2385 bcopy(name, sti->sti_faddr_sa, namelen); 2386 sti->sti_faddr_valid = 1; 2387 2388 if (so->so_family == AF_UNIX) { 2389 if (sti->sti_faddr_noxlate) { 2390 /* 2391 * Already have a transport internal address. 
Do not 2392 * pass any (transport internal) source address. 2393 */ 2394 addr = sti->sti_faddr_sa; 2395 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2396 src = NULL; 2397 srclen = 0; 2398 } else { 2399 /* 2400 * Pass the sockaddr_un source address as an option 2401 * and translate the remote address. 2402 * Holding so_lock thus sti_laddr_sa can not change. 2403 */ 2404 src = sti->sti_laddr_sa; 2405 srclen = (t_uscalar_t)sti->sti_laddr_len; 2406 dprintso(so, 1, 2407 ("sotpi_connect UNIX: srclen %d, src %p\n", 2408 srclen, src)); 2409 error = so_ux_addr_xlate(so, 2410 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2411 (flags & _SOCONNECT_XPG4_2), 2412 &addr, &addrlen); 2413 if (error) 2414 goto bad; 2415 } 2416 } else { 2417 addr = sti->sti_faddr_sa; 2418 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2419 src = NULL; 2420 srclen = 0; 2421 } 2422 /* 2423 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2424 * option which asks the transport provider to send T_UDERR_IND 2425 * messages. These T_UDERR_IND messages are used to return connected 2426 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2427 * 2428 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2429 * we send down a T_CONN_REQ. This is needed to let the 2430 * transport assign a local address that is consistent with 2431 * the remote address. Applications depend on a getsockname() 2432 * after a connect() to retrieve the "source" IP address for 2433 * the connected socket. Invalidate the cached local address 2434 * to force getsockname() to enquire of the transport. 2435 */ 2436 if (!(so->so_mode & SM_CONNREQUIRED)) { 2437 /* 2438 * Datagram socket. 
2439 */ 2440 int32_t val; 2441 2442 so_unlock_single(so, SOLOCKED); 2443 mutex_exit(&so->so_lock); 2444 2445 val = 1; 2446 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2447 &val, (t_uscalar_t)sizeof (val), cr); 2448 2449 mutex_enter(&so->so_lock); 2450 so_lock_single(so); /* Set SOLOCKED */ 2451 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2452 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2453 soconnect_tpi_udp) { 2454 soisconnected(so); 2455 goto done; 2456 } 2457 /* 2458 * Send down T_CONN_REQ etc. 2459 * Clear fflag to avoid returning EWOULDBLOCK. 2460 */ 2461 fflag = 0; 2462 ASSERT(so->so_family != AF_UNIX); 2463 sti->sti_laddr_valid = 0; 2464 } else if (sti->sti_laddr_len != 0) { 2465 /* 2466 * If the local address or port was "any" then it may be 2467 * changed by the transport as a result of the 2468 * connect. Invalidate the cached version if we have one. 2469 */ 2470 switch (so->so_family) { 2471 case AF_INET: 2472 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2473 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2474 INADDR_ANY || 2475 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2476 sti->sti_laddr_valid = 0; 2477 break; 2478 2479 case AF_INET6: 2480 ASSERT(sti->sti_laddr_len == 2481 (socklen_t)sizeof (sin6_t)); 2482 if (IN6_IS_ADDR_UNSPECIFIED( 2483 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2484 IN6_IS_ADDR_V4MAPPED_ANY( 2485 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2486 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2487 sti->sti_laddr_valid = 0; 2488 break; 2489 2490 default: 2491 break; 2492 } 2493 } 2494 2495 /* 2496 * Check for failure of an earlier call 2497 */ 2498 if (so->so_error != 0) 2499 goto so_bad; 2500 2501 /* 2502 * Send down T_CONN_REQ. Message was allocated above. 
2503 */ 2504 conn_req.PRIM_type = T_CONN_REQ; 2505 conn_req.DEST_length = addrlen; 2506 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2507 if (srclen == 0) { 2508 conn_req.OPT_length = 0; 2509 conn_req.OPT_offset = 0; 2510 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2511 soappendmsg(mp, addr, addrlen); 2512 } else { 2513 /* 2514 * There is a AF_UNIX sockaddr_un to include as a source 2515 * address option. 2516 */ 2517 struct T_opthdr toh; 2518 2519 toh.level = SOL_SOCKET; 2520 toh.name = SO_SRCADDR; 2521 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2522 toh.status = 0; 2523 conn_req.OPT_length = 2524 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2525 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2526 _TPI_ALIGN_TOPT(addrlen)); 2527 2528 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2529 soappendmsg(mp, addr, addrlen); 2530 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2531 soappendmsg(mp, &toh, sizeof (toh)); 2532 soappendmsg(mp, src, srclen); 2533 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2534 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2535 } 2536 /* 2537 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2538 * in order to have the right state when the T_CONN_CON shows up. 
2539 */ 2540 soisconnecting(so); 2541 mutex_exit(&so->so_lock); 2542 2543 if (AU_AUDITING()) 2544 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2545 2546 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2547 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2548 mp = NULL; 2549 mutex_enter(&so->so_lock); 2550 if (error != 0) 2551 goto bad; 2552 2553 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2554 goto bad; 2555 2556 /* Allow other threads to access the socket */ 2557 so_unlock_single(so, SOLOCKED); 2558 need_unlock = B_FALSE; 2559 2560 /* 2561 * Wait until we get a T_CONN_CON or an error 2562 */ 2563 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2564 so_lock_single(so); /* Set SOLOCKED */ 2565 need_unlock = B_TRUE; 2566 } 2567 2568 done: 2569 freemsg(mp); 2570 switch (error) { 2571 case EINPROGRESS: 2572 case EALREADY: 2573 case EISCONN: 2574 case EINTR: 2575 /* Non-fatal errors */ 2576 sti->sti_laddr_valid = 0; 2577 /* FALLTHRU */ 2578 case 0: 2579 break; 2580 default: 2581 ASSERT(need_unlock); 2582 /* 2583 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2584 * and invalidate local-address cache 2585 */ 2586 so->so_state &= ~SS_ISCONNECTING; 2587 sti->sti_laddr_valid = 0; 2588 /* A discon_ind might have already unbound us */ 2589 if ((flags & _SOCONNECT_DID_BIND) && 2590 (so->so_state & SS_ISBOUND)) { 2591 int err; 2592 2593 err = sotpi_unbind(so, 0); 2594 /* LINTED - statement has no conseq */ 2595 if (err) { 2596 eprintsoline(so, err); 2597 } 2598 } 2599 break; 2600 } 2601 if (need_unlock) 2602 so_unlock_single(so, SOLOCKED); 2603 mutex_exit(&so->so_lock); 2604 return (error); 2605 2606 so_bad: error = sogeterr(so, B_TRUE); 2607 bad: eprintsoline(so, error); 2608 goto done; 2609 } 2610 2611 /* ARGSUSED */ 2612 int 2613 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2614 { 2615 struct T_ordrel_req ordrel_req; 2616 mblk_t *mp; 2617 uint_t old_state, state_change; 2618 int error = 0; 2619 sotpi_info_t *sti = SOTOTPI(so); 2620 2621 
dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2622 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2623 2624 mutex_enter(&so->so_lock); 2625 so_lock_single(so); /* Set SOLOCKED */ 2626 2627 /* 2628 * SunOS 4.X has no check for datagram sockets. 2629 * 5.X checks that it is connected (ENOTCONN) 2630 * X/Open requires that we check the connected state. 2631 */ 2632 if (!(so->so_state & SS_ISCONNECTED)) { 2633 if (!xnet_skip_checks) { 2634 error = ENOTCONN; 2635 if (xnet_check_print) { 2636 printf("sockfs: X/Open shutdown check " 2637 "caused ENOTCONN\n"); 2638 } 2639 } 2640 goto done; 2641 } 2642 /* 2643 * Record the current state and then perform any state changes. 2644 * Then use the difference between the old and new states to 2645 * determine which messages need to be sent. 2646 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2647 * duplicate calls to shutdown(). 2648 */ 2649 old_state = so->so_state; 2650 2651 switch (how) { 2652 case 0: 2653 socantrcvmore(so); 2654 break; 2655 case 1: 2656 socantsendmore(so); 2657 break; 2658 case 2: 2659 socantsendmore(so); 2660 socantrcvmore(so); 2661 break; 2662 default: 2663 error = EINVAL; 2664 goto done; 2665 } 2666 2667 /* 2668 * Assumes that the SS_CANT* flags are never cleared in the above code. 2669 */ 2670 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2671 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2672 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2673 2674 switch (state_change) { 2675 case 0: 2676 dprintso(so, 1, 2677 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2678 so->so_state)); 2679 goto done; 2680 2681 case SS_CANTRCVMORE: 2682 mutex_exit(&so->so_lock); 2683 strseteof(SOTOV(so), 1); 2684 /* 2685 * strseteof takes care of read side wakeups, 2686 * pollwakeups, and signals. 2687 */ 2688 /* 2689 * Get the read lock before flushing data to avoid problems 2690 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2691 */ 2692 mutex_enter(&so->so_lock); 2693 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2694 mutex_exit(&so->so_lock); 2695 2696 /* Flush read side queue */ 2697 strflushrq(SOTOV(so), FLUSHALL); 2698 2699 mutex_enter(&so->so_lock); 2700 so_unlock_read(so); /* Clear SOREADLOCKED */ 2701 break; 2702 2703 case SS_CANTSENDMORE: 2704 mutex_exit(&so->so_lock); 2705 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2706 mutex_enter(&so->so_lock); 2707 break; 2708 2709 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2710 mutex_exit(&so->so_lock); 2711 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2712 strseteof(SOTOV(so), 1); 2713 /* 2714 * strseteof takes care of read side wakeups, 2715 * pollwakeups, and signals. 2716 */ 2717 /* 2718 * Get the read lock before flushing data to avoid problems 2719 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2720 */ 2721 mutex_enter(&so->so_lock); 2722 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2723 mutex_exit(&so->so_lock); 2724 2725 /* Flush read side queue */ 2726 strflushrq(SOTOV(so), FLUSHALL); 2727 2728 mutex_enter(&so->so_lock); 2729 so_unlock_read(so); /* Clear SOREADLOCKED */ 2730 break; 2731 } 2732 2733 ASSERT(MUTEX_HELD(&so->so_lock)); 2734 2735 /* 2736 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2737 * was set due to this call and the new state has both of them set: 2738 * Send the AF_UNIX close indication 2739 * For T_COTS send a discon_ind 2740 * 2741 * If cantsend was set due to this call: 2742 * For T_COTSORD send an ordrel_ind 2743 * 2744 * Note that for T_CLTS there is no message sent here. 2745 */ 2746 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2747 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2748 /* 2749 * For SunOS 4.X compatibility we tell the other end 2750 * that we are unable to receive at this point. 
2751 */ 2752 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2753 so_unix_close(so); 2754 2755 if (sti->sti_serv_type == T_COTS) 2756 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2757 } 2758 if ((state_change & SS_CANTSENDMORE) && 2759 (sti->sti_serv_type == T_COTS_ORD)) { 2760 /* Send an orderly release */ 2761 ordrel_req.PRIM_type = T_ORDREL_REQ; 2762 2763 mutex_exit(&so->so_lock); 2764 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2765 0, _ALLOC_SLEEP, cr); 2766 /* 2767 * Send down the T_ORDREL_REQ even if there is flow control. 2768 * This prevents shutdown from blocking. 2769 * Note that there is no T_OK_ACK for ordrel_req. 2770 */ 2771 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2772 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2773 mutex_enter(&so->so_lock); 2774 if (error) { 2775 eprintsoline(so, error); 2776 goto done; 2777 } 2778 } 2779 2780 done: 2781 so_unlock_single(so, SOLOCKED); 2782 mutex_exit(&so->so_lock); 2783 return (error); 2784 } 2785 2786 /* 2787 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2788 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2789 * that we have closed. 2790 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2791 * T_UNITDATA_REQ containing the same option. 2792 * 2793 * For SOCK_DGRAM half-connections (somebody connected to this end 2794 * but this end is not connect) we don't know where to send any 2795 * SO_UNIX_CLOSE. 2796 * 2797 * We have to ignore stream head errors just in case there has been 2798 * a shutdown(output). 2799 * Ignore any flow control to try to get the message more quickly to the peer. 2800 * While locally ignoring flow control solves the problem when there 2801 * is only the loopback transport on the stream it would not provide 2802 * the correct AF_UNIX socket semantics when one or more modules have 2803 * been pushed. 
2804 */ 2805 void 2806 so_unix_close(struct sonode *so) 2807 { 2808 int error; 2809 struct T_opthdr toh; 2810 mblk_t *mp; 2811 sotpi_info_t *sti = SOTOTPI(so); 2812 2813 ASSERT(MUTEX_HELD(&so->so_lock)); 2814 2815 ASSERT(so->so_family == AF_UNIX); 2816 2817 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2818 (SS_ISCONNECTED|SS_ISBOUND)) 2819 return; 2820 2821 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2822 (void *)so, pr_state(so->so_state, so->so_mode))); 2823 2824 toh.level = SOL_SOCKET; 2825 toh.name = SO_UNIX_CLOSE; 2826 2827 /* zero length + header */ 2828 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2829 toh.status = 0; 2830 2831 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2832 struct T_optdata_req tdr; 2833 2834 tdr.PRIM_type = T_OPTDATA_REQ; 2835 tdr.DATA_flag = 0; 2836 2837 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2838 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2839 2840 /* NOTE: holding so_lock while sleeping */ 2841 mp = soallocproto2(&tdr, sizeof (tdr), 2842 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2843 } else { 2844 struct T_unitdata_req tudr; 2845 void *addr; 2846 socklen_t addrlen; 2847 void *src; 2848 socklen_t srclen; 2849 struct T_opthdr toh2; 2850 t_scalar_t size; 2851 2852 /* Connecteded DGRAM socket */ 2853 2854 /* 2855 * For AF_UNIX the destination address is translated to 2856 * an internal name and the source address is passed as 2857 * an option. 2858 */ 2859 /* 2860 * Length and family checks. 2861 */ 2862 error = so_addr_verify(so, sti->sti_faddr_sa, 2863 (t_uscalar_t)sti->sti_faddr_len); 2864 if (error) { 2865 eprintsoline(so, error); 2866 return; 2867 } 2868 if (sti->sti_faddr_noxlate) { 2869 /* 2870 * Already have a transport internal address. Do not 2871 * pass any (transport internal) source address. 
2872 */ 2873 addr = sti->sti_faddr_sa; 2874 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2875 src = NULL; 2876 srclen = 0; 2877 } else { 2878 /* 2879 * Pass the sockaddr_un source address as an option 2880 * and translate the remote address. 2881 * Holding so_lock thus sti_laddr_sa can not change. 2882 */ 2883 src = sti->sti_laddr_sa; 2884 srclen = (socklen_t)sti->sti_laddr_len; 2885 dprintso(so, 1, 2886 ("so_ux_close: srclen %d, src %p\n", 2887 srclen, src)); 2888 error = so_ux_addr_xlate(so, 2889 sti->sti_faddr_sa, 2890 (socklen_t)sti->sti_faddr_len, 0, 2891 &addr, &addrlen); 2892 if (error) { 2893 eprintsoline(so, error); 2894 return; 2895 } 2896 } 2897 tudr.PRIM_type = T_UNITDATA_REQ; 2898 tudr.DEST_length = addrlen; 2899 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2900 if (srclen == 0) { 2901 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2902 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2903 _TPI_ALIGN_TOPT(addrlen)); 2904 2905 size = tudr.OPT_offset + tudr.OPT_length; 2906 /* NOTE: holding so_lock while sleeping */ 2907 mp = soallocproto2(&tudr, sizeof (tudr), 2908 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2909 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2910 soappendmsg(mp, &toh, sizeof (toh)); 2911 } else { 2912 /* 2913 * There is a AF_UNIX sockaddr_un to include as a 2914 * source address option. 
2915 */ 2916 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2917 _TPI_ALIGN_TOPT(srclen)); 2918 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2919 _TPI_ALIGN_TOPT(addrlen)); 2920 2921 toh2.level = SOL_SOCKET; 2922 toh2.name = SO_SRCADDR; 2923 toh2.len = (t_uscalar_t)(srclen + 2924 sizeof (struct T_opthdr)); 2925 toh2.status = 0; 2926 2927 size = tudr.OPT_offset + tudr.OPT_length; 2928 2929 /* NOTE: holding so_lock while sleeping */ 2930 mp = soallocproto2(&tudr, sizeof (tudr), 2931 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2932 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2933 soappendmsg(mp, &toh, sizeof (toh)); 2934 soappendmsg(mp, &toh2, sizeof (toh2)); 2935 soappendmsg(mp, src, srclen); 2936 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2937 } 2938 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2939 } 2940 mutex_exit(&so->so_lock); 2941 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2942 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2943 mutex_enter(&so->so_lock); 2944 } 2945 2946 /* 2947 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2948 * In addition, the caller typically verifies that there is some 2949 * potential state to clear by checking 2950 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2951 * before calling this routine. 2952 * Note that such a check can be made without holding so_lock since 2953 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2954 * decrements sti_oobsigcnt. 2955 * 2956 * When data is read *after* the point that all pending 2957 * oob data has been consumed the oob indication is cleared. 2958 * 2959 * This logic keeps select/poll returning POLLRDBAND and 2960 * SIOCATMARK returning true until we have read past 2961 * the mark. 
2962 */ 2963 static void 2964 sorecv_update_oobstate(struct sonode *so) 2965 { 2966 sotpi_info_t *sti = SOTOTPI(so); 2967 2968 mutex_enter(&so->so_lock); 2969 ASSERT(so_verify_oobstate(so)); 2970 dprintso(so, 1, 2971 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2972 sti->sti_oobsigcnt, 2973 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 2974 if (sti->sti_oobsigcnt == 0) { 2975 /* No more pending oob indications */ 2976 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2977 freemsg(so->so_oobmsg); 2978 so->so_oobmsg = NULL; 2979 } 2980 ASSERT(so_verify_oobstate(so)); 2981 mutex_exit(&so->so_lock); 2982 } 2983 2984 /* 2985 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2986 */ 2987 static int 2988 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2989 { 2990 sotpi_info_t *sti = SOTOTPI(so); 2991 int error = 0; 2992 mblk_t *tmp = NULL; 2993 mblk_t *pmp = NULL; 2994 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 2995 2996 ASSERT(nmp != NULL); 2997 2998 while (nmp != NULL && uiop->uio_resid > 0) { 2999 ssize_t n; 3000 3001 if (DB_TYPE(nmp) == M_DATA) { 3002 /* 3003 * We have some data, uiomove up to resid bytes. 3004 */ 3005 n = MIN(MBLKL(nmp), uiop->uio_resid); 3006 if (n > 0) 3007 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 3008 nmp->b_rptr += n; 3009 if (nmp->b_rptr == nmp->b_wptr) { 3010 pmp = nmp; 3011 nmp = nmp->b_cont; 3012 } 3013 if (error) 3014 break; 3015 } else { 3016 /* 3017 * We only handle data, save for caller to handle. 
3018 */ 3019 if (pmp != NULL) { 3020 pmp->b_cont = nmp->b_cont; 3021 } 3022 nmp->b_cont = NULL; 3023 if (*rmp == NULL) { 3024 *rmp = nmp; 3025 } else { 3026 tmp->b_cont = nmp; 3027 } 3028 nmp = nmp->b_cont; 3029 tmp = nmp; 3030 } 3031 } 3032 if (pmp != NULL) { 3033 /* Free any mblk_t(s) which we have consumed */ 3034 pmp->b_cont = NULL; 3035 freemsg(sti->sti_nl7c_rcv_mp); 3036 } 3037 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3038 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3039 if (error == 0) { 3040 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3041 3042 error = p->r_v.r_v2; 3043 p->r_v.r_v2 = 0; 3044 } 3045 rp->r_vals = sti->sti_nl7c_rcv_rval; 3046 sti->sti_nl7c_rcv_rval = 0; 3047 } else { 3048 /* More mblk_t(s) to process so no rval to return */ 3049 rp->r_vals = 0; 3050 } 3051 return (error); 3052 } 3053 /* 3054 * Receive the next message on the queue. 3055 * If msg_controllen is non-zero when called the caller is interested in 3056 * any received control info (options). 3057 * If msg_namelen is non-zero when called the caller is interested in 3058 * any received source address. 3059 * The routine returns with msg_control and msg_name pointing to 3060 * kmem_alloc'ed memory which the caller has to free. 
3061 */ 3062 /* ARGSUSED */ 3063 int 3064 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3065 struct cred *cr) 3066 { 3067 union T_primitives *tpr; 3068 mblk_t *mp; 3069 uchar_t pri; 3070 int pflag, opflag; 3071 void *control; 3072 t_uscalar_t controllen; 3073 t_uscalar_t namelen; 3074 int so_state = so->so_state; /* Snapshot */ 3075 ssize_t saved_resid; 3076 rval_t rval; 3077 int flags; 3078 clock_t timout; 3079 int error = 0; 3080 sotpi_info_t *sti = SOTOTPI(so); 3081 3082 flags = msg->msg_flags; 3083 msg->msg_flags = 0; 3084 3085 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3086 (void *)so, (void *)msg, flags, 3087 pr_state(so->so_state, so->so_mode), so->so_error)); 3088 3089 if (so->so_version == SOV_STREAM) { 3090 so_update_attrs(so, SOACC); 3091 /* The imaginary "sockmod" has been popped - act as a stream */ 3092 return (strread(SOTOV(so), uiop, cr)); 3093 } 3094 3095 /* 3096 * If we are not connected because we have never been connected 3097 * we return ENOTCONN. If we have been connected (but are no longer 3098 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3099 * the EOF. 3100 * 3101 * An alternative would be to post an ENOTCONN error in stream head 3102 * (read+write) and clear it when we're connected. However, that error 3103 * would cause incorrect poll/select behavior! 3104 */ 3105 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3106 (so->so_mode & SM_CONNREQUIRED)) { 3107 return (ENOTCONN); 3108 } 3109 3110 /* 3111 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3112 * after checking that the read queue is empty) and returns zero. 3113 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3114 * is zero. 
3115 */ 3116 3117 if (flags & MSG_OOB) { 3118 /* Check that the transport supports OOB */ 3119 if (!(so->so_mode & SM_EXDATA)) 3120 return (EOPNOTSUPP); 3121 so_update_attrs(so, SOACC); 3122 return (sorecvoob(so, msg, uiop, flags, 3123 (so->so_options & SO_OOBINLINE))); 3124 } 3125 3126 so_update_attrs(so, SOACC); 3127 3128 /* 3129 * Set msg_controllen and msg_namelen to zero here to make it 3130 * simpler in the cases that no control or name is returned. 3131 */ 3132 controllen = msg->msg_controllen; 3133 namelen = msg->msg_namelen; 3134 msg->msg_controllen = 0; 3135 msg->msg_namelen = 0; 3136 3137 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3138 namelen, controllen)); 3139 3140 mutex_enter(&so->so_lock); 3141 /* 3142 * If an NL7C enabled socket and not waiting for write data. 3143 */ 3144 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3145 NL7C_ENABLED) { 3146 if (sti->sti_nl7c_uri) { 3147 /* Close uri processing for a previous request */ 3148 nl7c_close(so); 3149 } 3150 if ((so_state & SS_CANTRCVMORE) && 3151 sti->sti_nl7c_rcv_mp == NULL) { 3152 /* Nothing to process, EOF */ 3153 mutex_exit(&so->so_lock); 3154 return (0); 3155 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3156 /* Persistent NL7C socket, try to process request */ 3157 boolean_t ret; 3158 3159 ret = nl7c_process(so, 3160 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3161 rval.r_vals = sti->sti_nl7c_rcv_rval; 3162 error = rval.r_v.r_v2; 3163 if (error) { 3164 /* Error of some sort, return it */ 3165 mutex_exit(&so->so_lock); 3166 return (error); 3167 } 3168 if (sti->sti_nl7c_flags && 3169 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3170 /* 3171 * Still an NL7C socket and no data 3172 * to pass up to the caller. 3173 */ 3174 mutex_exit(&so->so_lock); 3175 if (ret) { 3176 /* EOF */ 3177 return (0); 3178 } else { 3179 /* Need more data */ 3180 return (EAGAIN); 3181 } 3182 } 3183 } else { 3184 /* 3185 * Not persistent so no further NL7C processing. 
3186 */ 3187 sti->sti_nl7c_flags = 0; 3188 } 3189 } 3190 /* 3191 * Only one reader is allowed at any given time. This is needed 3192 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3193 * 3194 * This is slightly different that BSD behavior in that it fails with 3195 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3196 * is single-threaded using sblock(), which is dropped while waiting 3197 * for data to appear. The difference shows up e.g. if one 3198 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3199 * does use nonblocking io and different threads are reading each 3200 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3201 * in this case as long as the read queue doesn't get empty. 3202 * In this implementation the thread using nonblocking io can 3203 * get an EWOULDBLOCK error due to the blocking thread executing 3204 * e.g. in the uiomove in kstrgetmsg. 3205 * This difference is not believed to be significant. 3206 */ 3207 /* Set SOREADLOCKED */ 3208 error = so_lock_read_intr(so, 3209 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3210 mutex_exit(&so->so_lock); 3211 if (error) 3212 return (error); 3213 3214 /* 3215 * Tell kstrgetmsg to not inspect the stream head errors until all 3216 * queued data has been consumed. 3217 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3218 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3219 * 3220 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3221 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3222 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 
3223 */ 3224 pflag = MSG_ANY | MSG_DELAYERROR; 3225 if (flags & MSG_PEEK) { 3226 pflag |= MSG_IPEEK; 3227 flags &= ~MSG_WAITALL; 3228 } 3229 if (so->so_mode & SM_ATOMIC) 3230 pflag |= MSG_DISCARDTAIL; 3231 3232 if (flags & MSG_DONTWAIT) 3233 timout = 0; 3234 else if (so->so_rcvtimeo != 0) 3235 timout = TICK_TO_MSEC(so->so_rcvtimeo); 3236 else 3237 timout = -1; 3238 opflag = pflag; 3239 retry: 3240 saved_resid = uiop->uio_resid; 3241 pri = 0; 3242 mp = NULL; 3243 if (sti->sti_nl7c_rcv_mp != NULL) { 3244 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3245 error = nl7c_sorecv(so, &mp, uiop, &rval); 3246 } else { 3247 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3248 timout, &rval); 3249 } 3250 if (error != 0) { 3251 /* kstrgetmsg returns ETIME when timeout expires */ 3252 if (error == ETIME) 3253 error = EWOULDBLOCK; 3254 goto out; 3255 } 3256 /* 3257 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3258 * For non-datagrams MOREDATA is used to set MSG_EOR. 3259 */ 3260 ASSERT(!(rval.r_val1 & MORECTL)); 3261 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3262 msg->msg_flags |= MSG_TRUNC; 3263 3264 if (mp == NULL) { 3265 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3266 /* 3267 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3268 * The draft Posix socket spec states that the mark should 3269 * not be cleared when peeking. We follow the latter. 3270 */ 3271 if ((so->so_state & 3272 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3273 (uiop->uio_resid != saved_resid) && 3274 !(flags & MSG_PEEK)) { 3275 sorecv_update_oobstate(so); 3276 } 3277 3278 mutex_enter(&so->so_lock); 3279 /* Set MSG_EOR based on MOREDATA */ 3280 if (!(rval.r_val1 & MOREDATA)) { 3281 if (so->so_state & SS_SAVEDEOR) { 3282 msg->msg_flags |= MSG_EOR; 3283 so->so_state &= ~SS_SAVEDEOR; 3284 } 3285 } 3286 /* 3287 * If some data was received (i.e. not EOF) and the 3288 * read/recv* has not been satisfied wait for some more. 
3289 */ 3290 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3291 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3292 mutex_exit(&so->so_lock); 3293 pflag = opflag | MSG_NOMARK; 3294 goto retry; 3295 } 3296 goto out_locked; 3297 } 3298 3299 /* strsock_proto has already verified length and alignment */ 3300 tpr = (union T_primitives *)mp->b_rptr; 3301 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3302 3303 switch (tpr->type) { 3304 case T_DATA_IND: { 3305 if ((so->so_state & 3306 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3307 (uiop->uio_resid != saved_resid) && 3308 !(flags & MSG_PEEK)) { 3309 sorecv_update_oobstate(so); 3310 } 3311 3312 /* 3313 * Set msg_flags to MSG_EOR based on 3314 * MORE_flag and MOREDATA. 3315 */ 3316 mutex_enter(&so->so_lock); 3317 so->so_state &= ~SS_SAVEDEOR; 3318 if (!(tpr->data_ind.MORE_flag & 1)) { 3319 if (!(rval.r_val1 & MOREDATA)) 3320 msg->msg_flags |= MSG_EOR; 3321 else 3322 so->so_state |= SS_SAVEDEOR; 3323 } 3324 freemsg(mp); 3325 /* 3326 * If some data was received (i.e. not EOF) and the 3327 * read/recv* has not been satisfied wait for some more. 
3328 */ 3329 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3330 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3331 mutex_exit(&so->so_lock); 3332 pflag = opflag | MSG_NOMARK; 3333 goto retry; 3334 } 3335 goto out_locked; 3336 } 3337 case T_UNITDATA_IND: { 3338 void *addr; 3339 t_uscalar_t addrlen; 3340 void *abuf; 3341 t_uscalar_t optlen; 3342 void *opt; 3343 3344 if ((so->so_state & 3345 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3346 (uiop->uio_resid != saved_resid) && 3347 !(flags & MSG_PEEK)) { 3348 sorecv_update_oobstate(so); 3349 } 3350 3351 if (namelen != 0) { 3352 /* Caller wants source address */ 3353 addrlen = tpr->unitdata_ind.SRC_length; 3354 addr = sogetoff(mp, 3355 tpr->unitdata_ind.SRC_offset, 3356 addrlen, 1); 3357 if (addr == NULL) { 3358 freemsg(mp); 3359 error = EPROTO; 3360 eprintsoline(so, error); 3361 goto out; 3362 } 3363 if (so->so_family == AF_UNIX) { 3364 /* 3365 * Can not use the transport level address. 3366 * If there is a SO_SRCADDR option carrying 3367 * the socket level address it will be 3368 * extracted below. 3369 */ 3370 addr = NULL; 3371 addrlen = 0; 3372 } 3373 } 3374 optlen = tpr->unitdata_ind.OPT_length; 3375 if (optlen != 0) { 3376 t_uscalar_t ncontrollen; 3377 3378 /* 3379 * Extract any source address option. 3380 * Determine how large cmsg buffer is needed. 3381 */ 3382 opt = sogetoff(mp, 3383 tpr->unitdata_ind.OPT_offset, 3384 optlen, __TPI_ALIGN_SIZE); 3385 3386 if (opt == NULL) { 3387 freemsg(mp); 3388 error = EPROTO; 3389 eprintsoline(so, error); 3390 goto out; 3391 } 3392 if (so->so_family == AF_UNIX) 3393 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3394 ncontrollen = so_cmsglen(mp, opt, optlen, 3395 !(flags & MSG_XPG4_2)); 3396 if (controllen != 0) 3397 controllen = ncontrollen; 3398 else if (ncontrollen != 0) 3399 msg->msg_flags |= MSG_CTRUNC; 3400 } else { 3401 controllen = 0; 3402 } 3403 3404 if (namelen != 0) { 3405 /* 3406 * Return address to caller. 
3407 * Caller handles truncation if length 3408 * exceeds msg_namelen. 3409 * NOTE: AF_UNIX NUL termination is ensured by 3410 * the sender's copyin_name(). 3411 */ 3412 abuf = kmem_alloc(addrlen, KM_SLEEP); 3413 3414 bcopy(addr, abuf, addrlen); 3415 msg->msg_name = abuf; 3416 msg->msg_namelen = addrlen; 3417 } 3418 3419 if (controllen != 0) { 3420 /* 3421 * Return control msg to caller. 3422 * Caller handles truncation if length 3423 * exceeds msg_controllen. 3424 */ 3425 control = kmem_zalloc(controllen, KM_SLEEP); 3426 3427 error = so_opt2cmsg(mp, opt, optlen, 3428 !(flags & MSG_XPG4_2), 3429 control, controllen); 3430 if (error) { 3431 freemsg(mp); 3432 if (msg->msg_namelen != 0) 3433 kmem_free(msg->msg_name, 3434 msg->msg_namelen); 3435 kmem_free(control, controllen); 3436 eprintsoline(so, error); 3437 goto out; 3438 } 3439 msg->msg_control = control; 3440 msg->msg_controllen = controllen; 3441 } 3442 3443 freemsg(mp); 3444 goto out; 3445 } 3446 case T_OPTDATA_IND: { 3447 struct T_optdata_req *tdr; 3448 void *opt; 3449 t_uscalar_t optlen; 3450 3451 if ((so->so_state & 3452 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3453 (uiop->uio_resid != saved_resid) && 3454 !(flags & MSG_PEEK)) { 3455 sorecv_update_oobstate(so); 3456 } 3457 3458 tdr = (struct T_optdata_req *)mp->b_rptr; 3459 optlen = tdr->OPT_length; 3460 if (optlen != 0) { 3461 t_uscalar_t ncontrollen; 3462 /* 3463 * Determine how large cmsg buffer is needed. 3464 */ 3465 opt = sogetoff(mp, 3466 tpr->optdata_ind.OPT_offset, 3467 optlen, __TPI_ALIGN_SIZE); 3468 3469 if (opt == NULL) { 3470 freemsg(mp); 3471 error = EPROTO; 3472 eprintsoline(so, error); 3473 goto out; 3474 } 3475 3476 ncontrollen = so_cmsglen(mp, opt, optlen, 3477 !(flags & MSG_XPG4_2)); 3478 if (controllen != 0) 3479 controllen = ncontrollen; 3480 else if (ncontrollen != 0) 3481 msg->msg_flags |= MSG_CTRUNC; 3482 } else { 3483 controllen = 0; 3484 } 3485 3486 if (controllen != 0) { 3487 /* 3488 * Return control msg to caller. 
3489 * Caller handles truncation if length 3490 * exceeds msg_controllen. 3491 */ 3492 control = kmem_zalloc(controllen, KM_SLEEP); 3493 3494 error = so_opt2cmsg(mp, opt, optlen, 3495 !(flags & MSG_XPG4_2), 3496 control, controllen); 3497 if (error) { 3498 freemsg(mp); 3499 kmem_free(control, controllen); 3500 eprintsoline(so, error); 3501 goto out; 3502 } 3503 msg->msg_control = control; 3504 msg->msg_controllen = controllen; 3505 } 3506 3507 /* 3508 * Set msg_flags to MSG_EOR based on 3509 * DATA_flag and MOREDATA. 3510 */ 3511 mutex_enter(&so->so_lock); 3512 so->so_state &= ~SS_SAVEDEOR; 3513 if (!(tpr->data_ind.MORE_flag & 1)) { 3514 if (!(rval.r_val1 & MOREDATA)) 3515 msg->msg_flags |= MSG_EOR; 3516 else 3517 so->so_state |= SS_SAVEDEOR; 3518 } 3519 freemsg(mp); 3520 /* 3521 * If some data was received (i.e. not EOF) and the 3522 * read/recv* has not been satisfied wait for some more. 3523 * Not possible to wait if control info was received. 3524 */ 3525 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3526 controllen == 0 && 3527 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3528 mutex_exit(&so->so_lock); 3529 pflag = opflag | MSG_NOMARK; 3530 goto retry; 3531 } 3532 goto out_locked; 3533 } 3534 case T_EXDATA_IND: { 3535 dprintso(so, 1, 3536 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3537 "state %s\n", 3538 sti->sti_oobsigcnt, sti->sti_oobcnt, 3539 saved_resid - uiop->uio_resid, 3540 pr_state(so->so_state, so->so_mode))); 3541 /* 3542 * kstrgetmsg handles MSGMARK so there is nothing to 3543 * inspect in the T_EXDATA_IND. 3544 * strsock_proto makes the stream head queue the T_EXDATA_IND 3545 * as a separate message with no M_DATA component. Furthermore, 3546 * the stream head does not consolidate M_DATA messages onto 3547 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3548 * remains a message by itself. This is needed since MSGMARK 3549 * marks both the whole message as well as the last byte 3550 * of the message. 
3551 */ 3552 freemsg(mp); 3553 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3554 if (flags & MSG_PEEK) { 3555 /* 3556 * Even though we are peeking we consume the 3557 * T_EXDATA_IND thereby moving the mark information 3558 * to SS_RCVATMARK. Then the oob code below will 3559 * retry the peeking kstrgetmsg. 3560 * Note that the stream head read queue is 3561 * never flushed without holding SOREADLOCKED 3562 * thus the T_EXDATA_IND can not disappear 3563 * underneath us. 3564 */ 3565 dprintso(so, 1, 3566 ("sotpi_recvmsg: consume EXDATA_IND " 3567 "counts %d/%d state %s\n", 3568 sti->sti_oobsigcnt, 3569 sti->sti_oobcnt, 3570 pr_state(so->so_state, so->so_mode))); 3571 3572 pflag = MSG_ANY | MSG_DELAYERROR; 3573 if (so->so_mode & SM_ATOMIC) 3574 pflag |= MSG_DISCARDTAIL; 3575 3576 pri = 0; 3577 mp = NULL; 3578 3579 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3580 &pri, &pflag, (clock_t)-1, &rval); 3581 ASSERT(uiop->uio_resid == saved_resid); 3582 3583 if (error) { 3584 #ifdef SOCK_DEBUG 3585 if (error != EWOULDBLOCK && error != EINTR) { 3586 eprintsoline(so, error); 3587 } 3588 #endif /* SOCK_DEBUG */ 3589 goto out; 3590 } 3591 ASSERT(mp); 3592 tpr = (union T_primitives *)mp->b_rptr; 3593 ASSERT(tpr->type == T_EXDATA_IND); 3594 freemsg(mp); 3595 } /* end "if (flags & MSG_PEEK)" */ 3596 3597 /* 3598 * Decrement the number of queued and pending oob. 3599 * 3600 * SS_RCVATMARK is cleared when we read past a mark. 3601 * SS_HAVEOOBDATA is cleared when we've read past the 3602 * last mark. 3603 * SS_OOBPEND is cleared if we've read past the last 3604 * mark and no (new) SIGURG has been posted. 
3605 */ 3606 mutex_enter(&so->so_lock); 3607 ASSERT(so_verify_oobstate(so)); 3608 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3609 ASSERT(sti->sti_oobsigcnt > 0); 3610 sti->sti_oobsigcnt--; 3611 ASSERT(sti->sti_oobcnt > 0); 3612 sti->sti_oobcnt--; 3613 /* 3614 * Since the T_EXDATA_IND has been removed from the stream 3615 * head, but we have not read data past the mark, 3616 * sockfs needs to track that the socket is still at the mark. 3617 * 3618 * Since no data was received call kstrgetmsg again to wait 3619 * for data. 3620 */ 3621 so->so_state |= SS_RCVATMARK; 3622 mutex_exit(&so->so_lock); 3623 dprintso(so, 1, 3624 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3625 sti->sti_oobsigcnt, sti->sti_oobcnt, 3626 pr_state(so->so_state, so->so_mode))); 3627 pflag = opflag; 3628 goto retry; 3629 } 3630 default: 3631 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3632 (void *)so, tpr->type, (void *)mp); 3633 ASSERT(0); 3634 freemsg(mp); 3635 error = EPROTO; 3636 eprintsoline(so, error); 3637 goto out; 3638 } 3639 /* NOTREACHED */ 3640 out: 3641 mutex_enter(&so->so_lock); 3642 out_locked: 3643 so_unlock_read(so); /* Clear SOREADLOCKED */ 3644 mutex_exit(&so->so_lock); 3645 return (error); 3646 } 3647 3648 /* 3649 * Sending data with options on a datagram socket. 3650 * Assumes caller has verified that SS_ISBOUND etc. are set. 
3651 */ 3652 static int 3653 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3654 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3655 { 3656 struct T_unitdata_req tudr; 3657 mblk_t *mp; 3658 int error; 3659 void *addr; 3660 socklen_t addrlen; 3661 void *src; 3662 socklen_t srclen; 3663 ssize_t len; 3664 int size; 3665 struct T_opthdr toh; 3666 struct fdbuf *fdbuf; 3667 t_uscalar_t optlen; 3668 void *fds; 3669 int fdlen; 3670 sotpi_info_t *sti = SOTOTPI(so); 3671 3672 ASSERT(name && namelen); 3673 ASSERT(control && controllen); 3674 3675 len = uiop->uio_resid; 3676 if (len > (ssize_t)sti->sti_tidu_size) { 3677 return (EMSGSIZE); 3678 } 3679 3680 /* 3681 * For AF_UNIX the destination address is translated to an internal 3682 * name and the source address is passed as an option. 3683 * Also, file descriptors are passed as file pointers in an 3684 * option. 3685 */ 3686 3687 /* 3688 * Length and family checks. 3689 */ 3690 error = so_addr_verify(so, name, namelen); 3691 if (error) { 3692 eprintsoline(so, error); 3693 return (error); 3694 } 3695 if (so->so_family == AF_UNIX) { 3696 if (sti->sti_faddr_noxlate) { 3697 /* 3698 * Already have a transport internal address. Do not 3699 * pass any (transport internal) source address. 3700 */ 3701 addr = name; 3702 addrlen = namelen; 3703 src = NULL; 3704 srclen = 0; 3705 } else { 3706 /* 3707 * Pass the sockaddr_un source address as an option 3708 * and translate the remote address. 3709 * 3710 * Note that this code does not prevent sti_laddr_sa 3711 * from changing while it is being used. Thus 3712 * if an unbind+bind occurs concurrently with this 3713 * send the peer might see a partially new and a 3714 * partially old "from" address. 
3715 */ 3716 src = sti->sti_laddr_sa; 3717 srclen = (t_uscalar_t)sti->sti_laddr_len; 3718 dprintso(so, 1, 3719 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3720 srclen, src)); 3721 error = so_ux_addr_xlate(so, name, namelen, 3722 (flags & MSG_XPG4_2), 3723 &addr, &addrlen); 3724 if (error) { 3725 eprintsoline(so, error); 3726 return (error); 3727 } 3728 } 3729 } else { 3730 addr = name; 3731 addrlen = namelen; 3732 src = NULL; 3733 srclen = 0; 3734 } 3735 optlen = so_optlen(control, controllen, 3736 !(flags & MSG_XPG4_2)); 3737 tudr.PRIM_type = T_UNITDATA_REQ; 3738 tudr.DEST_length = addrlen; 3739 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3740 if (srclen != 0) 3741 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3742 _TPI_ALIGN_TOPT(srclen)); 3743 else 3744 tudr.OPT_length = optlen; 3745 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3746 _TPI_ALIGN_TOPT(addrlen)); 3747 3748 size = tudr.OPT_offset + tudr.OPT_length; 3749 3750 /* 3751 * File descriptors only when SM_FDPASSING set. 3752 */ 3753 error = so_getfdopt(control, controllen, 3754 !(flags & MSG_XPG4_2), &fds, &fdlen); 3755 if (error) 3756 return (error); 3757 if (fdlen != -1) { 3758 if (!(so->so_mode & SM_FDPASSING)) 3759 return (EOPNOTSUPP); 3760 3761 error = fdbuf_create(fds, fdlen, &fdbuf); 3762 if (error) 3763 return (error); 3764 3765 /* 3766 * Pre-allocate enough additional space for lower level modules 3767 * to append an option (e.g. see tl_unitdata). The following 3768 * is enough extra space for the largest option we might append. 3769 */ 3770 size += sizeof (struct T_opthdr) + ucredsize; 3771 mp = fdbuf_allocmsg(size, fdbuf); 3772 } else { 3773 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3774 if (mp == NULL) { 3775 /* 3776 * Caught a signal waiting for memory. 3777 * Let send* return EINTR. 
3778 */ 3779 return (EINTR); 3780 } 3781 } 3782 soappendmsg(mp, &tudr, sizeof (tudr)); 3783 soappendmsg(mp, addr, addrlen); 3784 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3785 3786 if (fdlen != -1) { 3787 ASSERT(fdbuf != NULL); 3788 toh.level = SOL_SOCKET; 3789 toh.name = SO_FILEP; 3790 toh.len = fdbuf->fd_size + 3791 (t_uscalar_t)sizeof (struct T_opthdr); 3792 toh.status = 0; 3793 soappendmsg(mp, &toh, sizeof (toh)); 3794 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3795 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3796 } 3797 if (srclen != 0) { 3798 /* 3799 * There is a AF_UNIX sockaddr_un to include as a source 3800 * address option. 3801 */ 3802 toh.level = SOL_SOCKET; 3803 toh.name = SO_SRCADDR; 3804 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3805 toh.status = 0; 3806 soappendmsg(mp, &toh, sizeof (toh)); 3807 soappendmsg(mp, src, srclen); 3808 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3809 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3810 } 3811 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3812 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3813 /* 3814 * Normally at most 3 bytes left in the message, but we might have 3815 * allowed for extra space if we're passing fd's through. 3816 */ 3817 ASSERT(MBLKL(mp) <= (ssize_t)size); 3818 3819 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3820 if (AU_AUDITING()) 3821 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3822 3823 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3824 #ifdef SOCK_DEBUG 3825 if (error) { 3826 eprintsoline(so, error); 3827 } 3828 #endif /* SOCK_DEBUG */ 3829 return (error); 3830 } 3831 3832 /* 3833 * Sending data with options on a connected stream socket. 3834 * Assumes caller has verified that SS_ISCONNECTED is set. 
3835 */ 3836 static int 3837 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3838 t_uscalar_t controllen, int flags) 3839 { 3840 struct T_optdata_req tdr; 3841 mblk_t *mp; 3842 int error; 3843 ssize_t iosize; 3844 int size; 3845 struct fdbuf *fdbuf; 3846 t_uscalar_t optlen; 3847 void *fds; 3848 int fdlen; 3849 struct T_opthdr toh; 3850 sotpi_info_t *sti = SOTOTPI(so); 3851 3852 dprintso(so, 1, 3853 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3854 3855 /* 3856 * Has to be bound and connected. However, since no locks are 3857 * held the state could have changed after sotpi_sendmsg checked it 3858 * thus it is not possible to ASSERT on the state. 3859 */ 3860 3861 /* Options on connection-oriented only when SM_OPTDATA set. */ 3862 if (!(so->so_mode & SM_OPTDATA)) 3863 return (EOPNOTSUPP); 3864 3865 do { 3866 /* 3867 * Set the MORE flag if uio_resid does not fit in this 3868 * message or if the caller passed in "more". 3869 * Error for transports with zero tidu_size. 3870 */ 3871 tdr.PRIM_type = T_OPTDATA_REQ; 3872 iosize = sti->sti_tidu_size; 3873 if (iosize <= 0) 3874 return (EMSGSIZE); 3875 if (uiop->uio_resid > iosize) { 3876 tdr.DATA_flag = 1; 3877 } else { 3878 if (more) 3879 tdr.DATA_flag = 1; 3880 else 3881 tdr.DATA_flag = 0; 3882 iosize = uiop->uio_resid; 3883 } 3884 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3885 tdr.DATA_flag, iosize)); 3886 3887 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3888 tdr.OPT_length = optlen; 3889 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3890 3891 size = (int)sizeof (tdr) + optlen; 3892 /* 3893 * File descriptors only when SM_FDPASSING set. 
3894 */ 3895 error = so_getfdopt(control, controllen, 3896 !(flags & MSG_XPG4_2), &fds, &fdlen); 3897 if (error) 3898 return (error); 3899 if (fdlen != -1) { 3900 if (!(so->so_mode & SM_FDPASSING)) 3901 return (EOPNOTSUPP); 3902 3903 error = fdbuf_create(fds, fdlen, &fdbuf); 3904 if (error) 3905 return (error); 3906 3907 /* 3908 * Pre-allocate enough additional space for lower level 3909 * modules to append an option (e.g. see tl_unitdata). 3910 * The following is enough extra space for the largest 3911 * option we might append. 3912 */ 3913 size += sizeof (struct T_opthdr) + ucredsize; 3914 mp = fdbuf_allocmsg(size, fdbuf); 3915 } else { 3916 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3917 if (mp == NULL) { 3918 /* 3919 * Caught a signal waiting for memory. 3920 * Let send* return EINTR. 3921 */ 3922 return (EINTR); 3923 } 3924 } 3925 soappendmsg(mp, &tdr, sizeof (tdr)); 3926 3927 if (fdlen != -1) { 3928 ASSERT(fdbuf != NULL); 3929 toh.level = SOL_SOCKET; 3930 toh.name = SO_FILEP; 3931 toh.len = fdbuf->fd_size + 3932 (t_uscalar_t)sizeof (struct T_opthdr); 3933 toh.status = 0; 3934 soappendmsg(mp, &toh, sizeof (toh)); 3935 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3936 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3937 } 3938 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3939 /* 3940 * Normally at most 3 bytes left in the message, but we might 3941 * have allowed for extra space if we're passing fd's through. 3942 */ 3943 ASSERT(MBLKL(mp) <= (ssize_t)size); 3944 3945 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3946 3947 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3948 0, MSG_BAND, 0); 3949 if (error) { 3950 eprintsoline(so, error); 3951 return (error); 3952 } 3953 control = NULL; 3954 if (uiop->uio_resid > 0) { 3955 /* 3956 * Recheck for fatal errors. Fail write even though 3957 * some data have been written. This is consistent 3958 * with strwrite semantics and BSD sockets semantics. 
3959 */ 3960 if (so->so_state & SS_CANTSENDMORE) { 3961 eprintsoline(so, error); 3962 return (EPIPE); 3963 } 3964 if (so->so_error != 0) { 3965 mutex_enter(&so->so_lock); 3966 error = sogeterr(so, B_TRUE); 3967 mutex_exit(&so->so_lock); 3968 if (error != 0) { 3969 eprintsoline(so, error); 3970 return (error); 3971 } 3972 } 3973 } 3974 } while (uiop->uio_resid > 0); 3975 return (0); 3976 } 3977 3978 /* 3979 * Sending data on a datagram socket. 3980 * Assumes caller has verified that SS_ISBOUND etc. are set. 3981 * 3982 * For AF_UNIX the destination address is translated to an internal 3983 * name and the source address is passed as an option. 3984 */ 3985 int 3986 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3987 struct uio *uiop, int flags) 3988 { 3989 struct T_unitdata_req tudr; 3990 mblk_t *mp; 3991 int error; 3992 void *addr; 3993 socklen_t addrlen; 3994 void *src; 3995 socklen_t srclen; 3996 ssize_t len; 3997 sotpi_info_t *sti = SOTOTPI(so); 3998 3999 ASSERT(name != NULL && namelen != 0); 4000 4001 len = uiop->uio_resid; 4002 if (len > sti->sti_tidu_size) { 4003 error = EMSGSIZE; 4004 goto done; 4005 } 4006 4007 /* Length and family checks */ 4008 error = so_addr_verify(so, name, namelen); 4009 if (error != 0) 4010 goto done; 4011 4012 if (sti->sti_direct) 4013 return (sodgram_direct(so, name, namelen, uiop, flags)); 4014 4015 if (so->so_family == AF_UNIX) { 4016 if (sti->sti_faddr_noxlate) { 4017 /* 4018 * Already have a transport internal address. Do not 4019 * pass any (transport internal) source address. 4020 */ 4021 addr = name; 4022 addrlen = namelen; 4023 src = NULL; 4024 srclen = 0; 4025 } else { 4026 /* 4027 * Pass the sockaddr_un source address as an option 4028 * and translate the remote address. 4029 * 4030 * Note that this code does not prevent sti_laddr_sa 4031 * from changing while it is being used. 
Thus 4032 * if an unbind+bind occurs concurrently with this 4033 * send the peer might see a partially new and a 4034 * partially old "from" address. 4035 */ 4036 src = sti->sti_laddr_sa; 4037 srclen = (socklen_t)sti->sti_laddr_len; 4038 dprintso(so, 1, 4039 ("sosend_dgram UNIX: srclen %d, src %p\n", 4040 srclen, src)); 4041 error = so_ux_addr_xlate(so, name, namelen, 4042 (flags & MSG_XPG4_2), 4043 &addr, &addrlen); 4044 if (error) { 4045 eprintsoline(so, error); 4046 goto done; 4047 } 4048 } 4049 } else { 4050 addr = name; 4051 addrlen = namelen; 4052 src = NULL; 4053 srclen = 0; 4054 } 4055 tudr.PRIM_type = T_UNITDATA_REQ; 4056 tudr.DEST_length = addrlen; 4057 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4058 if (srclen == 0) { 4059 tudr.OPT_length = 0; 4060 tudr.OPT_offset = 0; 4061 4062 mp = soallocproto2(&tudr, sizeof (tudr), 4063 addr, addrlen, 0, _ALLOC_INTR, CRED()); 4064 if (mp == NULL) { 4065 /* 4066 * Caught a signal waiting for memory. 4067 * Let send* return EINTR. 4068 */ 4069 error = EINTR; 4070 goto done; 4071 } 4072 } else { 4073 /* 4074 * There is a AF_UNIX sockaddr_un to include as a source 4075 * address option. 4076 */ 4077 struct T_opthdr toh; 4078 ssize_t size; 4079 4080 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4081 _TPI_ALIGN_TOPT(srclen)); 4082 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4083 _TPI_ALIGN_TOPT(addrlen)); 4084 4085 toh.level = SOL_SOCKET; 4086 toh.name = SO_SRCADDR; 4087 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4088 toh.status = 0; 4089 4090 size = tudr.OPT_offset + tudr.OPT_length; 4091 mp = soallocproto2(&tudr, sizeof (tudr), 4092 addr, addrlen, size, _ALLOC_INTR, CRED()); 4093 if (mp == NULL) { 4094 /* 4095 * Caught a signal waiting for memory. 4096 * Let send* return EINTR. 
4097 */ 4098 error = EINTR; 4099 goto done; 4100 } 4101 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4102 soappendmsg(mp, &toh, sizeof (toh)); 4103 soappendmsg(mp, src, srclen); 4104 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4105 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4106 } 4107 4108 if (AU_AUDITING()) 4109 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4110 4111 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4112 done: 4113 #ifdef SOCK_DEBUG 4114 if (error) { 4115 eprintsoline(so, error); 4116 } 4117 #endif /* SOCK_DEBUG */ 4118 return (error); 4119 } 4120 4121 /* 4122 * Sending data on a connected stream socket. 4123 * Assumes caller has verified that SS_ISCONNECTED is set. 4124 */ 4125 int 4126 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4127 int sflag) 4128 { 4129 struct T_data_req tdr; 4130 mblk_t *mp; 4131 int error; 4132 ssize_t iosize; 4133 sotpi_info_t *sti = SOTOTPI(so); 4134 4135 dprintso(so, 1, 4136 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4137 (void *)so, uiop->uio_resid, prim, sflag)); 4138 4139 /* 4140 * Has to be bound and connected. However, since no locks are 4141 * held the state could have changed after sotpi_sendmsg checked it 4142 * thus it is not possible to ASSERT on the state. 4143 */ 4144 4145 do { 4146 /* 4147 * Set the MORE flag if uio_resid does not fit in this 4148 * message or if the caller passed in "more". 4149 * Error for transports with zero tidu_size. 
4150 */ 4151 tdr.PRIM_type = prim; 4152 iosize = sti->sti_tidu_size; 4153 if (iosize <= 0) 4154 return (EMSGSIZE); 4155 if (uiop->uio_resid > iosize) { 4156 tdr.MORE_flag = 1; 4157 } else { 4158 if (more) 4159 tdr.MORE_flag = 1; 4160 else 4161 tdr.MORE_flag = 0; 4162 iosize = uiop->uio_resid; 4163 } 4164 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4165 prim, tdr.MORE_flag, iosize)); 4166 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4167 if (mp == NULL) { 4168 /* 4169 * Caught a signal waiting for memory. 4170 * Let send* return EINTR. 4171 */ 4172 return (EINTR); 4173 } 4174 4175 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4176 0, sflag | MSG_BAND, 0); 4177 if (error) { 4178 eprintsoline(so, error); 4179 return (error); 4180 } 4181 if (uiop->uio_resid > 0) { 4182 /* 4183 * Recheck for fatal errors. Fail write even though 4184 * some data have been written. This is consistent 4185 * with strwrite semantics and BSD sockets semantics. 4186 */ 4187 if (so->so_state & SS_CANTSENDMORE) { 4188 eprintsoline(so, error); 4189 return (EPIPE); 4190 } 4191 if (so->so_error != 0) { 4192 mutex_enter(&so->so_lock); 4193 error = sogeterr(so, B_TRUE); 4194 mutex_exit(&so->so_lock); 4195 if (error != 0) { 4196 eprintsoline(so, error); 4197 return (error); 4198 } 4199 } 4200 } 4201 } while (uiop->uio_resid > 0); 4202 return (0); 4203 } 4204 4205 /* 4206 * Check the state for errors and call the appropriate send function. 4207 * 4208 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4209 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4210 * after sending the message. 
4211 */ 4212 static int 4213 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4214 struct cred *cr) 4215 { 4216 int so_state; 4217 int so_mode; 4218 int error; 4219 struct sockaddr *name; 4220 t_uscalar_t namelen; 4221 int dontroute; 4222 int flags; 4223 sotpi_info_t *sti = SOTOTPI(so); 4224 4225 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4226 (void *)so, (void *)msg, msg->msg_flags, 4227 pr_state(so->so_state, so->so_mode), so->so_error)); 4228 4229 if (so->so_version == SOV_STREAM) { 4230 /* The imaginary "sockmod" has been popped - act as a stream */ 4231 so_update_attrs(so, SOMOD); 4232 return (strwrite(SOTOV(so), uiop, cr)); 4233 } 4234 4235 mutex_enter(&so->so_lock); 4236 so_state = so->so_state; 4237 4238 if (so_state & SS_CANTSENDMORE) { 4239 mutex_exit(&so->so_lock); 4240 return (EPIPE); 4241 } 4242 4243 if (so->so_error != 0) { 4244 error = sogeterr(so, B_TRUE); 4245 if (error != 0) { 4246 mutex_exit(&so->so_lock); 4247 return (error); 4248 } 4249 } 4250 4251 name = (struct sockaddr *)msg->msg_name; 4252 namelen = msg->msg_namelen; 4253 4254 so_mode = so->so_mode; 4255 4256 if (name == NULL) { 4257 if (!(so_state & SS_ISCONNECTED)) { 4258 mutex_exit(&so->so_lock); 4259 if (so_mode & SM_CONNREQUIRED) 4260 return (ENOTCONN); 4261 else 4262 return (EDESTADDRREQ); 4263 } 4264 if (so_mode & SM_CONNREQUIRED) { 4265 name = NULL; 4266 namelen = 0; 4267 } else { 4268 /* 4269 * Note that this code does not prevent sti_faddr_sa 4270 * from changing while it is being used. Thus 4271 * if an "unconnect"+connect occurs concurrently with 4272 * this send the datagram might be delivered to a 4273 * garbaled address. 
4274 */ 4275 ASSERT(sti->sti_faddr_sa); 4276 name = sti->sti_faddr_sa; 4277 namelen = (t_uscalar_t)sti->sti_faddr_len; 4278 } 4279 } else { 4280 if (!(so_state & SS_ISCONNECTED) && 4281 (so_mode & SM_CONNREQUIRED)) { 4282 /* Required but not connected */ 4283 mutex_exit(&so->so_lock); 4284 return (ENOTCONN); 4285 } 4286 /* 4287 * Ignore the address on connection-oriented sockets. 4288 * Just like BSD this code does not generate an error for 4289 * TCP (a CONNREQUIRED socket) when sending to an address 4290 * passed in with sendto/sendmsg. Instead the data is 4291 * delivered on the connection as if no address had been 4292 * supplied. 4293 */ 4294 if ((so_state & SS_ISCONNECTED) && 4295 !(so_mode & SM_CONNREQUIRED)) { 4296 mutex_exit(&so->so_lock); 4297 return (EISCONN); 4298 } 4299 if (!(so_state & SS_ISBOUND)) { 4300 so_lock_single(so); /* Set SOLOCKED */ 4301 error = sotpi_bind(so, NULL, 0, 4302 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4303 so_unlock_single(so, SOLOCKED); 4304 if (error) { 4305 mutex_exit(&so->so_lock); 4306 eprintsoline(so, error); 4307 return (error); 4308 } 4309 } 4310 /* 4311 * Handle delayed datagram errors. These are only queued 4312 * when the application sets SO_DGRAM_ERRIND. 4313 * Return the error if we are sending to the address 4314 * that was returned in the last T_UDERROR_IND. 4315 * If sending to some other address discard the delayed 4316 * error indication. 
4317 */ 4318 if (sti->sti_delayed_error) { 4319 struct T_uderror_ind *tudi; 4320 void *addr; 4321 t_uscalar_t addrlen; 4322 boolean_t match = B_FALSE; 4323 4324 ASSERT(sti->sti_eaddr_mp); 4325 error = sti->sti_delayed_error; 4326 sti->sti_delayed_error = 0; 4327 tudi = 4328 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4329 addrlen = tudi->DEST_length; 4330 addr = sogetoff(sti->sti_eaddr_mp, 4331 tudi->DEST_offset, addrlen, 1); 4332 ASSERT(addr); /* Checked by strsock_proto */ 4333 switch (so->so_family) { 4334 case AF_INET: { 4335 /* Compare just IP address and port */ 4336 sin_t *sin1 = (sin_t *)name; 4337 sin_t *sin2 = (sin_t *)addr; 4338 4339 if (addrlen == sizeof (sin_t) && 4340 namelen == addrlen && 4341 sin1->sin_port == sin2->sin_port && 4342 sin1->sin_addr.s_addr == 4343 sin2->sin_addr.s_addr) 4344 match = B_TRUE; 4345 break; 4346 } 4347 case AF_INET6: { 4348 /* Compare just IP address and port. Not flow */ 4349 sin6_t *sin1 = (sin6_t *)name; 4350 sin6_t *sin2 = (sin6_t *)addr; 4351 4352 if (addrlen == sizeof (sin6_t) && 4353 namelen == addrlen && 4354 sin1->sin6_port == sin2->sin6_port && 4355 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4356 &sin2->sin6_addr)) 4357 match = B_TRUE; 4358 break; 4359 } 4360 case AF_UNIX: 4361 default: 4362 if (namelen == addrlen && 4363 bcmp(name, addr, namelen) == 0) 4364 match = B_TRUE; 4365 } 4366 if (match) { 4367 freemsg(sti->sti_eaddr_mp); 4368 sti->sti_eaddr_mp = NULL; 4369 mutex_exit(&so->so_lock); 4370 #ifdef DEBUG 4371 dprintso(so, 0, 4372 ("sockfs delayed error %d for %s\n", 4373 error, 4374 pr_addr(so->so_family, name, namelen))); 4375 #endif /* DEBUG */ 4376 return (error); 4377 } 4378 freemsg(sti->sti_eaddr_mp); 4379 sti->sti_eaddr_mp = NULL; 4380 } 4381 } 4382 mutex_exit(&so->so_lock); 4383 4384 flags = msg->msg_flags; 4385 dontroute = 0; 4386 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4387 uint32_t val; 4388 4389 val = 1; 4390 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4391 
&val, (t_uscalar_t)sizeof (val), cr); 4392 if (error) 4393 return (error); 4394 dontroute = 1; 4395 } 4396 4397 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4398 error = EOPNOTSUPP; 4399 goto done; 4400 } 4401 if (msg->msg_controllen != 0) { 4402 if (!(so_mode & SM_CONNREQUIRED)) { 4403 so_update_attrs(so, SOMOD); 4404 error = sosend_dgramcmsg(so, name, namelen, uiop, 4405 msg->msg_control, msg->msg_controllen, flags); 4406 } else { 4407 if (flags & MSG_OOB) { 4408 /* Can't generate T_EXDATA_REQ with options */ 4409 error = EOPNOTSUPP; 4410 goto done; 4411 } 4412 so_update_attrs(so, SOMOD); 4413 error = sosend_svccmsg(so, uiop, 4414 !(flags & MSG_EOR), 4415 msg->msg_control, msg->msg_controllen, 4416 flags); 4417 } 4418 goto done; 4419 } 4420 4421 so_update_attrs(so, SOMOD); 4422 if (!(so_mode & SM_CONNREQUIRED)) { 4423 /* 4424 * If there is no SO_DONTROUTE to turn off return immediately 4425 * from send_dgram. This can allow tail-call optimizations. 4426 */ 4427 if (!dontroute) { 4428 return (sosend_dgram(so, name, namelen, uiop, flags)); 4429 } 4430 error = sosend_dgram(so, name, namelen, uiop, flags); 4431 } else { 4432 t_scalar_t prim; 4433 int sflag; 4434 4435 /* Ignore msg_name in the connected state */ 4436 if (flags & MSG_OOB) { 4437 prim = T_EXDATA_REQ; 4438 /* 4439 * Send down T_EXDATA_REQ even if there is flow 4440 * control for data. 4441 */ 4442 sflag = MSG_IGNFLOW; 4443 } else { 4444 if (so_mode & SM_BYTESTREAM) { 4445 /* Byte stream transport - use write */ 4446 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4447 4448 /* Send M_DATA messages */ 4449 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4450 (error = nl7c_data(so, uiop)) >= 0) { 4451 /* NL7C consumed the data */ 4452 return (error); 4453 } 4454 /* 4455 * If there is no SO_DONTROUTE to turn off, 4456 * sti_direct is on, and there is no flow 4457 * control, we can take the fast path. 
4458 */ 4459 if (!dontroute && sti->sti_direct != 0 && 4460 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4461 return (sostream_direct(so, uiop, 4462 NULL, cr)); 4463 } 4464 error = strwrite(SOTOV(so), uiop, cr); 4465 goto done; 4466 } 4467 prim = T_DATA_REQ; 4468 sflag = 0; 4469 } 4470 /* 4471 * If there is no SO_DONTROUTE to turn off return immediately 4472 * from sosend_svc. This can allow tail-call optimizations. 4473 */ 4474 if (!dontroute) 4475 return (sosend_svc(so, uiop, prim, 4476 !(flags & MSG_EOR), sflag)); 4477 error = sosend_svc(so, uiop, prim, 4478 !(flags & MSG_EOR), sflag); 4479 } 4480 ASSERT(dontroute); 4481 done: 4482 if (dontroute) { 4483 uint32_t val; 4484 4485 val = 0; 4486 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4487 &val, (t_uscalar_t)sizeof (val), cr); 4488 } 4489 return (error); 4490 } 4491 4492 /* 4493 * kstrwritemp() has very similar semantics as that of strwrite(). 4494 * The main difference is it obtains mblks from the caller and also 4495 * does not do any copy as done in strwrite() from user buffers to 4496 * kernel buffers. 4497 * 4498 * Currently, this routine is used by sendfile to send data allocated 4499 * within the kernel without any copying. This interface does not use the 4500 * synchronous stream interface as synch. stream interface implies 4501 * copying. 4502 */ 4503 int 4504 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4505 { 4506 struct stdata *stp; 4507 struct queue *wqp; 4508 mblk_t *newmp; 4509 char waitflag; 4510 int tempmode; 4511 int error = 0; 4512 int done = 0; 4513 struct sonode *so; 4514 boolean_t direct; 4515 4516 ASSERT(vp->v_stream); 4517 stp = vp->v_stream; 4518 4519 so = VTOSO(vp); 4520 direct = _SOTOTPI(so)->sti_direct; 4521 4522 /* 4523 * This is the sockfs direct fast path. canputnext() need 4524 * not be accurate so we don't grab the sd_lock here. If 4525 * we get flow-controlled, we grab sd_lock just before the 4526 * do..while loop below to emulate what strwrite() does. 
4527 */ 4528 wqp = stp->sd_wrq; 4529 if (canputnext(wqp) && direct && 4530 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4531 return (sostream_direct(so, NULL, mp, CRED())); 4532 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4533 /* Fast check of flags before acquiring the lock */ 4534 mutex_enter(&stp->sd_lock); 4535 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4536 mutex_exit(&stp->sd_lock); 4537 if (error != 0) { 4538 if (!(stp->sd_flag & STPLEX) && 4539 (stp->sd_wput_opt & SW_SIGPIPE)) { 4540 error = EPIPE; 4541 } 4542 return (error); 4543 } 4544 } 4545 4546 waitflag = WRITEWAIT; 4547 if (stp->sd_flag & OLDNDELAY) 4548 tempmode = fmode & ~FNDELAY; 4549 else 4550 tempmode = fmode; 4551 4552 mutex_enter(&stp->sd_lock); 4553 do { 4554 if (canputnext(wqp)) { 4555 mutex_exit(&stp->sd_lock); 4556 if (stp->sd_wputdatafunc != NULL) { 4557 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4558 NULL, NULL, NULL); 4559 if (newmp == NULL) { 4560 /* The caller will free mp */ 4561 return (ECOMM); 4562 } 4563 mp = newmp; 4564 } 4565 putnext(wqp, mp); 4566 return (0); 4567 } 4568 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4569 &done); 4570 } while (error == 0 && !done); 4571 4572 mutex_exit(&stp->sd_lock); 4573 /* 4574 * EAGAIN tells the application to try again. ENOMEM 4575 * is returned only if the memory allocation size 4576 * exceeds the physical limits of the system. ENOMEM 4577 * can't be true here. 
4578 */ 4579 if (error == ENOMEM) 4580 error = EAGAIN; 4581 return (error); 4582 } 4583 4584 /* ARGSUSED */ 4585 static int 4586 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4587 struct cred *cr, mblk_t **mpp) 4588 { 4589 int error; 4590 4591 switch (so->so_family) { 4592 case AF_INET: 4593 case AF_INET6: 4594 case AF_UNIX: 4595 break; 4596 default: 4597 return (EAFNOSUPPORT); 4598 4599 } 4600 4601 if (so->so_state & SS_CANTSENDMORE) 4602 return (EPIPE); 4603 4604 if (so->so_type != SOCK_STREAM) 4605 return (EOPNOTSUPP); 4606 4607 if ((so->so_state & SS_ISCONNECTED) == 0) 4608 return (ENOTCONN); 4609 4610 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4611 if (error == 0) 4612 *mpp = NULL; 4613 return (error); 4614 } 4615 4616 /* 4617 * Sending data on a datagram socket. 4618 * Assumes caller has verified that SS_ISBOUND etc. are set. 4619 */ 4620 /* ARGSUSED */ 4621 static int 4622 sodgram_direct(struct sonode *so, struct sockaddr *name, 4623 socklen_t namelen, struct uio *uiop, int flags) 4624 { 4625 struct T_unitdata_req tudr; 4626 mblk_t *mp = NULL; 4627 int error = 0; 4628 void *addr; 4629 socklen_t addrlen; 4630 ssize_t len; 4631 struct stdata *stp = SOTOV(so)->v_stream; 4632 int so_state; 4633 queue_t *udp_wq; 4634 boolean_t connected; 4635 mblk_t *mpdata = NULL; 4636 sotpi_info_t *sti = SOTOTPI(so); 4637 uint32_t auditing = AU_AUDITING(); 4638 4639 ASSERT(name != NULL && namelen != 0); 4640 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4641 ASSERT(!(so->so_mode & SM_EXDATA)); 4642 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4643 ASSERT(SOTOV(so)->v_type == VSOCK); 4644 4645 /* Caller checked for proper length */ 4646 len = uiop->uio_resid; 4647 ASSERT(len <= sti->sti_tidu_size); 4648 4649 /* Length and family checks have been done by caller */ 4650 ASSERT(name->sa_family == so->so_family); 4651 ASSERT(so->so_family == AF_INET || 4652 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4653 ASSERT(so->so_family == 
AF_INET6 || 4654 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4655 4656 addr = name; 4657 addrlen = namelen; 4658 4659 if (stp->sd_sidp != NULL && 4660 (error = straccess(stp, JCWRITE)) != 0) 4661 goto done; 4662 4663 so_state = so->so_state; 4664 4665 connected = so_state & SS_ISCONNECTED; 4666 if (!connected) { 4667 tudr.PRIM_type = T_UNITDATA_REQ; 4668 tudr.DEST_length = addrlen; 4669 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4670 tudr.OPT_length = 0; 4671 tudr.OPT_offset = 0; 4672 4673 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4674 _ALLOC_INTR, CRED()); 4675 if (mp == NULL) { 4676 /* 4677 * Caught a signal waiting for memory. 4678 * Let send* return EINTR. 4679 */ 4680 error = EINTR; 4681 goto done; 4682 } 4683 } 4684 4685 /* 4686 * For UDP we don't break up the copyin into smaller pieces 4687 * as in the TCP case. That means if ENOMEM is returned by 4688 * mcopyinuio() then the uio vector has not been modified at 4689 * all and we fallback to either strwrite() or kstrputmsg() 4690 * below. Note also that we never generate priority messages 4691 * from here. 4692 */ 4693 udp_wq = stp->sd_wrq->q_next; 4694 if (canput(udp_wq) && 4695 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4696 ASSERT(DB_TYPE(mpdata) == M_DATA); 4697 ASSERT(uiop->uio_resid == 0); 4698 if (!connected) 4699 linkb(mp, mpdata); 4700 else 4701 mp = mpdata; 4702 if (auditing) 4703 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4704 4705 udp_wput(udp_wq, mp); 4706 return (0); 4707 } 4708 4709 ASSERT(mpdata == NULL); 4710 if (error != 0 && error != ENOMEM) { 4711 freemsg(mp); 4712 return (error); 4713 } 4714 4715 /* 4716 * For connected, let strwrite() handle the blocking case. 4717 * Otherwise we fall thru and use kstrputmsg(). 
4718 */ 4719 if (connected) 4720 return (strwrite(SOTOV(so), uiop, CRED())); 4721 4722 if (auditing) 4723 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4724 4725 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4726 done: 4727 #ifdef SOCK_DEBUG 4728 if (error != 0) { 4729 eprintsoline(so, error); 4730 } 4731 #endif /* SOCK_DEBUG */ 4732 return (error); 4733 } 4734 4735 int 4736 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4737 { 4738 struct stdata *stp = SOTOV(so)->v_stream; 4739 ssize_t iosize, rmax, maxblk; 4740 queue_t *tcp_wq = stp->sd_wrq->q_next; 4741 mblk_t *newmp; 4742 int error = 0, wflag = 0; 4743 4744 ASSERT(so->so_mode & SM_BYTESTREAM); 4745 ASSERT(SOTOV(so)->v_type == VSOCK); 4746 4747 if (stp->sd_sidp != NULL && 4748 (error = straccess(stp, JCWRITE)) != 0) 4749 return (error); 4750 4751 if (uiop == NULL) { 4752 /* 4753 * kstrwritemp() should have checked sd_flag and 4754 * flow-control before coming here. If we end up 4755 * here it means that we can simply pass down the 4756 * data to tcp. 
4757 */ 4758 ASSERT(mp != NULL); 4759 if (stp->sd_wputdatafunc != NULL) { 4760 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4761 NULL, NULL, NULL); 4762 if (newmp == NULL) { 4763 /* The caller will free mp */ 4764 return (ECOMM); 4765 } 4766 mp = newmp; 4767 } 4768 tcp_wput(tcp_wq, mp); 4769 return (0); 4770 } 4771 4772 /* Fallback to strwrite() to do proper error handling */ 4773 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4774 return (strwrite(SOTOV(so), uiop, cr)); 4775 4776 rmax = stp->sd_qn_maxpsz; 4777 ASSERT(rmax >= 0 || rmax == INFPSZ); 4778 if (rmax == 0 || uiop->uio_resid <= 0) 4779 return (0); 4780 4781 if (rmax == INFPSZ) 4782 rmax = uiop->uio_resid; 4783 4784 maxblk = stp->sd_maxblk; 4785 4786 for (;;) { 4787 iosize = MIN(uiop->uio_resid, rmax); 4788 4789 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4790 if (mp == NULL) { 4791 /* 4792 * Fallback to strwrite() for ENOMEM; if this 4793 * is our first time in this routine and the uio 4794 * vector has not been modified, we will end up 4795 * calling strwrite() without any flag set. 4796 */ 4797 if (error == ENOMEM) 4798 goto slow_send; 4799 else 4800 return (error); 4801 } 4802 ASSERT(uiop->uio_resid >= 0); 4803 /* 4804 * If mp is non-NULL and ENOMEM is set, it means that 4805 * mcopyinuio() was able to break down some of the user 4806 * data into one or more mblks. Send the partial data 4807 * to tcp and let the rest be handled in strwrite(). 
4808 */ 4809 ASSERT(error == 0 || error == ENOMEM); 4810 if (stp->sd_wputdatafunc != NULL) { 4811 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4812 NULL, NULL, NULL); 4813 if (newmp == NULL) { 4814 /* The caller will free mp */ 4815 return (ECOMM); 4816 } 4817 mp = newmp; 4818 } 4819 tcp_wput(tcp_wq, mp); 4820 4821 wflag |= NOINTR; 4822 4823 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4824 ASSERT(error == 0); 4825 break; 4826 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4827 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4828 slow_send: 4829 /* 4830 * We were able to send down partial data using 4831 * the direct call interface, but are now relying 4832 * on strwrite() to handle the non-fastpath cases. 4833 * If the socket is blocking we will sleep in 4834 * strwaitq() until write is permitted, otherwise, 4835 * we will need to return the amount of bytes 4836 * written so far back to the app. This is the 4837 * reason why we pass NOINTR flag to strwrite() 4838 * for non-blocking socket, because we don't want 4839 * to return EAGAIN when portion of the user data 4840 * has actually been sent down. 4841 */ 4842 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4843 } 4844 } 4845 return (0); 4846 } 4847 4848 /* 4849 * Update sti_faddr by asking the transport (unless AF_UNIX). 
4850 */ 4851 /* ARGSUSED */ 4852 int 4853 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4854 boolean_t accept, struct cred *cr) 4855 { 4856 struct strbuf strbuf; 4857 int error = 0, res; 4858 void *addr; 4859 t_uscalar_t addrlen; 4860 k_sigset_t smask; 4861 sotpi_info_t *sti = SOTOTPI(so); 4862 4863 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4864 (void *)so, pr_state(so->so_state, so->so_mode))); 4865 4866 ASSERT(*namelen > 0); 4867 mutex_enter(&so->so_lock); 4868 so_lock_single(so); /* Set SOLOCKED */ 4869 4870 if (accept) { 4871 bcopy(sti->sti_faddr_sa, name, 4872 MIN(*namelen, sti->sti_faddr_len)); 4873 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4874 goto done; 4875 } 4876 4877 if (!(so->so_state & SS_ISCONNECTED)) { 4878 error = ENOTCONN; 4879 goto done; 4880 } 4881 /* Added this check for X/Open */ 4882 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4883 error = EINVAL; 4884 if (xnet_check_print) { 4885 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4886 } 4887 goto done; 4888 } 4889 4890 if (sti->sti_faddr_valid) { 4891 bcopy(sti->sti_faddr_sa, name, 4892 MIN(*namelen, sti->sti_faddr_len)); 4893 *namelen = sti->sti_faddr_noxlate ? 
0: sti->sti_faddr_len; 4894 goto done; 4895 } 4896 4897 #ifdef DEBUG 4898 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4899 pr_addr(so->so_family, sti->sti_faddr_sa, 4900 (t_uscalar_t)sti->sti_faddr_len))); 4901 #endif /* DEBUG */ 4902 4903 if (so->so_family == AF_UNIX) { 4904 /* Transport has different name space - return local info */ 4905 if (sti->sti_faddr_noxlate) 4906 *namelen = 0; 4907 error = 0; 4908 goto done; 4909 } 4910 4911 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4912 4913 ASSERT(sti->sti_faddr_sa); 4914 /* Allocate local buffer to use with ioctl */ 4915 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4916 mutex_exit(&so->so_lock); 4917 addr = kmem_alloc(addrlen, KM_SLEEP); 4918 4919 /* 4920 * Issue TI_GETPEERNAME with signals masked. 4921 * Put the result in sti_faddr_sa so that getpeername works after 4922 * a shutdown(output). 4923 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4924 * back to the socket. 4925 */ 4926 strbuf.buf = addr; 4927 strbuf.maxlen = addrlen; 4928 strbuf.len = 0; 4929 4930 sigintr(&smask, 0); 4931 res = 0; 4932 ASSERT(cr); 4933 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4934 0, K_TO_K, cr, &res); 4935 sigunintr(&smask); 4936 4937 mutex_enter(&so->so_lock); 4938 /* 4939 * If there is an error record the error in so_error put don't fail 4940 * the getpeername. Instead fallback on the recorded 4941 * sti->sti_faddr_sa. 4942 */ 4943 if (error) { 4944 /* 4945 * Various stream head errors can be returned to the ioctl. 4946 * However, it is impossible to determine which ones of 4947 * these are really socket level errors that were incorrectly 4948 * consumed by the ioctl. Thus this code silently ignores the 4949 * error - to code explicitly does not reinstate the error 4950 * using soseterror(). 4951 * Experiments have shows that at least this set of 4952 * errors are reported and should not be reinstated on the 4953 * socket: 4954 * EINVAL E.g. 
if an I_LINK was in effect when 4955 * getpeername was called. 4956 * EPIPE The ioctl error semantics prefer the write 4957 * side error over the read side error. 4958 * ENOTCONN The transport just got disconnected but 4959 * sockfs had not yet seen the T_DISCON_IND 4960 * when issuing the ioctl. 4961 */ 4962 error = 0; 4963 } else if (res == 0 && strbuf.len > 0 && 4964 (so->so_state & SS_ISCONNECTED)) { 4965 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 4966 sti->sti_faddr_len = (socklen_t)strbuf.len; 4967 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 4968 sti->sti_faddr_valid = 1; 4969 4970 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 4971 *namelen = sti->sti_faddr_len; 4972 } 4973 kmem_free(addr, addrlen); 4974 #ifdef DEBUG 4975 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4976 pr_addr(so->so_family, sti->sti_faddr_sa, 4977 (t_uscalar_t)sti->sti_faddr_len))); 4978 #endif /* DEBUG */ 4979 done: 4980 so_unlock_single(so, SOLOCKED); 4981 mutex_exit(&so->so_lock); 4982 return (error); 4983 } 4984 4985 /* 4986 * Update sti_laddr by asking the transport (unless AF_UNIX). 
4987 */ 4988 int 4989 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4990 struct cred *cr) 4991 { 4992 struct strbuf strbuf; 4993 int error = 0, res; 4994 void *addr; 4995 t_uscalar_t addrlen; 4996 k_sigset_t smask; 4997 sotpi_info_t *sti = SOTOTPI(so); 4998 4999 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 5000 (void *)so, pr_state(so->so_state, so->so_mode))); 5001 5002 ASSERT(*namelen > 0); 5003 mutex_enter(&so->so_lock); 5004 so_lock_single(so); /* Set SOLOCKED */ 5005 5006 #ifdef DEBUG 5007 5008 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 5009 pr_addr(so->so_family, sti->sti_laddr_sa, 5010 (t_uscalar_t)sti->sti_laddr_len))); 5011 #endif /* DEBUG */ 5012 if (sti->sti_laddr_valid) { 5013 bcopy(sti->sti_laddr_sa, name, 5014 MIN(*namelen, sti->sti_laddr_len)); 5015 *namelen = sti->sti_laddr_len; 5016 goto done; 5017 } 5018 5019 if (so->so_family == AF_UNIX) { 5020 /* 5021 * Transport has different name space - return local info. If we 5022 * have enough space, let consumers know the family. 5023 */ 5024 if (*namelen >= sizeof (sa_family_t)) { 5025 name->sa_family = AF_UNIX; 5026 *namelen = sizeof (sa_family_t); 5027 } else { 5028 *namelen = 0; 5029 } 5030 error = 0; 5031 goto done; 5032 } 5033 if (!(so->so_state & SS_ISBOUND)) { 5034 /* If not bound, then nothing to return. */ 5035 error = 0; 5036 goto done; 5037 } 5038 5039 /* Allocate local buffer to use with ioctl */ 5040 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 5041 mutex_exit(&so->so_lock); 5042 addr = kmem_alloc(addrlen, KM_SLEEP); 5043 5044 /* 5045 * Issue TI_GETMYNAME with signals masked. 5046 * Put the result in sti_laddr_sa so that getsockname works after 5047 * a shutdown(output). 5048 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5049 * back to the socket. 
5050 */ 5051 strbuf.buf = addr; 5052 strbuf.maxlen = addrlen; 5053 strbuf.len = 0; 5054 5055 sigintr(&smask, 0); 5056 res = 0; 5057 ASSERT(cr); 5058 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5059 0, K_TO_K, cr, &res); 5060 sigunintr(&smask); 5061 5062 mutex_enter(&so->so_lock); 5063 /* 5064 * If there is an error record the error in so_error put don't fail 5065 * the getsockname. Instead fallback on the recorded 5066 * sti->sti_laddr_sa. 5067 */ 5068 if (error) { 5069 /* 5070 * Various stream head errors can be returned to the ioctl. 5071 * However, it is impossible to determine which ones of 5072 * these are really socket level errors that were incorrectly 5073 * consumed by the ioctl. Thus this code silently ignores the 5074 * error - to code explicitly does not reinstate the error 5075 * using soseterror(). 5076 * Experiments have shows that at least this set of 5077 * errors are reported and should not be reinstated on the 5078 * socket: 5079 * EINVAL E.g. if an I_LINK was in effect when 5080 * getsockname was called. 5081 * EPIPE The ioctl error semantics prefer the write 5082 * side error over the read side error. 5083 */ 5084 error = 0; 5085 } else if (res == 0 && strbuf.len > 0 && 5086 (so->so_state & SS_ISBOUND)) { 5087 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5088 sti->sti_laddr_len = (socklen_t)strbuf.len; 5089 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5090 sti->sti_laddr_valid = 1; 5091 5092 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5093 *namelen = sti->sti_laddr_len; 5094 } 5095 kmem_free(addr, addrlen); 5096 #ifdef DEBUG 5097 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5098 pr_addr(so->so_family, sti->sti_laddr_sa, 5099 (t_uscalar_t)sti->sti_laddr_len))); 5100 #endif /* DEBUG */ 5101 done: 5102 so_unlock_single(so, SOLOCKED); 5103 mutex_exit(&so->so_lock); 5104 return (error); 5105 } 5106 5107 /* 5108 * Get socket options. 
For SOL_SOCKET options some options are handled 5109 * by the sockfs while others use the value recorded in the sonode as a 5110 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5111 * 5112 * On the return most *optlenp bytes are copied to optval. 5113 */ 5114 /* ARGSUSED */ 5115 int 5116 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5117 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5118 { 5119 struct T_optmgmt_req optmgmt_req; 5120 struct T_optmgmt_ack *optmgmt_ack; 5121 struct opthdr oh; 5122 struct opthdr *opt_res; 5123 mblk_t *mp = NULL; 5124 int error = 0; 5125 void *option = NULL; /* Set if fallback value */ 5126 t_uscalar_t maxlen = *optlenp; 5127 t_uscalar_t len; 5128 uint32_t value; 5129 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5130 struct timeval32 tmo_val32; 5131 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5132 5133 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5134 (void *)so, level, option_name, optval, (void *)optlenp, 5135 pr_state(so->so_state, so->so_mode))); 5136 5137 mutex_enter(&so->so_lock); 5138 so_lock_single(so); /* Set SOLOCKED */ 5139 5140 /* 5141 * Check for SOL_SOCKET options. 5142 * Certain SOL_SOCKET options are returned directly whereas 5143 * others only provide a default (fallback) value should 5144 * the T_SVR4_OPTMGMT_REQ fail. 
5145 */ 5146 if (level == SOL_SOCKET) { 5147 /* Check parameters */ 5148 switch (option_name) { 5149 case SO_TYPE: 5150 case SO_ERROR: 5151 case SO_DEBUG: 5152 case SO_ACCEPTCONN: 5153 case SO_REUSEADDR: 5154 case SO_KEEPALIVE: 5155 case SO_DONTROUTE: 5156 case SO_BROADCAST: 5157 case SO_USELOOPBACK: 5158 case SO_OOBINLINE: 5159 case SO_SNDBUF: 5160 case SO_RCVBUF: 5161 #ifdef notyet 5162 case SO_SNDLOWAT: 5163 case SO_RCVLOWAT: 5164 #endif /* notyet */ 5165 case SO_DOMAIN: 5166 case SO_DGRAM_ERRIND: 5167 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5168 error = EINVAL; 5169 eprintsoline(so, error); 5170 goto done2; 5171 } 5172 break; 5173 case SO_RCVTIMEO: 5174 case SO_SNDTIMEO: 5175 if (get_udatamodel() == DATAMODEL_NONE || 5176 get_udatamodel() == DATAMODEL_NATIVE) { 5177 if (maxlen < sizeof (struct timeval)) { 5178 error = EINVAL; 5179 eprintsoline(so, error); 5180 goto done2; 5181 } 5182 } else { 5183 if (maxlen < sizeof (struct timeval32)) { 5184 error = EINVAL; 5185 eprintsoline(so, error); 5186 goto done2; 5187 } 5188 5189 } 5190 break; 5191 case SO_LINGER: 5192 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5193 error = EINVAL; 5194 eprintsoline(so, error); 5195 goto done2; 5196 } 5197 break; 5198 case SO_SND_BUFINFO: 5199 if (maxlen < (t_uscalar_t) 5200 sizeof (struct so_snd_bufinfo)) { 5201 error = EINVAL; 5202 eprintsoline(so, error); 5203 goto done2; 5204 } 5205 break; 5206 } 5207 5208 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5209 5210 switch (option_name) { 5211 case SO_TYPE: 5212 value = so->so_type; 5213 option = &value; 5214 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5215 5216 case SO_ERROR: 5217 value = sogeterr(so, B_TRUE); 5218 option = &value; 5219 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5220 5221 case SO_ACCEPTCONN: 5222 if (so->so_state & SS_ACCEPTCONN) 5223 value = SO_ACCEPTCONN; 5224 else 5225 value = 0; 5226 #ifdef DEBUG 5227 if (value) { 5228 dprintso(so, 1, 5229 ("sotpi_getsockopt: 0x%x is 
set\n", 5230 option_name)); 5231 } else { 5232 dprintso(so, 1, 5233 ("sotpi_getsockopt: 0x%x not set\n", 5234 option_name)); 5235 } 5236 #endif /* DEBUG */ 5237 option = &value; 5238 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5239 5240 case SO_DEBUG: 5241 case SO_REUSEADDR: 5242 case SO_KEEPALIVE: 5243 case SO_DONTROUTE: 5244 case SO_BROADCAST: 5245 case SO_USELOOPBACK: 5246 case SO_OOBINLINE: 5247 case SO_DGRAM_ERRIND: 5248 value = (so->so_options & option_name); 5249 #ifdef DEBUG 5250 if (value) { 5251 dprintso(so, 1, 5252 ("sotpi_getsockopt: 0x%x is set\n", 5253 option_name)); 5254 } else { 5255 dprintso(so, 1, 5256 ("sotpi_getsockopt: 0x%x not set\n", 5257 option_name)); 5258 } 5259 #endif /* DEBUG */ 5260 option = &value; 5261 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5262 5263 /* 5264 * The following options are only returned by sockfs when the 5265 * T_SVR4_OPTMGMT_REQ fails. 5266 */ 5267 case SO_LINGER: 5268 option = &so->so_linger; 5269 len = (t_uscalar_t)sizeof (struct linger); 5270 break; 5271 case SO_SNDBUF: { 5272 ssize_t lvalue; 5273 5274 /* 5275 * If the option has not been set then get a default 5276 * value from the read queue. This value is 5277 * returned if the transport fails 5278 * the T_SVR4_OPTMGMT_REQ. 5279 */ 5280 lvalue = so->so_sndbuf; 5281 if (lvalue == 0) { 5282 mutex_exit(&so->so_lock); 5283 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5284 QHIWAT, 0, &lvalue); 5285 mutex_enter(&so->so_lock); 5286 dprintso(so, 1, 5287 ("got SO_SNDBUF %ld from q\n", lvalue)); 5288 } 5289 value = (int)lvalue; 5290 option = &value; 5291 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5292 break; 5293 } 5294 case SO_RCVBUF: { 5295 ssize_t lvalue; 5296 5297 /* 5298 * If the option has not been set then get a default 5299 * value from the read queue. This value is 5300 * returned if the transport fails 5301 * the T_SVR4_OPTMGMT_REQ. 
5302 * 5303 * XXX If SO_RCVBUF has been set and this is an 5304 * XPG 4.2 application then do not ask the transport 5305 * since the transport might adjust the value and not 5306 * return exactly what was set by the application. 5307 * For non-XPG 4.2 application we return the value 5308 * that the transport is actually using. 5309 */ 5310 lvalue = so->so_rcvbuf; 5311 if (lvalue == 0) { 5312 mutex_exit(&so->so_lock); 5313 (void) strqget(RD(strvp2wq(SOTOV(so))), 5314 QHIWAT, 0, &lvalue); 5315 mutex_enter(&so->so_lock); 5316 dprintso(so, 1, 5317 ("got SO_RCVBUF %ld from q\n", lvalue)); 5318 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5319 value = (int)lvalue; 5320 option = &value; 5321 goto copyout; /* skip asking transport */ 5322 } 5323 value = (int)lvalue; 5324 option = &value; 5325 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5326 break; 5327 } 5328 case SO_DOMAIN: 5329 value = so->so_family; 5330 option = &value; 5331 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5332 5333 #ifdef notyet 5334 /* 5335 * We do not implement the semantics of these options 5336 * thus we shouldn't implement the options either. 
5337 */ 5338 case SO_SNDLOWAT: 5339 value = so->so_sndlowat; 5340 option = &value; 5341 break; 5342 case SO_RCVLOWAT: 5343 value = so->so_rcvlowat; 5344 option = &value; 5345 break; 5346 #endif /* notyet */ 5347 case SO_SNDTIMEO: 5348 case SO_RCVTIMEO: { 5349 clock_t val; 5350 5351 if (option_name == SO_RCVTIMEO) 5352 val = drv_hztousec(so->so_rcvtimeo); 5353 else 5354 val = drv_hztousec(so->so_sndtimeo); 5355 tmo_val.tv_sec = val / (1000 * 1000); 5356 tmo_val.tv_usec = val % (1000 * 1000); 5357 if (get_udatamodel() == DATAMODEL_NONE || 5358 get_udatamodel() == DATAMODEL_NATIVE) { 5359 option = &tmo_val; 5360 len = sizeof (struct timeval); 5361 } else { 5362 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5363 option = &tmo_val32; 5364 len = sizeof (struct timeval32); 5365 } 5366 break; 5367 } 5368 case SO_SND_BUFINFO: { 5369 snd_bufinfo.sbi_wroff = 5370 (so->so_proto_props).sopp_wroff; 5371 snd_bufinfo.sbi_maxblk = 5372 (so->so_proto_props).sopp_maxblk; 5373 snd_bufinfo.sbi_maxpsz = 5374 (so->so_proto_props).sopp_maxpsz; 5375 snd_bufinfo.sbi_tail = 5376 (so->so_proto_props).sopp_tail; 5377 option = &snd_bufinfo; 5378 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5379 break; 5380 } 5381 } 5382 } 5383 5384 mutex_exit(&so->so_lock); 5385 5386 /* Send request */ 5387 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5388 optmgmt_req.MGMT_flags = T_CHECK; 5389 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5390 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5391 5392 oh.level = level; 5393 oh.name = option_name; 5394 oh.len = maxlen; 5395 5396 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5397 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5398 /* Let option management work in the presence of data flow control */ 5399 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5400 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5401 mp = NULL; 5402 mutex_enter(&so->so_lock); 5403 if (error) { 5404 eprintsoline(so, error); 5405 goto done2; 
5406 } 5407 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5408 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5409 if (error) { 5410 if (option != NULL) { 5411 /* We have a fallback value */ 5412 error = 0; 5413 goto copyout; 5414 } 5415 eprintsoline(so, error); 5416 goto done2; 5417 } 5418 ASSERT(mp); 5419 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5420 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5421 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5422 if (opt_res == NULL) { 5423 if (option != NULL) { 5424 /* We have a fallback value */ 5425 error = 0; 5426 goto copyout; 5427 } 5428 error = EPROTO; 5429 eprintsoline(so, error); 5430 goto done; 5431 } 5432 option = &opt_res[1]; 5433 5434 /* check to ensure that the option is within bounds */ 5435 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5436 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5437 if (option != NULL) { 5438 /* We have a fallback value */ 5439 error = 0; 5440 goto copyout; 5441 } 5442 error = EPROTO; 5443 eprintsoline(so, error); 5444 goto done; 5445 } 5446 5447 len = opt_res->len; 5448 5449 copyout: { 5450 t_uscalar_t size = MIN(len, maxlen); 5451 bcopy(option, optval, size); 5452 bcopy(&size, optlenp, sizeof (size)); 5453 } 5454 done: 5455 freemsg(mp); 5456 done2: 5457 so_unlock_single(so, SOLOCKED); 5458 mutex_exit(&so->so_lock); 5459 5460 return (error); 5461 } 5462 5463 /* 5464 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5465 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5466 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5467 * setsockopt has to work even if the transport does not support the option. 
5468 */ 5469 /* ARGSUSED */ 5470 int 5471 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5472 const void *optval, t_uscalar_t optlen, struct cred *cr) 5473 { 5474 struct T_optmgmt_req optmgmt_req; 5475 struct opthdr oh; 5476 mblk_t *mp; 5477 int error = 0; 5478 boolean_t handled = B_FALSE; 5479 5480 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5481 (void *)so, level, option_name, optval, optlen, 5482 pr_state(so->so_state, so->so_mode))); 5483 5484 /* X/Open requires this check */ 5485 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5486 if (xnet_check_print) 5487 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5488 return (EINVAL); 5489 } 5490 5491 mutex_enter(&so->so_lock); 5492 so_lock_single(so); /* Set SOLOCKED */ 5493 mutex_exit(&so->so_lock); 5494 5495 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5496 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5497 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5498 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5499 5500 oh.level = level; 5501 oh.name = option_name; 5502 oh.len = optlen; 5503 5504 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5505 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5506 /* Let option management work in the presence of data flow control */ 5507 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5508 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5509 mp = NULL; 5510 mutex_enter(&so->so_lock); 5511 if (error) { 5512 eprintsoline(so, error); 5513 goto done2; 5514 } 5515 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5516 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5517 if (error) { 5518 eprintsoline(so, error); 5519 goto done; 5520 } 5521 ASSERT(mp); 5522 /* No need to verify T_optmgmt_ack */ 5523 freemsg(mp); 5524 done: 5525 /* 5526 * Check for SOL_SOCKET options and record their values. 
5527 * If we know about a SOL_SOCKET parameter and the transport 5528 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5529 * EPROTO) we let the setsockopt succeed. 5530 */ 5531 if (level == SOL_SOCKET) { 5532 /* Check parameters */ 5533 switch (option_name) { 5534 case SO_DEBUG: 5535 case SO_REUSEADDR: 5536 case SO_KEEPALIVE: 5537 case SO_DONTROUTE: 5538 case SO_BROADCAST: 5539 case SO_USELOOPBACK: 5540 case SO_OOBINLINE: 5541 case SO_SNDBUF: 5542 case SO_RCVBUF: 5543 #ifdef notyet 5544 case SO_SNDLOWAT: 5545 case SO_RCVLOWAT: 5546 #endif /* notyet */ 5547 case SO_DGRAM_ERRIND: 5548 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5549 error = EINVAL; 5550 eprintsoline(so, error); 5551 goto done2; 5552 } 5553 ASSERT(optval); 5554 handled = B_TRUE; 5555 break; 5556 case SO_SNDTIMEO: 5557 case SO_RCVTIMEO: 5558 if (get_udatamodel() == DATAMODEL_NONE || 5559 get_udatamodel() == DATAMODEL_NATIVE) { 5560 if (optlen != sizeof (struct timeval)) { 5561 error = EINVAL; 5562 eprintsoline(so, error); 5563 goto done2; 5564 } 5565 } else { 5566 if (optlen != sizeof (struct timeval32)) { 5567 error = EINVAL; 5568 eprintsoline(so, error); 5569 goto done2; 5570 } 5571 } 5572 ASSERT(optval); 5573 handled = B_TRUE; 5574 break; 5575 case SO_LINGER: 5576 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5577 error = EINVAL; 5578 eprintsoline(so, error); 5579 goto done2; 5580 } 5581 ASSERT(optval); 5582 handled = B_TRUE; 5583 break; 5584 } 5585 5586 #define intvalue (*(int32_t *)optval) 5587 5588 switch (option_name) { 5589 case SO_TYPE: 5590 case SO_ERROR: 5591 case SO_ACCEPTCONN: 5592 /* Can't be set */ 5593 error = ENOPROTOOPT; 5594 goto done2; 5595 case SO_LINGER: { 5596 struct linger *l = (struct linger *)optval; 5597 5598 so->so_linger.l_linger = l->l_linger; 5599 if (l->l_onoff) { 5600 so->so_linger.l_onoff = SO_LINGER; 5601 so->so_options |= SO_LINGER; 5602 } else { 5603 so->so_linger.l_onoff = 0; 5604 so->so_options &= ~SO_LINGER; 5605 } 5606 break; 5607 } 5608 
5609 case SO_DEBUG: 5610 #ifdef SOCK_TEST 5611 if (intvalue & 2) 5612 sock_test_timelimit = 10 * hz; 5613 else 5614 sock_test_timelimit = 0; 5615 5616 if (intvalue & 4) 5617 do_useracc = 0; 5618 else 5619 do_useracc = 1; 5620 #endif /* SOCK_TEST */ 5621 /* FALLTHRU */ 5622 case SO_REUSEADDR: 5623 case SO_KEEPALIVE: 5624 case SO_DONTROUTE: 5625 case SO_BROADCAST: 5626 case SO_USELOOPBACK: 5627 case SO_OOBINLINE: 5628 case SO_DGRAM_ERRIND: 5629 if (intvalue != 0) { 5630 dprintso(so, 1, 5631 ("socket_setsockopt: setting 0x%x\n", 5632 option_name)); 5633 so->so_options |= option_name; 5634 } else { 5635 dprintso(so, 1, 5636 ("socket_setsockopt: clearing 0x%x\n", 5637 option_name)); 5638 so->so_options &= ~option_name; 5639 } 5640 break; 5641 /* 5642 * The following options are only returned by us when the 5643 * transport layer fails. 5644 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5645 * since the transport might adjust the value and not 5646 * return exactly what was set by the application. 5647 */ 5648 case SO_SNDBUF: 5649 so->so_sndbuf = intvalue; 5650 break; 5651 case SO_RCVBUF: 5652 so->so_rcvbuf = intvalue; 5653 break; 5654 case SO_RCVPSH: 5655 so->so_rcv_timer_interval = intvalue; 5656 break; 5657 #ifdef notyet 5658 /* 5659 * We do not implement the semantics of these options 5660 * thus we shouldn't implement the options either. 
5661 */ 5662 case SO_SNDLOWAT: 5663 so->so_sndlowat = intvalue; 5664 break; 5665 case SO_RCVLOWAT: 5666 so->so_rcvlowat = intvalue; 5667 break; 5668 #endif /* notyet */ 5669 case SO_SNDTIMEO: 5670 case SO_RCVTIMEO: { 5671 struct timeval tl; 5672 clock_t val; 5673 5674 if (get_udatamodel() == DATAMODEL_NONE || 5675 get_udatamodel() == DATAMODEL_NATIVE) 5676 bcopy(&tl, (struct timeval *)optval, 5677 sizeof (struct timeval)); 5678 else 5679 TIMEVAL32_TO_TIMEVAL(&tl, 5680 (struct timeval32 *)optval); 5681 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5682 if (option_name == SO_RCVTIMEO) 5683 so->so_rcvtimeo = drv_usectohz(val); 5684 else 5685 so->so_sndtimeo = drv_usectohz(val); 5686 break; 5687 } 5688 } 5689 #undef intvalue 5690 5691 if (error) { 5692 if ((error == ENOPROTOOPT || error == EPROTO || 5693 error == EINVAL) && handled) { 5694 dprintso(so, 1, 5695 ("setsockopt: ignoring error %d for 0x%x\n", 5696 error, option_name)); 5697 error = 0; 5698 } 5699 } 5700 } 5701 done2: 5702 so_unlock_single(so, SOLOCKED); 5703 mutex_exit(&so->so_lock); 5704 return (error); 5705 } 5706 5707 /* 5708 * sotpi_close() is called when the last open reference goes away. 
5709 */ 5710 /* ARGSUSED */ 5711 int 5712 sotpi_close(struct sonode *so, int flag, struct cred *cr) 5713 { 5714 struct vnode *vp = SOTOV(so); 5715 dev_t dev; 5716 int error = 0; 5717 sotpi_info_t *sti = SOTOTPI(so); 5718 5719 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 5720 (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 5721 5722 dev = sti->sti_dev; 5723 5724 ASSERT(STREAMSTAB(getmajor(dev))); 5725 5726 mutex_enter(&so->so_lock); 5727 so_lock_single(so); /* Set SOLOCKED */ 5728 5729 ASSERT(so_verify_oobstate(so)); 5730 5731 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 5732 sti->sti_nl7c_flags = 0; 5733 nl7c_close(so); 5734 } 5735 5736 if (vp->v_stream != NULL) { 5737 vnode_t *ux_vp; 5738 5739 if (so->so_family == AF_UNIX) { 5740 /* Could avoid this when CANTSENDMORE for !dgram */ 5741 so_unix_close(so); 5742 } 5743 5744 mutex_exit(&so->so_lock); 5745 /* 5746 * Disassemble the linkage from the AF_UNIX underlying file 5747 * system vnode to this socket (by atomically clearing 5748 * v_stream in vn_rele_stream) before strclose clears sd_vnode 5749 * and frees the stream head. 5750 */ 5751 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 5752 ASSERT(ux_vp->v_stream); 5753 sti->sti_ux_bound_vp = NULL; 5754 vn_rele_stream(ux_vp); 5755 } 5756 error = strclose(vp, flag, cr); 5757 vp->v_stream = NULL; 5758 mutex_enter(&so->so_lock); 5759 } 5760 5761 /* 5762 * Flush the T_DISCON_IND on sti_discon_ind_mp. 5763 */ 5764 so_flush_discon_ind(so); 5765 5766 so_unlock_single(so, SOLOCKED); 5767 mutex_exit(&so->so_lock); 5768 5769 /* 5770 * Needed for STREAMs. 5771 * Decrement the device driver's reference count for streams 5772 * opened via the clone dip. The driver was held in clone_open(). 5773 * The absence of clone_close() forces this asymmetry. 
5774 */ 5775 if (so->so_flag & SOCLONE) 5776 ddi_rele_driver(getmajor(dev)); 5777 5778 return (error); 5779 } 5780 5781 static int 5782 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 5783 struct cred *cr, int32_t *rvalp) 5784 { 5785 struct vnode *vp = SOTOV(so); 5786 sotpi_info_t *sti = SOTOTPI(so); 5787 int error = 0; 5788 5789 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 5790 cmd, arg, pr_state(so->so_state, so->so_mode))); 5791 5792 switch (cmd) { 5793 case SIOCSQPTR: 5794 /* 5795 * SIOCSQPTR is valid only when helper stream is created 5796 * by the protocol. 5797 */ 5798 case _I_INSERT: 5799 case _I_REMOVE: 5800 /* 5801 * Since there's no compelling reason to support these ioctls 5802 * on sockets, and doing so would increase the complexity 5803 * markedly, prevent it. 5804 */ 5805 return (EOPNOTSUPP); 5806 5807 case I_FIND: 5808 case I_LIST: 5809 case I_LOOK: 5810 case I_POP: 5811 case I_PUSH: 5812 /* 5813 * To prevent races and inconsistencies between the actual 5814 * state of the stream and the state according to the sonode, 5815 * we serialize all operations which modify or operate on the 5816 * list of modules on the socket's stream. 5817 */ 5818 mutex_enter(&sti->sti_plumb_lock); 5819 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 5820 mutex_exit(&sti->sti_plumb_lock); 5821 return (error); 5822 5823 default: 5824 if (so->so_version != SOV_STREAM) 5825 break; 5826 5827 /* 5828 * The imaginary "sockmod" has been popped; act as a stream. 5829 */ 5830 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5831 } 5832 5833 ASSERT(so->so_version != SOV_STREAM); 5834 5835 /* 5836 * Process socket-specific ioctls. 
5837 */ 5838 switch (cmd) { 5839 case FIONBIO: { 5840 int32_t value; 5841 5842 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5843 (mode & (int)FKIOCTL))) 5844 return (EFAULT); 5845 5846 mutex_enter(&so->so_lock); 5847 if (value) { 5848 so->so_state |= SS_NDELAY; 5849 } else { 5850 so->so_state &= ~SS_NDELAY; 5851 } 5852 mutex_exit(&so->so_lock); 5853 return (0); 5854 } 5855 5856 case FIOASYNC: { 5857 int32_t value; 5858 5859 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5860 (mode & (int)FKIOCTL))) 5861 return (EFAULT); 5862 5863 mutex_enter(&so->so_lock); 5864 /* 5865 * SS_ASYNC flag not already set correctly? 5866 * (!value != !(so->so_state & SS_ASYNC)) 5867 * but some engineers find that too hard to read. 5868 */ 5869 if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 5870 value != 0 && (so->so_state & SS_ASYNC) == 0) 5871 error = so_flip_async(so, vp, mode, cr); 5872 mutex_exit(&so->so_lock); 5873 return (error); 5874 } 5875 5876 case SIOCSPGRP: 5877 case FIOSETOWN: { 5878 pid_t pgrp; 5879 5880 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 5881 (mode & (int)FKIOCTL))) 5882 return (EFAULT); 5883 5884 mutex_enter(&so->so_lock); 5885 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 5886 /* Any change? */ 5887 if (pgrp != so->so_pgrp) 5888 error = so_set_siggrp(so, vp, pgrp, mode, cr); 5889 mutex_exit(&so->so_lock); 5890 return (error); 5891 } 5892 case SIOCGPGRP: 5893 case FIOGETOWN: 5894 if (so_copyout(&so->so_pgrp, (void *)arg, 5895 sizeof (pid_t), (mode & (int)FKIOCTL))) 5896 return (EFAULT); 5897 return (0); 5898 5899 case SIOCATMARK: { 5900 int retval; 5901 uint_t so_state; 5902 5903 /* 5904 * strwaitmark has a finite timeout after which it 5905 * returns -1 if the mark state is undetermined. 
5906 * In order to avoid any race between the mark state 5907 * in sockfs and the mark state in the stream head this 5908 * routine loops until the mark state can be determined 5909 * (or the urgent data indication has been removed by some 5910 * other thread). 5911 */ 5912 do { 5913 mutex_enter(&so->so_lock); 5914 so_state = so->so_state; 5915 mutex_exit(&so->so_lock); 5916 if (so_state & SS_RCVATMARK) { 5917 retval = 1; 5918 } else if (!(so_state & SS_OOBPEND)) { 5919 /* 5920 * No SIGURG has been generated -- there is no 5921 * pending or present urgent data. Thus can't 5922 * possibly be at the mark. 5923 */ 5924 retval = 0; 5925 } else { 5926 /* 5927 * Have the stream head wait until there is 5928 * either some messages on the read queue, or 5929 * STRATMARK or STRNOTATMARK gets set. The 5930 * STRNOTATMARK flag is used so that the 5931 * transport can send up a MSGNOTMARKNEXT 5932 * M_DATA to indicate that it is not 5933 * at the mark and additional data is not about 5934 * to be send upstream. 5935 * 5936 * If the mark state is undetermined this will 5937 * return -1 and we will loop rechecking the 5938 * socket state. 5939 */ 5940 retval = strwaitmark(vp); 5941 } 5942 } while (retval == -1); 5943 5944 if (so_copyout(&retval, (void *)arg, sizeof (int), 5945 (mode & (int)FKIOCTL))) 5946 return (EFAULT); 5947 return (0); 5948 } 5949 5950 case I_FDINSERT: 5951 case I_SENDFD: 5952 case I_RECVFD: 5953 case I_ATMARK: 5954 case _SIOCSOCKFALLBACK: 5955 /* 5956 * These ioctls do not apply to sockets. I_FDINSERT can be 5957 * used to send M_PROTO messages without modifying the socket 5958 * state. I_SENDFD/RECVFD should not be used for socket file 5959 * descriptor passing since they assume a twisted stream. 5960 * SIOCATMARK must be used instead of I_ATMARK. 5961 * 5962 * _SIOCSOCKFALLBACK from an application should never be 5963 * processed. It is only generated by socktpi_open() or 5964 * in response to I_POP or I_PUSH. 
5965 */ 5966 #ifdef DEBUG 5967 zcmn_err(getzoneid(), CE_WARN, 5968 "Unsupported STREAMS ioctl 0x%x on socket. " 5969 "Pid = %d\n", cmd, curproc->p_pid); 5970 #endif /* DEBUG */ 5971 return (EOPNOTSUPP); 5972 5973 case _I_GETPEERCRED: 5974 if ((mode & FKIOCTL) == 0) 5975 return (EINVAL); 5976 5977 mutex_enter(&so->so_lock); 5978 if ((so->so_mode & SM_CONNREQUIRED) == 0) { 5979 error = ENOTSUP; 5980 } else if ((so->so_state & SS_ISCONNECTED) == 0) { 5981 error = ENOTCONN; 5982 } else if (so->so_peercred != NULL) { 5983 k_peercred_t *kp = (k_peercred_t *)arg; 5984 kp->pc_cr = so->so_peercred; 5985 kp->pc_cpid = so->so_cpid; 5986 crhold(so->so_peercred); 5987 } else { 5988 error = EINVAL; 5989 } 5990 mutex_exit(&so->so_lock); 5991 return (error); 5992 5993 default: 5994 /* 5995 * Do the higher-order bits of the ioctl cmd indicate 5996 * that it is an I_* streams ioctl? 5997 */ 5998 if ((cmd & 0xffffff00U) == STR && 5999 so->so_version == SOV_SOCKBSD) { 6000 #ifdef DEBUG 6001 zcmn_err(getzoneid(), CE_WARN, 6002 "Unsupported STREAMS ioctl 0x%x on socket. " 6003 "Pid = %d\n", cmd, curproc->p_pid); 6004 #endif /* DEBUG */ 6005 return (EOPNOTSUPP); 6006 } 6007 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6008 } 6009 } 6010 6011 /* 6012 * Handle plumbing-related ioctls. 6013 */ 6014 static int 6015 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 6016 struct cred *cr, int32_t *rvalp) 6017 { 6018 static const char sockmod_name[] = "sockmod"; 6019 struct sonode *so = VTOSO(vp); 6020 char mname[FMNAMESZ + 1]; 6021 int error; 6022 sotpi_info_t *sti = SOTOTPI(so); 6023 6024 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 6025 6026 if (so->so_version == SOV_SOCKBSD) 6027 return (EOPNOTSUPP); 6028 6029 if (so->so_version == SOV_STREAM) { 6030 /* 6031 * The imaginary "sockmod" has been popped - act as a stream. 6032 * If this is a push of sockmod then change back to a socket. 6033 */ 6034 if (cmd == I_PUSH) { 6035 error = ((mode & FKIOCTL) ? 
copystr : copyinstr)( 6036 (void *)arg, mname, sizeof (mname), NULL); 6037 6038 if (error == 0 && strcmp(mname, sockmod_name) == 0) { 6039 dprintso(so, 0, ("socktpi_ioctl: going to " 6040 "socket version\n")); 6041 so_stream2sock(so); 6042 return (0); 6043 } 6044 } 6045 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6046 } 6047 6048 switch (cmd) { 6049 case I_PUSH: 6050 if (sti->sti_direct) { 6051 mutex_enter(&so->so_lock); 6052 so_lock_single(so); 6053 mutex_exit(&so->so_lock); 6054 6055 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 6056 cr, rvalp); 6057 6058 mutex_enter(&so->so_lock); 6059 if (error == 0) 6060 sti->sti_direct = 0; 6061 so_unlock_single(so, SOLOCKED); 6062 mutex_exit(&so->so_lock); 6063 6064 if (error != 0) 6065 return (error); 6066 } 6067 6068 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6069 if (error == 0) 6070 sti->sti_pushcnt++; 6071 return (error); 6072 6073 case I_POP: 6074 if (sti->sti_pushcnt == 0) { 6075 /* Emulate sockmod being popped */ 6076 dprintso(so, 0, 6077 ("socktpi_ioctl: going to STREAMS version\n")); 6078 return (so_sock2stream(so)); 6079 } 6080 6081 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6082 if (error == 0) 6083 sti->sti_pushcnt--; 6084 return (error); 6085 6086 case I_LIST: { 6087 struct str_mlist *kmlistp, *umlistp; 6088 struct str_list kstrlist; 6089 ssize_t kstrlistsize; 6090 int i, nmods; 6091 6092 STRUCT_DECL(str_list, ustrlist); 6093 STRUCT_INIT(ustrlist, mode); 6094 6095 if (arg == NULL) { 6096 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6097 if (error == 0) 6098 (*rvalp)++; /* Add one for sockmod */ 6099 return (error); 6100 } 6101 6102 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 6103 STRUCT_SIZE(ustrlist), mode & FKIOCTL); 6104 if (error != 0) 6105 return (error); 6106 6107 nmods = STRUCT_FGET(ustrlist, sl_nmods); 6108 if (nmods <= 0) 6109 return (EINVAL); 6110 /* 6111 * Ceiling nmods at nstrpush to prevent someone from 6112 * maliciously 
consuming lots of kernel memory. 6113 */ 6114 nmods = MIN(nmods, nstrpush); 6115 6116 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 6117 kstrlist.sl_nmods = nmods; 6118 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 6119 6120 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 6121 cr, rvalp); 6122 if (error != 0) 6123 goto done; 6124 6125 /* 6126 * Considering the module list as a 0-based array of sl_nmods 6127 * modules, sockmod should conceptually exist at slot 6128 * sti_pushcnt. Insert sockmod at this location by sliding all 6129 * of the module names after so_pushcnt over by one. We know 6130 * that there will be room to do this since we allocated 6131 * sl_modlist with an additional slot. 6132 */ 6133 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--) 6134 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1]; 6135 6136 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name); 6137 kstrlist.sl_nmods++; 6138 6139 /* 6140 * Copy all of the entries out to ustrlist. 
6141 */ 6142 kmlistp = kstrlist.sl_modlist; 6143 umlistp = STRUCT_FGETP(ustrlist, sl_modlist); 6144 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) { 6145 error = so_copyout(kmlistp++, umlistp++, 6146 sizeof (struct str_mlist), mode & FKIOCTL); 6147 if (error != 0) 6148 goto done; 6149 } 6150 6151 error = so_copyout(&i, (void *)arg, sizeof (int32_t), 6152 mode & FKIOCTL); 6153 if (error == 0) 6154 *rvalp = 0; 6155 done: 6156 kmem_free(kstrlist.sl_modlist, kstrlistsize); 6157 return (error); 6158 } 6159 case I_LOOK: 6160 if (sti->sti_pushcnt == 0) { 6161 return (so_copyout(sockmod_name, (void *)arg, 6162 sizeof (sockmod_name), mode & FKIOCTL)); 6163 } 6164 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6165 6166 case I_FIND: 6167 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6168 if (error && error != EINVAL) 6169 return (error); 6170 6171 /* if not found and string was sockmod return 1 */ 6172 if (*rvalp == 0 || error == EINVAL) { 6173 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6174 (void *)arg, mname, sizeof (mname), NULL); 6175 if (error == ENAMETOOLONG) 6176 error = EINVAL; 6177 6178 if (error == 0 && strcmp(mname, sockmod_name) == 0) 6179 *rvalp = 1; 6180 } 6181 return (error); 6182 6183 default: 6184 panic("socktpi_plumbioctl: unknown ioctl %d", cmd); 6185 break; 6186 } 6187 6188 return (0); 6189 } 6190 6191 /* 6192 * Wrapper around the streams poll routine that implements socket poll 6193 * semantics. 6194 * The sockfs never calls pollwakeup itself - the stream head take care 6195 * of all pollwakeups. Since sockfs never holds so_lock when calling the 6196 * stream head there can never be a deadlock due to holding so_lock across 6197 * pollwakeup and acquiring so_lock in this routine. 6198 * 6199 * However, since the performance of VOP_POLL is critical we avoid 6200 * acquiring so_lock here. 
This is based on two assumptions: 6201 * - The poll implementation holds locks to serialize the VOP_POLL call 6202 * and a pollwakeup for the same pollhead. This ensures that should 6203 * e.g. so_state change during a socktpi_poll call the pollwakeup 6204 * (which strsock_* and strrput conspire to issue) is issued after 6205 * the state change. Thus the pollwakeup will block until VOP_POLL has 6206 * returned and then wake up poll and have it call VOP_POLL again. 6207 * - The reading of so_state without holding so_lock does not result in 6208 * stale data that is older than the latest state change that has dropped 6209 * so_lock. This is ensured by the mutex_exit issuing the appropriate 6210 * memory barrier to force the data into the coherency domain. 6211 */ 6212 static int 6213 sotpi_poll( 6214 struct sonode *so, 6215 short events, 6216 int anyyet, 6217 short *reventsp, 6218 struct pollhead **phpp) 6219 { 6220 short origevents = events; 6221 struct vnode *vp = SOTOV(so); 6222 int error; 6223 int so_state = so->so_state; /* snapshot */ 6224 sotpi_info_t *sti = SOTOTPI(so); 6225 6226 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n", 6227 (void *)vp, pr_state(so_state, so->so_mode), so->so_error)); 6228 6229 ASSERT(vp->v_type == VSOCK); 6230 ASSERT(vp->v_stream != NULL); 6231 6232 if (so->so_version == SOV_STREAM) { 6233 /* The imaginary "sockmod" has been popped - act as a stream */ 6234 return (strpoll(vp->v_stream, events, anyyet, 6235 reventsp, phpp)); 6236 } 6237 6238 if (!(so_state & SS_ISCONNECTED) && 6239 (so->so_mode & SM_CONNREQUIRED)) { 6240 /* Not connected yet - turn off write side events */ 6241 events &= ~(POLLOUT|POLLWRBAND); 6242 } 6243 /* 6244 * Check for errors without calling strpoll if the caller wants them. 6245 * In sockets the errors are represented as input/output events 6246 * and there is no need to ask the stream head for this information. 
6247 */ 6248 if (so->so_error != 0 && 6249 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) { 6250 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; 6251 return (0); 6252 } 6253 /* 6254 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages. 6255 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA 6256 * will not trigger a POLLIN event with POLLRDDATA set. 6257 * The handling of urgent data (causing POLLRDBAND) is done by 6258 * inspecting SS_OOBPEND below. 6259 */ 6260 events |= POLLRDDATA; 6261 6262 /* 6263 * After shutdown(output) a stream head write error is set. 6264 * However, we should not return output events. 6265 */ 6266 events |= POLLNOERR; 6267 error = strpoll(vp->v_stream, events, anyyet, 6268 reventsp, phpp); 6269 if (error) 6270 return (error); 6271 6272 ASSERT(!(*reventsp & POLLERR)); 6273 6274 /* 6275 * Notes on T_CONN_IND handling for sockets. 6276 * 6277 * If strpoll() returned without events, SR_POLLIN is guaranteed 6278 * to be set, ensuring any subsequent strrput() runs pollwakeup(). 6279 * 6280 * Since the so_lock is not held, soqueueconnind() may have run 6281 * and a T_CONN_IND may be waiting. We now check for any queued 6282 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events 6283 * to ensure poll returns. 6284 * 6285 * However: 6286 * If the T_CONN_IND hasn't arrived by the time strpoll() returns, 6287 * when strrput() does run for an arriving M_PROTO with T_CONN_IND 6288 * the following actions will occur; taken together they ensure the 6289 * syscall will return. 6290 * 6291 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if 6292 * the accept() was run on a non-blocking socket sowaitconnind() 6293 * may have already returned EWOULDBLOCK, so not be waiting to 6294 * process the message. Additionally socktpi_poll() has probably 6295 * proceeded past the sti_conn_ind_head check below. 6296 * 2. 
strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake 6297 * this thread, however that could occur before poll_common() 6298 * has entered cv_wait. 6299 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock. 6300 * 6301 * Before proceeding to cv_wait() in poll_common() for an event, 6302 * poll_common() atomically checks for T_POLLWAKE under the pc_lock, 6303 * and if set, re-calls strpoll() to ensure the late arriving 6304 * T_CONN_IND is recognized, and pollsys() returns. 6305 */ 6306 6307 if (sti->sti_conn_ind_head != NULL) 6308 *reventsp |= (POLLIN|POLLRDNORM) & events; 6309 6310 if (so->so_state & SS_CANTRCVMORE) { 6311 *reventsp |= POLLRDHUP & events; 6312 6313 if (so->so_state & SS_CANTSENDMORE) 6314 *reventsp |= POLLHUP; 6315 } 6316 6317 if (so->so_state & SS_OOBPEND) 6318 *reventsp |= POLLRDBAND & events; 6319 6320 if (sti->sti_nl7c_rcv_mp != NULL) { 6321 *reventsp |= (POLLIN|POLLRDNORM) & events; 6322 } 6323 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 6324 ((POLLIN|POLLRDNORM) & *reventsp)) { 6325 sti->sti_nl7c_flags |= NL7C_POLLIN; 6326 } 6327 6328 return (0); 6329 } 6330 6331 /*ARGSUSED*/ 6332 static int 6333 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6334 { 6335 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6336 int error = 0; 6337 6338 error = sonode_constructor(buf, cdrarg, kmflags); 6339 if (error != 0) 6340 return (error); 6341 6342 error = i_sotpi_info_constructor(&st->st_info); 6343 if (error != 0) 6344 sonode_destructor(buf, cdrarg); 6345 6346 st->st_sonode.so_priv = &st->st_info; 6347 6348 return (error); 6349 } 6350 6351 /*ARGSUSED1*/ 6352 static void 6353 socktpi_destructor(void *buf, void *cdrarg) 6354 { 6355 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6356 6357 ASSERT(st->st_sonode.so_priv == &st->st_info); 6358 st->st_sonode.so_priv = NULL; 6359 6360 i_sotpi_info_destructor(&st->st_info); 6361 sonode_destructor(buf, cdrarg); 6362 } 6363 6364 static int 6365 socktpi_unix_constructor(void *buf, void *cdrarg, int 
kmflags) 6366 { 6367 int retval; 6368 6369 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6370 struct sonode *so = (struct sonode *)buf; 6371 sotpi_info_t *sti = SOTOTPI(so); 6372 6373 mutex_enter(&socklist.sl_lock); 6374 6375 sti->sti_next_so = socklist.sl_list; 6376 sti->sti_prev_so = NULL; 6377 if (sti->sti_next_so != NULL) 6378 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6379 socklist.sl_list = so; 6380 6381 mutex_exit(&socklist.sl_lock); 6382 6383 } 6384 return (retval); 6385 } 6386 6387 static void 6388 socktpi_unix_destructor(void *buf, void *cdrarg) 6389 { 6390 struct sonode *so = (struct sonode *)buf; 6391 sotpi_info_t *sti = SOTOTPI(so); 6392 6393 mutex_enter(&socklist.sl_lock); 6394 6395 if (sti->sti_next_so != NULL) 6396 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6397 if (sti->sti_prev_so != NULL) 6398 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6399 else 6400 socklist.sl_list = sti->sti_next_so; 6401 6402 mutex_exit(&socklist.sl_lock); 6403 6404 socktpi_destructor(buf, cdrarg); 6405 } 6406 6407 int 6408 socktpi_init(void) 6409 { 6410 /* 6411 * Create sonode caches. We create a special one for AF_UNIX so 6412 * that we can track them for netstat(1m). 6413 */ 6414 socktpi_cache = kmem_cache_create("socktpi_cache", 6415 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6416 socktpi_destructor, NULL, NULL, NULL, 0); 6417 6418 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6419 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6420 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6421 6422 return (0); 6423 } 6424 6425 /* 6426 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6427 * 6428 * Caller must still update state and mode using sotpi_update_state(). 
6429 */ 6430 int 6431 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 6432 boolean_t *direct, queue_t **qp, struct cred *cr) 6433 { 6434 sotpi_info_t *sti; 6435 struct sockparams *origsp = so->so_sockparams; 6436 sock_lower_handle_t handle = so->so_proto_handle; 6437 struct stdata *stp; 6438 struct vnode *vp; 6439 queue_t *q; 6440 int error = 0; 6441 6442 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6443 SS_FALLBACK_PENDING); 6444 ASSERT(SOCK_IS_NONSTR(so)); 6445 6446 *qp = NULL; 6447 *direct = B_FALSE; 6448 so->so_sockparams = newsp; 6449 /* 6450 * Allocate and initalize fields required by TPI. 6451 */ 6452 (void) sotpi_info_create(so, KM_SLEEP); 6453 sotpi_info_init(so); 6454 6455 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) { 6456 sotpi_info_fini(so); 6457 sotpi_info_destroy(so); 6458 return (error); 6459 } 6460 ASSERT(handle == so->so_proto_handle); 6461 sti = SOTOTPI(so); 6462 if (sti->sti_direct != 0) 6463 *direct = B_TRUE; 6464 6465 /* 6466 * Keep the original sp around so we can properly dispose of the 6467 * sonode when the socket is being closed. 6468 */ 6469 sti->sti_orig_sp = origsp; 6470 6471 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 6472 so_alloc_addr(so, so->so_max_addr_len); 6473 6474 /* 6475 * If the application has done a SIOCSPGRP, make sure the 6476 * STREAM head is aware. This needs to take place before 6477 * the protocol start sending up messages. Otherwise we 6478 * might miss to generate SIGPOLL. 6479 * 6480 * It is possible that the application will receive duplicate 6481 * signals if some were already generated for either data or 6482 * connection indications. 6483 */ 6484 if (so->so_pgrp != 0) { 6485 if (so_set_events(so, so->so_vnode, cr) != 0) 6486 so->so_pgrp = 0; 6487 } 6488 6489 /* 6490 * Determine which queue to use. 
6491 */ 6492 vp = SOTOV(so); 6493 stp = vp->v_stream; 6494 ASSERT(stp != NULL); 6495 q = stp->sd_wrq->q_next; 6496 6497 /* 6498 * Skip any modules that may have been auto pushed when the device 6499 * was opened 6500 */ 6501 while (q->q_next != NULL) 6502 q = q->q_next; 6503 *qp = _RD(q); 6504 6505 /* This is now a STREAMS sockets */ 6506 so->so_not_str = B_FALSE; 6507 6508 return (error); 6509 } 6510 6511 /* 6512 * Revert a TPI sonode. It is only allowed to revert the sonode during 6513 * the fallback process. 6514 */ 6515 void 6516 sotpi_revert_sonode(struct sonode *so, struct cred *cr) 6517 { 6518 vnode_t *vp = SOTOV(so); 6519 6520 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6521 SS_FALLBACK_PENDING); 6522 ASSERT(!SOCK_IS_NONSTR(so)); 6523 ASSERT(vp->v_stream != NULL); 6524 6525 strclean(vp); 6526 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr); 6527 6528 /* 6529 * Restore the original sockparams. The caller is responsible for 6530 * dropping the ref to the new sp. 
6531 */ 6532 so->so_sockparams = SOTOTPI(so)->sti_orig_sp; 6533 6534 sotpi_info_fini(so); 6535 sotpi_info_destroy(so); 6536 6537 /* This is no longer a STREAMS sockets */ 6538 so->so_not_str = B_TRUE; 6539 } 6540 6541 void 6542 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 6543 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 6544 socklen_t faddrlen, short opts) 6545 { 6546 sotpi_info_t *sti = SOTOTPI(so); 6547 6548 so_proc_tcapability_ack(so, tcap); 6549 6550 so->so_options |= opts; 6551 6552 /* 6553 * Determine whether the foreign and local address are valid 6554 */ 6555 if (laddrlen != 0) { 6556 ASSERT(laddrlen <= sti->sti_laddr_maxlen); 6557 sti->sti_laddr_len = laddrlen; 6558 bcopy(laddr, sti->sti_laddr_sa, laddrlen); 6559 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 6560 } 6561 6562 if (faddrlen != 0) { 6563 ASSERT(faddrlen <= sti->sti_faddr_maxlen); 6564 sti->sti_faddr_len = faddrlen; 6565 bcopy(faddr, sti->sti_faddr_sa, faddrlen); 6566 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 6567 } 6568 6569 } 6570 6571 /* 6572 * Allocate enough space to cache the local and foreign addresses. 6573 */ 6574 void 6575 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6576 { 6577 sotpi_info_t *sti = SOTOTPI(so); 6578 6579 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6580 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6581 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6582 P2ROUNDUP(maxlen, KMEM_ALIGN); 6583 so->so_max_addr_len = sti->sti_laddr_maxlen; 6584 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6585 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6586 + sti->sti_laddr_maxlen); 6587 6588 if (so->so_family == AF_UNIX) { 6589 /* 6590 * Initialize AF_UNIX related fields. 
6591 */ 6592 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6593 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6594 } 6595 } 6596 6597 6598 sotpi_info_t * 6599 sotpi_sototpi(struct sonode *so) 6600 { 6601 sotpi_info_t *sti; 6602 6603 ASSERT(so != NULL); 6604 6605 sti = (sotpi_info_t *)so->so_priv; 6606 6607 ASSERT(sti != NULL); 6608 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6609 6610 return (sti); 6611 } 6612 6613 static int 6614 i_sotpi_info_constructor(sotpi_info_t *sti) 6615 { 6616 sti->sti_magic = SOTPI_INFO_MAGIC; 6617 sti->sti_ack_mp = NULL; 6618 sti->sti_discon_ind_mp = NULL; 6619 sti->sti_ux_bound_vp = NULL; 6620 sti->sti_unbind_mp = NULL; 6621 6622 sti->sti_conn_ind_head = NULL; 6623 sti->sti_conn_ind_tail = NULL; 6624 6625 sti->sti_laddr_sa = NULL; 6626 sti->sti_faddr_sa = NULL; 6627 6628 sti->sti_nl7c_flags = 0; 6629 sti->sti_nl7c_uri = NULL; 6630 sti->sti_nl7c_rcv_mp = NULL; 6631 6632 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6633 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6634 6635 return (0); 6636 } 6637 6638 static void 6639 i_sotpi_info_destructor(sotpi_info_t *sti) 6640 { 6641 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6642 ASSERT(sti->sti_ack_mp == NULL); 6643 ASSERT(sti->sti_discon_ind_mp == NULL); 6644 ASSERT(sti->sti_ux_bound_vp == NULL); 6645 ASSERT(sti->sti_unbind_mp == NULL); 6646 6647 ASSERT(sti->sti_conn_ind_head == NULL); 6648 ASSERT(sti->sti_conn_ind_tail == NULL); 6649 6650 ASSERT(sti->sti_laddr_sa == NULL); 6651 ASSERT(sti->sti_faddr_sa == NULL); 6652 6653 ASSERT(sti->sti_nl7c_flags == 0); 6654 ASSERT(sti->sti_nl7c_uri == NULL); 6655 ASSERT(sti->sti_nl7c_rcv_mp == NULL); 6656 6657 mutex_destroy(&sti->sti_plumb_lock); 6658 cv_destroy(&sti->sti_ack_cv); 6659 } 6660 6661 /* 6662 * Creates and attaches TPI information to the given sonode 6663 */ 6664 static boolean_t 6665 sotpi_info_create(struct sonode *so, int kmflags) 6666 { 6667 sotpi_info_t *sti; 6668 6669 ASSERT(so->so_priv == NULL); 
6670 6671 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6672 return (B_FALSE); 6673 6674 if (i_sotpi_info_constructor(sti) != 0) { 6675 kmem_free(sti, sizeof (*sti)); 6676 return (B_FALSE); 6677 } 6678 6679 so->so_priv = (void *)sti; 6680 return (B_TRUE); 6681 } 6682 6683 /* 6684 * Initializes the TPI information. 6685 */ 6686 static void 6687 sotpi_info_init(struct sonode *so) 6688 { 6689 struct vnode *vp = SOTOV(so); 6690 sotpi_info_t *sti = SOTOTPI(so); 6691 time_t now; 6692 6693 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6694 vp->v_rdev = sti->sti_dev; 6695 6696 sti->sti_orig_sp = NULL; 6697 6698 sti->sti_pushcnt = 0; 6699 6700 now = gethrestime_sec(); 6701 sti->sti_atime = now; 6702 sti->sti_mtime = now; 6703 sti->sti_ctime = now; 6704 6705 sti->sti_eaddr_mp = NULL; 6706 sti->sti_delayed_error = 0; 6707 6708 sti->sti_provinfo = NULL; 6709 6710 sti->sti_oobcnt = 0; 6711 sti->sti_oobsigcnt = 0; 6712 6713 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6714 6715 sti->sti_laddr_sa = 0; 6716 sti->sti_faddr_sa = 0; 6717 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6718 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6719 6720 sti->sti_laddr_valid = 0; 6721 sti->sti_faddr_valid = 0; 6722 sti->sti_faddr_noxlate = 0; 6723 6724 sti->sti_direct = 0; 6725 6726 ASSERT(sti->sti_ack_mp == NULL); 6727 ASSERT(sti->sti_ux_bound_vp == NULL); 6728 ASSERT(sti->sti_unbind_mp == NULL); 6729 6730 ASSERT(sti->sti_conn_ind_head == NULL); 6731 ASSERT(sti->sti_conn_ind_tail == NULL); 6732 } 6733 6734 /* 6735 * Given a sonode, grab the TPI info and free any data. 
6736 */ 6737 static void 6738 sotpi_info_fini(struct sonode *so) 6739 { 6740 sotpi_info_t *sti = SOTOTPI(so); 6741 mblk_t *mp; 6742 6743 ASSERT(sti->sti_discon_ind_mp == NULL); 6744 6745 if ((mp = sti->sti_conn_ind_head) != NULL) { 6746 mblk_t *mp1; 6747 6748 while (mp) { 6749 mp1 = mp->b_next; 6750 mp->b_next = NULL; 6751 freemsg(mp); 6752 mp = mp1; 6753 } 6754 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6755 } 6756 6757 /* 6758 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6759 * indirect them. It also uses so_count as a validity test. 6760 */ 6761 mutex_enter(&so->so_lock); 6762 6763 if (sti->sti_laddr_sa) { 6764 ASSERT((caddr_t)sti->sti_faddr_sa == 6765 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6766 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6767 sti->sti_laddr_valid = 0; 6768 sti->sti_faddr_valid = 0; 6769 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6770 sti->sti_laddr_sa = NULL; 6771 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6772 sti->sti_faddr_sa = NULL; 6773 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6774 } 6775 6776 mutex_exit(&so->so_lock); 6777 6778 if ((mp = sti->sti_eaddr_mp) != NULL) { 6779 freemsg(mp); 6780 sti->sti_eaddr_mp = NULL; 6781 sti->sti_delayed_error = 0; 6782 } 6783 6784 if ((mp = sti->sti_ack_mp) != NULL) { 6785 freemsg(mp); 6786 sti->sti_ack_mp = NULL; 6787 } 6788 6789 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 6790 sti->sti_nl7c_rcv_mp = NULL; 6791 freemsg(mp); 6792 } 6793 sti->sti_nl7c_rcv_rval = 0; 6794 if (sti->sti_nl7c_uri != NULL) { 6795 nl7c_urifree(so); 6796 /* urifree() cleared nl7c_uri */ 6797 } 6798 if (sti->sti_nl7c_flags) { 6799 sti->sti_nl7c_flags = 0; 6800 } 6801 6802 ASSERT(sti->sti_ux_bound_vp == NULL); 6803 if ((mp = sti->sti_unbind_mp) != NULL) { 6804 freemsg(mp); 6805 sti->sti_unbind_mp = NULL; 6806 } 6807 } 6808 6809 /* 6810 * Destroys the TPI information attached to a sonode. 
6811 */ 6812 static void 6813 sotpi_info_destroy(struct sonode *so) 6814 { 6815 sotpi_info_t *sti = SOTOTPI(so); 6816 6817 i_sotpi_info_destructor(sti); 6818 kmem_free(sti, sizeof (*sti)); 6819 6820 so->so_priv = NULL; 6821 } 6822 6823 /* 6824 * Create the global sotpi socket module entry. It will never be freed. 6825 */ 6826 smod_info_t * 6827 sotpi_smod_create(void) 6828 { 6829 smod_info_t *smodp; 6830 6831 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 6832 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 6833 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 6834 /* 6835 * Initialize the smod_refcnt to 1 so it will never be freed. 6836 */ 6837 smodp->smod_refcnt = 1; 6838 smodp->smod_uc_version = SOCK_UC_VERSION; 6839 smodp->smod_dc_version = SOCK_DC_VERSION; 6840 smodp->smod_sock_create_func = &sotpi_create; 6841 smodp->smod_sock_destroy_func = &sotpi_destroy; 6842 return (smodp); 6843 } 6844