1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 
26 */ 27 28 #include <sys/types.h> 29 #include <sys/t_lock.h> 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/buf.h> 33 #include <sys/conf.h> 34 #include <sys/cred.h> 35 #include <sys/kmem.h> 36 #include <sys/kmem_impl.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/suntpi.h> 51 #include <sys/ddi.h> 52 #include <sys/esunddi.h> 53 #include <sys/flock.h> 54 #include <sys/modctl.h> 55 #include <sys/vtrace.h> 56 #include <sys/cmn_err.h> 57 #include <sys/pathname.h> 58 59 #include <sys/socket.h> 60 #include <sys/socketvar.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <sys/un.h> 64 #include <sys/strsun.h> 65 66 #include <sys/tiuser.h> 67 #define _SUN_TPI_VERSION 2 68 #include <sys/tihdr.h> 69 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 70 71 #include <c2/audit.h> 72 73 #include <inet/common.h> 74 #include <inet/ip.h> 75 #include <inet/ip6.h> 76 #include <inet/tcp.h> 77 #include <inet/udp_impl.h> 78 79 #include <sys/zone.h> 80 81 #include <fs/sockfs/nl7c.h> 82 #include <fs/sockfs/nl7curi.h> 83 84 #include <fs/sockfs/sockcommon.h> 85 #include <fs/sockfs/socktpi.h> 86 #include <fs/sockfs/socktpi_impl.h> 87 88 /* 89 * Possible failures when memory can't be allocated. 
The documented behavior: 90 * 91 * 5.5: 4.X: XNET: 92 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 93 * EINTR 94 * (4.X does not document EINTR but returns it) 95 * bind: ENOSR - ENOBUFS/ENOSR 96 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 97 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 98 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 99 * (4.X getpeername and getsockname do not fail in practice) 100 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 101 * listen: - - ENOBUFS 102 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 103 * EINTR 104 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 105 * EINTR 106 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 107 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 108 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 109 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 110 * 111 * Resolution. When allocation fails: 112 * recv: return EINTR 113 * send: return EINTR 114 * connect, accept: EINTR 115 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 116 * socket, socketpair: ENOBUFS 117 * getpeername, getsockname: sleep 118 * getsockopt, setsockopt: sleep 119 */ 120 121 #ifdef SOCK_TEST 122 /* 123 * Variables that make sockfs do something other than the standard TPI 124 * for the AF_INET transports. 125 * 126 * solisten_tpi_tcp: 127 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 128 * the transport is already bound. This is needed to avoid losing the 129 * port number should listen() do a T_UNBIND_REQ followed by a 130 * O_T_BIND_REQ. 131 * 132 * soconnect_tpi_udp: 133 * UDP and ICMP can handle a T_CONN_REQ. 134 * This is needed to make the sequence of connect(), getsockname() 135 * return the local IP address used to send packets to the connected to 136 * destination. 137 * 138 * soconnect_tpi_tcp: 139 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
140 * Set this to non-zero to send TPI conformant messages to TCP in this 141 * respect. This is a performance optimization. 142 * 143 * soaccept_tpi_tcp: 144 * TCP can handle a T_CONN_REQ without the acceptor being bound. 145 * This is a performance optimization that has been picked up in XTI. 146 * 147 * soaccept_tpi_multioptions: 148 * When inheriting SOL_SOCKET options from the listener to the accepting 149 * socket send them as a single message for AF_INET{,6}. 150 */ 151 int solisten_tpi_tcp = 0; 152 int soconnect_tpi_udp = 0; 153 int soconnect_tpi_tcp = 0; 154 int soaccept_tpi_tcp = 0; 155 int soaccept_tpi_multioptions = 1; 156 #else /* SOCK_TEST */ 157 #define soconnect_tpi_tcp 0 158 #define soconnect_tpi_udp 0 159 #define solisten_tpi_tcp 0 160 #define soaccept_tpi_tcp 0 161 #define soaccept_tpi_multioptions 1 162 #endif /* SOCK_TEST */ 163 164 #ifdef SOCK_TEST 165 extern int do_useracc; 166 extern clock_t sock_test_timelimit; 167 #endif /* SOCK_TEST */ 168 169 extern uint32_t ucredsize; 170 171 /* 172 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 173 * applications working. Turn on this flag to disable these checks. 174 */ 175 int xnet_skip_checks = 0; 176 int xnet_check_print = 0; 177 int xnet_truncate_print = 0; 178 179 static void sotpi_destroy(struct sonode *); 180 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int, 181 int, int *, cred_t *cr); 182 183 static boolean_t sotpi_info_create(struct sonode *, int); 184 static void sotpi_info_init(struct sonode *); 185 static void sotpi_info_fini(struct sonode *); 186 static void sotpi_info_destroy(struct sonode *); 187 188 /* 189 * Do direct function call to the transport layer below; this would 190 * also allow the transport to utilize read-side synchronous stream 191 * interface if necessary. This is a /etc/system tunable that must 192 * not be modified on a running system. 
By default this is enabled 193 * for performance reasons and may be disabled for debugging purposes. 194 */ 195 boolean_t socktpi_direct = B_TRUE; 196 197 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 198 199 extern void sigintr(k_sigset_t *, int); 200 extern void sigunintr(k_sigset_t *); 201 202 static int sotpi_unbind(struct sonode *, int); 203 204 /* TPI sockfs sonode operations */ 205 int sotpi_init(struct sonode *, struct sonode *, struct cred *, 206 int); 207 static int sotpi_accept(struct sonode *, int, struct cred *, 208 struct sonode **); 209 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 210 int, struct cred *); 211 static int sotpi_listen(struct sonode *, int, struct cred *); 212 static int sotpi_connect(struct sonode *, struct sockaddr *, 213 socklen_t, int, int, struct cred *); 214 extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 215 struct uio *, struct cred *); 216 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 217 struct uio *, struct cred *); 218 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 219 struct cred *, mblk_t **); 220 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 221 struct uio *, void *, t_uscalar_t, int); 222 static int sodgram_direct(struct sonode *, struct sockaddr *, 223 socklen_t, struct uio *, int); 224 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 225 socklen_t *, boolean_t, struct cred *); 226 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 227 socklen_t *, struct cred *); 228 static int sotpi_shutdown(struct sonode *, int, struct cred *); 229 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 230 socklen_t *, int, struct cred *); 231 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 232 socklen_t, struct cred *); 233 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 234 int32_t *); 235 static int socktpi_plumbioctl(struct vnode *, 
int, intptr_t, int, 236 struct cred *, int32_t *); 237 static int sotpi_poll(struct sonode *, short, int, short *, 238 struct pollhead **); 239 static int sotpi_close(struct sonode *, int, struct cred *); 240 241 static int i_sotpi_info_constructor(sotpi_info_t *); 242 static void i_sotpi_info_destructor(sotpi_info_t *); 243 244 sonodeops_t sotpi_sonodeops = { 245 sotpi_init, /* sop_init */ 246 sotpi_accept, /* sop_accept */ 247 sotpi_bind, /* sop_bind */ 248 sotpi_listen, /* sop_listen */ 249 sotpi_connect, /* sop_connect */ 250 sotpi_recvmsg, /* sop_recvmsg */ 251 sotpi_sendmsg, /* sop_sendmsg */ 252 sotpi_sendmblk, /* sop_sendmblk */ 253 sotpi_getpeername, /* sop_getpeername */ 254 sotpi_getsockname, /* sop_getsockname */ 255 sotpi_shutdown, /* sop_shutdown */ 256 sotpi_getsockopt, /* sop_getsockopt */ 257 sotpi_setsockopt, /* sop_setsockopt */ 258 sotpi_ioctl, /* sop_ioctl */ 259 sotpi_poll, /* sop_poll */ 260 sotpi_close, /* sop_close */ 261 }; 262 263 /* 264 * Return a TPI socket vnode. 265 * 266 * Note that sockets assume that the driver will clone (either itself 267 * or by using the clone driver) i.e. a socket() call will always 268 * result in a new vnode being created. 269 */ 270 271 /* 272 * Common create code for socket and accept. If tso is set the values 273 * from that node is used instead of issuing a T_INFO_REQ. 274 */ 275 276 /* ARGSUSED */ 277 static struct sonode * 278 sotpi_create(struct sockparams *sp, int family, int type, int protocol, 279 int version, int sflags, int *errorp, cred_t *cr) 280 { 281 struct sonode *so; 282 kmem_cache_t *cp; 283 int sfamily = family; 284 285 ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 286 287 if (family == AF_NCA) { 288 /* 289 * The request is for an NCA socket so for NL7C use the 290 * INET domain instead and mark NL7C_AF_NCA below. 291 */ 292 family = AF_INET; 293 /* 294 * NL7C is not supported in the non-global zone, 295 * we enforce this restriction here. 
296 */ 297 if (getzoneid() != GLOBAL_ZONEID) { 298 *errorp = ENOTSUP; 299 return (NULL); 300 } 301 } 302 303 /* 304 * to be compatible with old tpi socket implementation ignore 305 * sleep flag (sflags) passed in 306 */ 307 cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 308 so = kmem_cache_alloc(cp, KM_SLEEP); 309 if (so == NULL) { 310 *errorp = ENOMEM; 311 return (NULL); 312 } 313 314 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 315 sotpi_info_init(so); 316 317 if (sfamily == AF_NCA) { 318 SOTOTPI(so)->sti_nl7c_flags = NL7C_AF_NCA; 319 } 320 321 if (version == SOV_DEFAULT) 322 version = so_default_version; 323 324 so->so_version = (short)version; 325 *errorp = 0; 326 327 return (so); 328 } 329 330 static void 331 sotpi_destroy(struct sonode *so) 332 { 333 kmem_cache_t *cp; 334 struct sockparams *origsp; 335 336 /* 337 * If there is a new dealloc function (ie. smod_destroy_func), 338 * then it should check the correctness of the ops. 339 */ 340 341 ASSERT(so->so_ops == &sotpi_sonodeops); 342 343 origsp = SOTOTPI(so)->sti_orig_sp; 344 345 sotpi_info_fini(so); 346 347 if (so->so_state & SS_FALLBACK_COMP) { 348 /* 349 * A fallback happend, which means that a sotpi_info_t struct 350 * was allocated (as opposed to being allocated from the TPI 351 * sonode cache. Therefore we explicitly free the struct 352 * here. 353 */ 354 sotpi_info_destroy(so); 355 ASSERT(origsp != NULL); 356 357 origsp->sp_smod_info->smod_sock_destroy_func(so); 358 SOCKPARAMS_DEC_REF(origsp); 359 } else { 360 sonode_fini(so); 361 cp = (so->so_family == AF_UNIX) ? 
socktpi_unix_cache : 362 socktpi_cache; 363 kmem_cache_free(cp, so); 364 } 365 } 366 367 /* ARGSUSED1 */ 368 int 369 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 370 { 371 major_t maj; 372 dev_t newdev; 373 struct vnode *vp; 374 int error = 0; 375 struct stdata *stp; 376 377 sotpi_info_t *sti = SOTOTPI(so); 378 379 dprint(1, ("sotpi_init()\n")); 380 381 /* 382 * over write the sleep flag passed in but that is ok 383 * as tpi socket does not honor sleep flag. 384 */ 385 flags |= FREAD|FWRITE; 386 387 /* 388 * Record in so_flag that it is a clone. 389 */ 390 if (getmajor(sti->sti_dev) == clone_major) 391 so->so_flag |= SOCLONE; 392 393 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 394 (so->so_family == AF_INET || so->so_family == AF_INET6) && 395 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 396 so->so_protocol == IPPROTO_IP)) { 397 /* Tell tcp or udp that it's talking to sockets */ 398 flags |= SO_SOCKSTR; 399 400 /* 401 * Here we indicate to socktpi_open() our attempt to 402 * make direct calls between sockfs and transport. 403 * The final decision is left to socktpi_open(). 404 */ 405 sti->sti_direct = 1; 406 407 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 408 if (so->so_type == SOCK_STREAM && tso != NULL) { 409 if (SOTOTPI(tso)->sti_direct) { 410 /* 411 * Inherit sti_direct from listener and pass 412 * SO_ACCEPTOR open flag to tcp, indicating 413 * that this is an accept fast-path instance. 414 */ 415 flags |= SO_ACCEPTOR; 416 } else { 417 /* 418 * sti_direct is not set on listener, meaning 419 * that the listener has been converted from 420 * a socket to a stream. Ensure that the 421 * acceptor inherits these settings. 422 */ 423 sti->sti_direct = 0; 424 flags &= ~SO_SOCKSTR; 425 } 426 } 427 } 428 429 /* 430 * Tell local transport that it is talking to sockets. 
431 */ 432 if (so->so_family == AF_UNIX) { 433 flags |= SO_SOCKSTR; 434 } 435 436 vp = SOTOV(so); 437 newdev = vp->v_rdev; 438 maj = getmajor(newdev); 439 ASSERT(STREAMSTAB(maj)); 440 441 error = stropen(vp, &newdev, flags, cr); 442 443 stp = vp->v_stream; 444 if (error == 0) { 445 if (so->so_flag & SOCLONE) 446 ASSERT(newdev != vp->v_rdev); 447 mutex_enter(&so->so_lock); 448 sti->sti_dev = newdev; 449 vp->v_rdev = newdev; 450 mutex_exit(&so->so_lock); 451 452 if (stp->sd_flag & STRISTTY) { 453 /* 454 * this is a post SVR4 tty driver - a socket can not 455 * be a controlling terminal. Fail the open. 456 */ 457 (void) sotpi_close(so, flags, cr); 458 return (ENOTTY); /* XXX */ 459 } 460 461 ASSERT(stp->sd_wrq != NULL); 462 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 463 464 /* 465 * If caller is interested in doing direct function call 466 * interface to/from transport module, probe the module 467 * directly beneath the streamhead to see if it qualifies. 468 * 469 * We turn off the direct interface when qualifications fail. 470 * In the acceptor case, we simply turn off the sti_direct 471 * flag on the socket. We do the fallback after the accept 472 * has completed, before the new socket is returned to the 473 * application. 474 */ 475 if (sti->sti_direct) { 476 queue_t *tq = stp->sd_wrq->q_next; 477 478 /* 479 * sti_direct is currently supported and tested 480 * only for tcp/udp; this is the main reason to 481 * have the following assertions. 482 */ 483 ASSERT(so->so_family == AF_INET || 484 so->so_family == AF_INET6); 485 ASSERT(so->so_protocol == IPPROTO_UDP || 486 so->so_protocol == IPPROTO_TCP || 487 so->so_protocol == IPPROTO_IP); 488 ASSERT(so->so_type == SOCK_DGRAM || 489 so->so_type == SOCK_STREAM); 490 491 /* 492 * Abort direct call interface if the module directly 493 * underneath the stream head is not defined with the 494 * _D_DIRECT flag. 
This could happen in the tcp or 495 * udp case, when some other module is autopushed 496 * above it, or for some reasons the expected module 497 * isn't purely D_MP (which is the main requirement). 498 */ 499 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 500 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 501 int rval; 502 503 /* Continue on without direct calls */ 504 sti->sti_direct = 0; 505 506 /* 507 * Cannot issue ioctl on fallback socket since 508 * there is no conn associated with the queue. 509 * The fallback downcall will notify the proto 510 * of the change. 511 */ 512 if (!(flags & SO_ACCEPTOR) && 513 !(flags & SO_FALLBACK)) { 514 if ((error = strioctl(vp, 515 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 516 cr, &rval)) != 0) { 517 (void) sotpi_close(so, flags, 518 cr); 519 return (error); 520 } 521 } 522 } 523 } 524 525 if (flags & SO_FALLBACK) { 526 /* 527 * The stream created does not have a conn. 528 * do stream set up after conn has been assigned 529 */ 530 return (error); 531 } 532 if (error = so_strinit(so, tso)) { 533 (void) sotpi_close(so, flags, cr); 534 return (error); 535 } 536 537 /* Wildcard */ 538 if (so->so_protocol != so->so_sockparams->sp_protocol) { 539 int protocol = so->so_protocol; 540 /* 541 * Issue SO_PROTOTYPE setsockopt. 542 */ 543 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 544 &protocol, (t_uscalar_t)sizeof (protocol), cr); 545 if (error != 0) { 546 (void) sotpi_close(so, flags, cr); 547 /* 548 * Setsockopt often fails with ENOPROTOOPT but 549 * socket() should fail with 550 * EPROTONOSUPPORT/EPROTOTYPE. 551 */ 552 return (EPROTONOSUPPORT); 553 } 554 } 555 556 } else { 557 /* 558 * While the same socket can not be reopened (unlike specfs) 559 * the stream head sets STREOPENFAIL when the autopush fails. 560 */ 561 if ((stp != NULL) && 562 (stp->sd_flag & STREOPENFAIL)) { 563 /* 564 * Open failed part way through. 
565 */ 566 mutex_enter(&stp->sd_lock); 567 stp->sd_flag &= ~STREOPENFAIL; 568 mutex_exit(&stp->sd_lock); 569 (void) sotpi_close(so, flags, cr); 570 return (error); 571 /*NOTREACHED*/ 572 } 573 ASSERT(stp == NULL); 574 } 575 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 576 "sockfs open:maj %d vp %p so %p error %d", 577 maj, vp, so, error); 578 return (error); 579 } 580 581 /* 582 * Bind the socket to an unspecified address in sockfs only. 583 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 584 * required in all cases. 585 */ 586 static void 587 so_automatic_bind(struct sonode *so) 588 { 589 sotpi_info_t *sti = SOTOTPI(so); 590 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 591 592 ASSERT(MUTEX_HELD(&so->so_lock)); 593 ASSERT(!(so->so_state & SS_ISBOUND)); 594 ASSERT(sti->sti_unbind_mp); 595 596 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 597 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 598 sti->sti_laddr_sa->sa_family = so->so_family; 599 so->so_state |= SS_ISBOUND; 600 } 601 602 603 /* 604 * bind the socket. 605 * 606 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 607 * are passed in we allow rebinding. Note that for backwards compatibility 608 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 609 * Thus the rebinding code is currently not executed. 610 * 611 * The constraints for rebinding are: 612 * - it is a SOCK_DGRAM, or 613 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 614 * and no listen() has been done. 615 * This rebinding code was added based on some language in the XNET book 616 * about not returning EINVAL it the protocol allows rebinding. However, 617 * this language is not present in the Posix socket draft. Thus maybe the 618 * rebinding logic should be deleted from the source. 
619 * 620 * A null "name" can be used to unbind the socket if: 621 * - it is a SOCK_DGRAM, or 622 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 623 * and no listen() has been done. 624 */ 625 /* ARGSUSED */ 626 static int 627 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 628 socklen_t namelen, int backlog, int flags, struct cred *cr) 629 { 630 struct T_bind_req bind_req; 631 struct T_bind_ack *bind_ack; 632 int error = 0; 633 mblk_t *mp; 634 void *addr; 635 t_uscalar_t addrlen; 636 int unbind_on_err = 1; 637 boolean_t clear_acceptconn_on_err = B_FALSE; 638 boolean_t restore_backlog_on_err = B_FALSE; 639 int save_so_backlog; 640 t_scalar_t PRIM_type = O_T_BIND_REQ; 641 boolean_t tcp_udp_xport; 642 void *nl7c = NULL; 643 sotpi_info_t *sti = SOTOTPI(so); 644 645 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 646 (void *)so, (void *)name, namelen, backlog, flags, 647 pr_state(so->so_state, so->so_mode))); 648 649 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 650 651 if (!(flags & _SOBIND_LOCK_HELD)) { 652 mutex_enter(&so->so_lock); 653 so_lock_single(so); /* Set SOLOCKED */ 654 } else { 655 ASSERT(MUTEX_HELD(&so->so_lock)); 656 ASSERT(so->so_flag & SOLOCKED); 657 } 658 659 /* 660 * Make sure that there is a preallocated unbind_req message 661 * before binding. This message allocated when the socket is 662 * created but it might be have been consumed. 663 */ 664 if (sti->sti_unbind_mp == NULL) { 665 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 666 /* NOTE: holding so_lock while sleeping */ 667 sti->sti_unbind_mp = 668 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 669 cr); 670 } 671 672 if (flags & _SOBIND_REBIND) { 673 /* 674 * Called from solisten after doing an sotpi_unbind() or 675 * potentially without the unbind (latter for AF_INET{,6}). 
676 */ 677 ASSERT(name == NULL && namelen == 0); 678 679 if (so->so_family == AF_UNIX) { 680 ASSERT(sti->sti_ux_bound_vp); 681 addr = &sti->sti_ux_laddr; 682 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 683 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 684 "addr 0x%p, vp %p\n", 685 addrlen, 686 (void *)((struct so_ux_addr *)addr)->soua_vp, 687 (void *)sti->sti_ux_bound_vp)); 688 } else { 689 addr = sti->sti_laddr_sa; 690 addrlen = (t_uscalar_t)sti->sti_laddr_len; 691 } 692 } else if (flags & _SOBIND_UNSPEC) { 693 ASSERT(name == NULL && namelen == 0); 694 695 /* 696 * The caller checked SS_ISBOUND but not necessarily 697 * under so_lock 698 */ 699 if (so->so_state & SS_ISBOUND) { 700 /* No error */ 701 goto done; 702 } 703 704 /* Set an initial local address */ 705 switch (so->so_family) { 706 case AF_UNIX: 707 /* 708 * Use an address with same size as struct sockaddr 709 * just like BSD. 710 */ 711 sti->sti_laddr_len = 712 (socklen_t)sizeof (struct sockaddr); 713 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 714 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 715 sti->sti_laddr_sa->sa_family = so->so_family; 716 717 /* 718 * Pass down an address with the implicit bind 719 * magic number and the rest all zeros. 720 * The transport will return a unique address. 721 */ 722 sti->sti_ux_laddr.soua_vp = NULL; 723 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 724 addr = &sti->sti_ux_laddr; 725 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 726 break; 727 728 case AF_INET: 729 case AF_INET6: 730 /* 731 * An unspecified bind in TPI has a NULL address. 732 * Set the address in sockfs to have the sa_family. 733 */ 734 sti->sti_laddr_len = (so->so_family == AF_INET) ? 
735 (socklen_t)sizeof (sin_t) : 736 (socklen_t)sizeof (sin6_t); 737 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 738 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 739 sti->sti_laddr_sa->sa_family = so->so_family; 740 addr = NULL; 741 addrlen = 0; 742 break; 743 744 default: 745 /* 746 * An unspecified bind in TPI has a NULL address. 747 * Set the address in sockfs to be zero length. 748 * 749 * Can not assume there is a sa_family for all 750 * protocol families. For example, AF_X25 does not 751 * have a family field. 752 */ 753 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 754 sti->sti_laddr_len = 0; /* XXX correct? */ 755 addr = NULL; 756 addrlen = 0; 757 break; 758 } 759 760 } else { 761 if (so->so_state & SS_ISBOUND) { 762 /* 763 * If it is ok to rebind the socket, first unbind 764 * with the transport. A rebind to the NULL address 765 * is interpreted as an unbind. 766 * Note that a bind to NULL in BSD does unbind the 767 * socket but it fails with EINVAL. 768 * Note that regular sockets set SOV_SOCKBSD i.e. 769 * _SOBIND_SOCKBSD gets set here hence no type of 770 * socket does currently allow rebinding. 771 * 772 * If the name is NULL just do an unbind. 
773 */ 774 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 775 name != NULL) { 776 error = EINVAL; 777 unbind_on_err = 0; 778 eprintsoline(so, error); 779 goto done; 780 } 781 if ((so->so_mode & SM_CONNREQUIRED) && 782 (so->so_state & SS_CANTREBIND)) { 783 error = EINVAL; 784 unbind_on_err = 0; 785 eprintsoline(so, error); 786 goto done; 787 } 788 error = sotpi_unbind(so, 0); 789 if (error) { 790 eprintsoline(so, error); 791 goto done; 792 } 793 ASSERT(!(so->so_state & SS_ISBOUND)); 794 if (name == NULL) { 795 so->so_state &= 796 ~(SS_ISCONNECTED|SS_ISCONNECTING); 797 goto done; 798 } 799 } 800 801 /* X/Open requires this check */ 802 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 803 if (xnet_check_print) { 804 printf("sockfs: X/Open bind state check " 805 "caused EINVAL\n"); 806 } 807 error = EINVAL; 808 goto done; 809 } 810 811 switch (so->so_family) { 812 case AF_UNIX: 813 /* 814 * All AF_UNIX addresses are nul terminated 815 * when copied (copyin_name) in so the minimum 816 * length is 3 bytes. 817 */ 818 if (name == NULL || 819 (ssize_t)namelen <= sizeof (short) + 1) { 820 error = EISDIR; 821 eprintsoline(so, error); 822 goto done; 823 } 824 /* 825 * Verify so_family matches the bound family. 826 * BSD does not check this for AF_UNIX resulting 827 * in funny mknods. 828 */ 829 if (name->sa_family != so->so_family) { 830 error = EAFNOSUPPORT; 831 goto done; 832 } 833 break; 834 case AF_INET: 835 if (name == NULL) { 836 error = EINVAL; 837 eprintsoline(so, error); 838 goto done; 839 } 840 if ((size_t)namelen != sizeof (sin_t)) { 841 error = name->sa_family != so->so_family ? 842 EAFNOSUPPORT : EINVAL; 843 eprintsoline(so, error); 844 goto done; 845 } 846 if ((flags & _SOBIND_XPG4_2) && 847 (name->sa_family != so->so_family)) { 848 /* 849 * This check has to be made for X/Open 850 * sockets however application failures have 851 * been observed when it is applied to 852 * all sockets. 
853 */ 854 error = EAFNOSUPPORT; 855 eprintsoline(so, error); 856 goto done; 857 } 858 /* 859 * Force a zero sa_family to match so_family. 860 * 861 * Some programs like inetd(1M) don't set the 862 * family field. Other programs leave 863 * sin_family set to garbage - SunOS 4.X does 864 * not check the family field on a bind. 865 * We use the family field that 866 * was passed in to the socket() call. 867 */ 868 name->sa_family = so->so_family; 869 break; 870 871 case AF_INET6: { 872 #ifdef DEBUG 873 sin6_t *sin6 = (sin6_t *)name; 874 #endif /* DEBUG */ 875 876 if (name == NULL) { 877 error = EINVAL; 878 eprintsoline(so, error); 879 goto done; 880 } 881 if ((size_t)namelen != sizeof (sin6_t)) { 882 error = name->sa_family != so->so_family ? 883 EAFNOSUPPORT : EINVAL; 884 eprintsoline(so, error); 885 goto done; 886 } 887 if (name->sa_family != so->so_family) { 888 /* 889 * With IPv6 we require the family to match 890 * unlike in IPv4. 891 */ 892 error = EAFNOSUPPORT; 893 eprintsoline(so, error); 894 goto done; 895 } 896 #ifdef DEBUG 897 /* 898 * Verify that apps don't forget to clear 899 * sin6_scope_id etc 900 */ 901 if (sin6->sin6_scope_id != 0 && 902 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 903 zcmn_err(getzoneid(), CE_WARN, 904 "bind with uninitialized sin6_scope_id " 905 "(%d) on socket. Pid = %d\n", 906 (int)sin6->sin6_scope_id, 907 (int)curproc->p_pid); 908 } 909 if (sin6->__sin6_src_id != 0) { 910 zcmn_err(getzoneid(), CE_WARN, 911 "bind with uninitialized __sin6_src_id " 912 "(%d) on socket. Pid = %d\n", 913 (int)sin6->__sin6_src_id, 914 (int)curproc->p_pid); 915 } 916 #endif /* DEBUG */ 917 break; 918 } 919 default: 920 /* 921 * Don't do any length or sa_family check to allow 922 * non-sockaddr style addresses. 
923 */ 924 if (name == NULL) { 925 error = EINVAL; 926 eprintsoline(so, error); 927 goto done; 928 } 929 break; 930 } 931 932 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 933 error = ENAMETOOLONG; 934 eprintsoline(so, error); 935 goto done; 936 } 937 /* 938 * Save local address. 939 */ 940 sti->sti_laddr_len = (socklen_t)namelen; 941 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 942 bcopy(name, sti->sti_laddr_sa, namelen); 943 944 addr = sti->sti_laddr_sa; 945 addrlen = (t_uscalar_t)sti->sti_laddr_len; 946 switch (so->so_family) { 947 case AF_INET6: 948 case AF_INET: 949 break; 950 case AF_UNIX: { 951 struct sockaddr_un *soun = 952 (struct sockaddr_un *)sti->sti_laddr_sa; 953 struct vnode *vp, *rvp; 954 struct vattr vattr; 955 956 ASSERT(sti->sti_ux_bound_vp == NULL); 957 /* 958 * Create vnode for the specified path name. 959 * Keep vnode held with a reference in sti_ux_bound_vp. 960 * Use the vnode pointer as the address used in the 961 * bind with the transport. 962 * 963 * Use the same mode as in BSD. In particular this does 964 * not observe the umask. 965 */ 966 /* MAXPATHLEN + soun_family + nul termination */ 967 if (sti->sti_laddr_len > 968 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 969 error = ENAMETOOLONG; 970 eprintsoline(so, error); 971 goto done; 972 } 973 vattr.va_type = VSOCK; 974 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 975 vattr.va_mask = AT_TYPE|AT_MODE; 976 /* NOTE: holding so_lock */ 977 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 978 EXCL, 0, &vp, CRMKNOD, 0, 0); 979 if (error) { 980 if (error == EEXIST) 981 error = EADDRINUSE; 982 eprintsoline(so, error); 983 goto done; 984 } 985 /* 986 * Establish pointer from the underlying filesystem 987 * vnode to the socket node. 988 * sti_ux_bound_vp and v_stream->sd_vnode form the 989 * cross-linkage between the underlying filesystem 990 * node and the socket node. 
991 */ 992 993 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 994 VN_HOLD(rvp); 995 VN_RELE(vp); 996 vp = rvp; 997 } 998 999 ASSERT(SOTOV(so)->v_stream); 1000 mutex_enter(&vp->v_lock); 1001 vp->v_stream = SOTOV(so)->v_stream; 1002 sti->sti_ux_bound_vp = vp; 1003 mutex_exit(&vp->v_lock); 1004 1005 /* 1006 * Use the vnode pointer value as a unique address 1007 * (together with the magic number to avoid conflicts 1008 * with implicit binds) in the transport provider. 1009 */ 1010 sti->sti_ux_laddr.soua_vp = 1011 (void *)sti->sti_ux_bound_vp; 1012 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1013 addr = &sti->sti_ux_laddr; 1014 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1015 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1016 addrlen, 1017 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1018 break; 1019 } 1020 } /* end switch (so->so_family) */ 1021 } 1022 1023 /* 1024 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1025 * the transport can start passing up T_CONN_IND messages 1026 * as soon as it receives the bind req and strsock_proto() 1027 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1028 */ 1029 if (flags & _SOBIND_LISTEN) { 1030 if ((so->so_state & SS_ACCEPTCONN) == 0) 1031 clear_acceptconn_on_err = B_TRUE; 1032 save_so_backlog = so->so_backlog; 1033 restore_backlog_on_err = B_TRUE; 1034 so->so_state |= SS_ACCEPTCONN; 1035 so->so_backlog = backlog; 1036 } 1037 1038 /* 1039 * If NL7C addr(s) have been configured check for addr/port match, 1040 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 1041 * 1042 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 1043 * family sockets only. If match mark as such. 
1044 */ 1045 if (nl7c_enabled && ((addr != NULL && 1046 (so->so_family == AF_INET || so->so_family == AF_INET6) && 1047 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 1048 sti->sti_nl7c_flags == NL7C_AF_NCA)) { 1049 /* 1050 * NL7C is not supported in non-global zones, 1051 * we enforce this restriction here. 1052 */ 1053 if (so->so_zoneid == GLOBAL_ZONEID) { 1054 /* An NL7C socket, mark it */ 1055 sti->sti_nl7c_flags |= NL7C_ENABLED; 1056 if (nl7c == NULL) { 1057 /* 1058 * Was an AF_NCA bind() so add it to the 1059 * addr list for reporting purposes. 1060 */ 1061 nl7c = nl7c_add_addr(addr, addrlen); 1062 } 1063 } else 1064 nl7c = NULL; 1065 } 1066 1067 /* 1068 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1069 * for other transports we will send in a O_T_BIND_REQ. 1070 */ 1071 if (tcp_udp_xport && 1072 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1073 PRIM_type = T_BIND_REQ; 1074 1075 bind_req.PRIM_type = PRIM_type; 1076 bind_req.ADDR_length = addrlen; 1077 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1078 bind_req.CONIND_number = backlog; 1079 /* NOTE: holding so_lock while sleeping */ 1080 mp = soallocproto2(&bind_req, sizeof (bind_req), 1081 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1082 sti->sti_laddr_valid = 0; 1083 1084 /* Done using sti_laddr_sa - can drop the lock */ 1085 mutex_exit(&so->so_lock); 1086 1087 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1088 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1089 if (error) { 1090 eprintsoline(so, error); 1091 mutex_enter(&so->so_lock); 1092 goto done; 1093 } 1094 1095 mutex_enter(&so->so_lock); 1096 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1097 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1098 if (error) { 1099 eprintsoline(so, error); 1100 goto done; 1101 } 1102 ASSERT(mp); 1103 /* 1104 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1105 * strsock_proto while the lock was dropped above, the bind 1106 * is allowed to complete. 1107 */ 1108 1109 /* Mark as bound. 
This will be undone if we detect errors below. */ 1110 if (flags & _SOBIND_NOXLATE) { 1111 ASSERT(so->so_family == AF_UNIX); 1112 sti->sti_faddr_noxlate = 1; 1113 } 1114 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1115 so->so_state |= SS_ISBOUND; 1116 ASSERT(sti->sti_unbind_mp); 1117 1118 /* note that we've already set SS_ACCEPTCONN above */ 1119 1120 /* 1121 * Recompute addrlen - an unspecied bind sent down an 1122 * address of length zero but we expect the appropriate length 1123 * in return. 1124 */ 1125 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 1126 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1127 1128 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1129 /* 1130 * The alignment restriction is really too strict but 1131 * we want enough alignment to inspect the fields of 1132 * a sockaddr_in. 1133 */ 1134 addr = sogetoff(mp, bind_ack->ADDR_offset, 1135 bind_ack->ADDR_length, 1136 __TPI_ALIGN_SIZE); 1137 if (addr == NULL) { 1138 freemsg(mp); 1139 error = EPROTO; 1140 eprintsoline(so, error); 1141 goto done; 1142 } 1143 if (!(flags & _SOBIND_UNSPEC)) { 1144 /* 1145 * Verify that the transport didn't return something we 1146 * did not want e.g. an address other than what we asked for. 1147 * 1148 * NOTE: These checks would go away if/when we switch to 1149 * using the new TPI (in which the transport would fail 1150 * the request instead of assigning a different address). 1151 * 1152 * NOTE2: For protocols that we don't know (i.e. any 1153 * other than AF_INET6, AF_INET and AF_UNIX), we 1154 * cannot know if the transport should be expected to 1155 * return the same address as that requested. 1156 * 1157 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1158 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 
1159 * 1160 * For example, in the case of netatalk it may be 1161 * inappropriate for the transport to return the 1162 * requested address (as it may have allocated a local 1163 * port number in behaviour similar to that of an 1164 * AF_INET bind request with a port number of zero). 1165 * 1166 * Given the definition of O_T_BIND_REQ, where the 1167 * transport may bind to an address other than the 1168 * requested address, it's not possible to determine 1169 * whether a returned address that differs from the 1170 * requested address is a reason to fail (because the 1171 * requested address was not available) or succeed 1172 * (because the transport allocated an appropriate 1173 * address and/or port). 1174 * 1175 * sockfs currently requires that the transport return 1176 * the requested address in the T_BIND_ACK, unless 1177 * there is code here to allow for any discrepancy. 1178 * Such code exists for AF_INET and AF_INET6. 1179 * 1180 * Netatalk chooses to return the requested address 1181 * rather than the (correct) allocated address. This 1182 * means that netatalk violates the TPI specification 1183 * (and would not function correctly if used from a 1184 * TLI application), but it does mean that it works 1185 * with sockfs. 1186 * 1187 * As noted above, using the newer XTI bind primitive 1188 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1189 * allow sockfs to be more sure about whether or not 1190 * the bind request had succeeded (as transports are 1191 * not permitted to bind to a different address than 1192 * that requested - they must return failure). 1193 * Unfortunately, support for T_BIND_REQ may not be 1194 * present in all transport implementations (netatalk, 1195 * for example, doesn't have it), making the 1196 * transition difficult. 
1197 */ 1198 if (bind_ack->ADDR_length != addrlen) { 1199 /* Assumes that the requested address was in use */ 1200 freemsg(mp); 1201 error = EADDRINUSE; 1202 eprintsoline(so, error); 1203 goto done; 1204 } 1205 1206 switch (so->so_family) { 1207 case AF_INET6: 1208 case AF_INET: { 1209 sin_t *rname, *aname; 1210 1211 rname = (sin_t *)addr; 1212 aname = (sin_t *)sti->sti_laddr_sa; 1213 1214 /* 1215 * Take advantage of the alignment 1216 * of sin_port and sin6_port which fall 1217 * in the same place in their data structures. 1218 * Just use sin_port for either address family. 1219 * 1220 * This may become a problem if (heaven forbid) 1221 * there's a separate ipv6port_reserved... :-P 1222 * 1223 * Binding to port 0 has the semantics of letting 1224 * the transport bind to any port. 1225 * 1226 * If the transport is TCP or UDP since we had sent 1227 * a T_BIND_REQ we would not get a port other than 1228 * what we asked for. 1229 */ 1230 if (tcp_udp_xport) { 1231 /* 1232 * Pick up the new port number if we bound to 1233 * port 0. 1234 */ 1235 if (aname->sin_port == 0) 1236 aname->sin_port = rname->sin_port; 1237 sti->sti_laddr_valid = 1; 1238 break; 1239 } 1240 if (aname->sin_port != 0 && 1241 aname->sin_port != rname->sin_port) { 1242 freemsg(mp); 1243 error = EADDRINUSE; 1244 eprintsoline(so, error); 1245 goto done; 1246 } 1247 /* 1248 * Pick up the new port number if we bound to port 0. 1249 */ 1250 aname->sin_port = rname->sin_port; 1251 1252 /* 1253 * Unfortunately, addresses aren't _quite_ the same. 
1254 */ 1255 if (so->so_family == AF_INET) { 1256 if (aname->sin_addr.s_addr != 1257 rname->sin_addr.s_addr) { 1258 freemsg(mp); 1259 error = EADDRNOTAVAIL; 1260 eprintsoline(so, error); 1261 goto done; 1262 } 1263 } else { 1264 sin6_t *rname6 = (sin6_t *)rname; 1265 sin6_t *aname6 = (sin6_t *)aname; 1266 1267 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1268 &rname6->sin6_addr)) { 1269 freemsg(mp); 1270 error = EADDRNOTAVAIL; 1271 eprintsoline(so, error); 1272 goto done; 1273 } 1274 } 1275 break; 1276 } 1277 case AF_UNIX: 1278 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1279 freemsg(mp); 1280 error = EADDRINUSE; 1281 eprintsoline(so, error); 1282 eprintso(so, 1283 ("addrlen %d, addr 0x%x, vp %p\n", 1284 addrlen, *((int *)addr), 1285 (void *)sti->sti_ux_bound_vp)); 1286 goto done; 1287 } 1288 sti->sti_laddr_valid = 1; 1289 break; 1290 default: 1291 /* 1292 * NOTE: This assumes that addresses can be 1293 * byte-compared for equivalence. 1294 */ 1295 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1296 freemsg(mp); 1297 error = EADDRINUSE; 1298 eprintsoline(so, error); 1299 goto done; 1300 } 1301 /* 1302 * Don't mark sti_laddr_valid, as we cannot be 1303 * sure that the returned address is the real 1304 * bound address when talking to an unknown 1305 * transport. 1306 */ 1307 break; 1308 } 1309 } else { 1310 /* 1311 * Save for returned address for getsockname. 1312 * Needed for unspecific bind unless transport supports 1313 * the TI_GETMYNAME ioctl. 1314 * Do this for AF_INET{,6} even though they do, as 1315 * caching info here is much better performance than 1316 * a TPI/STREAMS trip to the transport for getsockname. 1317 * Any which can't for some reason _must_ _not_ set 1318 * sti_laddr_valid here for the caching version of 1319 * getsockname to not break; 1320 */ 1321 switch (so->so_family) { 1322 case AF_UNIX: 1323 /* 1324 * Record the address bound with the transport 1325 * for use by socketpair. 
1326 */ 1327 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1328 sti->sti_laddr_valid = 1; 1329 break; 1330 case AF_INET: 1331 case AF_INET6: 1332 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1333 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1334 sti->sti_laddr_valid = 1; 1335 break; 1336 default: 1337 /* 1338 * Don't mark sti_laddr_valid, as we cannot be 1339 * sure that the returned address is the real 1340 * bound address when talking to an unknown 1341 * transport. 1342 */ 1343 break; 1344 } 1345 } 1346 1347 if (nl7c != NULL) { 1348 /* Register listen()er sonode pointer with NL7C */ 1349 nl7c_listener_addr(nl7c, so); 1350 } 1351 1352 freemsg(mp); 1353 1354 done: 1355 if (error) { 1356 /* reset state & backlog to values held on entry */ 1357 if (clear_acceptconn_on_err == B_TRUE) 1358 so->so_state &= ~SS_ACCEPTCONN; 1359 if (restore_backlog_on_err == B_TRUE) 1360 so->so_backlog = save_so_backlog; 1361 1362 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1363 int err; 1364 1365 err = sotpi_unbind(so, 0); 1366 /* LINTED - statement has no consequent: if */ 1367 if (err) { 1368 eprintsoline(so, error); 1369 } else { 1370 ASSERT(!(so->so_state & SS_ISBOUND)); 1371 } 1372 } 1373 } 1374 if (!(flags & _SOBIND_LOCK_HELD)) { 1375 so_unlock_single(so, SOLOCKED); 1376 mutex_exit(&so->so_lock); 1377 } else { 1378 ASSERT(MUTEX_HELD(&so->so_lock)); 1379 ASSERT(so->so_flag & SOLOCKED); 1380 } 1381 return (error); 1382 } 1383 1384 /* bind the socket */ 1385 static int 1386 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1387 int flags, struct cred *cr) 1388 { 1389 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1390 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1391 1392 flags &= ~_SOBIND_SOCKETPAIR; 1393 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1394 } 1395 1396 /* 1397 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1398 * address, or when listen needs to unbind and bind. 
1399 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1400 * so that a sobind can pick them up. 1401 */ 1402 static int 1403 sotpi_unbind(struct sonode *so, int flags) 1404 { 1405 struct T_unbind_req unbind_req; 1406 int error = 0; 1407 mblk_t *mp; 1408 sotpi_info_t *sti = SOTOTPI(so); 1409 1410 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1411 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1412 1413 ASSERT(MUTEX_HELD(&so->so_lock)); 1414 ASSERT(so->so_flag & SOLOCKED); 1415 1416 if (!(so->so_state & SS_ISBOUND)) { 1417 error = EINVAL; 1418 eprintsoline(so, error); 1419 goto done; 1420 } 1421 1422 mutex_exit(&so->so_lock); 1423 1424 /* 1425 * Flush the read and write side (except stream head read queue) 1426 * and send down T_UNBIND_REQ. 1427 */ 1428 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1429 1430 unbind_req.PRIM_type = T_UNBIND_REQ; 1431 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1432 0, _ALLOC_SLEEP, CRED()); 1433 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1434 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1435 mutex_enter(&so->so_lock); 1436 if (error) { 1437 eprintsoline(so, error); 1438 goto done; 1439 } 1440 1441 error = sowaitokack(so, T_UNBIND_REQ); 1442 if (error) { 1443 eprintsoline(so, error); 1444 goto done; 1445 } 1446 1447 /* 1448 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1449 * strsock_proto while the lock was dropped above, the unbind 1450 * is allowed to complete. 1451 */ 1452 if (!(flags & _SOUNBIND_REBIND)) { 1453 /* 1454 * Clear out bound address. 
1455 */ 1456 vnode_t *vp; 1457 1458 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1459 sti->sti_ux_bound_vp = NULL; 1460 vn_rele_stream(vp); 1461 } 1462 /* Clear out address */ 1463 sti->sti_laddr_len = 0; 1464 } 1465 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1466 sti->sti_laddr_valid = 0; 1467 1468 done: 1469 1470 /* If the caller held the lock don't release it here */ 1471 ASSERT(MUTEX_HELD(&so->so_lock)); 1472 ASSERT(so->so_flag & SOLOCKED); 1473 1474 return (error); 1475 } 1476 1477 /* 1478 * listen on the socket. 1479 * For TPI conforming transports this has to first unbind with the transport 1480 * and then bind again using the new backlog. 1481 */ 1482 /* ARGSUSED */ 1483 int 1484 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1485 { 1486 int error = 0; 1487 sotpi_info_t *sti = SOTOTPI(so); 1488 1489 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1490 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1491 1492 if (sti->sti_serv_type == T_CLTS) 1493 return (EOPNOTSUPP); 1494 1495 /* 1496 * If the socket is ready to accept connections already, then 1497 * return without doing anything. This avoids a problem where 1498 * a second listen() call fails if a connection is pending and 1499 * leaves the socket unbound. Only when we are not unbinding 1500 * with the transport can we safely increase the backlog. 1501 */ 1502 if (so->so_state & SS_ACCEPTCONN && 1503 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1504 /*CONSTCOND*/ 1505 !solisten_tpi_tcp)) 1506 return (0); 1507 1508 if (so->so_state & SS_ISCONNECTED) 1509 return (EINVAL); 1510 1511 mutex_enter(&so->so_lock); 1512 so_lock_single(so); /* Set SOLOCKED */ 1513 1514 /* 1515 * If the listen doesn't change the backlog we do nothing. 1516 * This avoids an EPROTO error from the transport. 
1517 */ 1518 if ((so->so_state & SS_ACCEPTCONN) && 1519 so->so_backlog == backlog) 1520 goto done; 1521 1522 if (!(so->so_state & SS_ISBOUND)) { 1523 /* 1524 * Must have been explicitly bound in the UNIX domain. 1525 */ 1526 if (so->so_family == AF_UNIX) { 1527 error = EINVAL; 1528 goto done; 1529 } 1530 error = sotpi_bindlisten(so, NULL, 0, backlog, 1531 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1532 } else if (backlog > 0) { 1533 /* 1534 * AF_INET{,6} hack to avoid losing the port. 1535 * Assumes that all AF_INET{,6} transports can handle a 1536 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1537 * has already bound thus it is possible to avoid the unbind. 1538 */ 1539 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1540 /*CONSTCOND*/ 1541 !solisten_tpi_tcp)) { 1542 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1543 if (error) 1544 goto done; 1545 } 1546 error = sotpi_bindlisten(so, NULL, 0, backlog, 1547 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1548 } else { 1549 so->so_state |= SS_ACCEPTCONN; 1550 so->so_backlog = backlog; 1551 } 1552 if (error) 1553 goto done; 1554 ASSERT(so->so_state & SS_ACCEPTCONN); 1555 done: 1556 so_unlock_single(so, SOLOCKED); 1557 mutex_exit(&so->so_lock); 1558 return (error); 1559 } 1560 1561 /* 1562 * Disconnect either a specified seqno or all (-1). 1563 * The former is used on listening sockets only. 1564 * 1565 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1566 * the current use of sodisconnect(seqno == -1) is only for shutdown 1567 * so there is no point (and potentially incorrect) to unbind. 
1568 */ 1569 static int 1570 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1571 { 1572 struct T_discon_req discon_req; 1573 int error = 0; 1574 mblk_t *mp; 1575 1576 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1577 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1578 1579 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1580 mutex_enter(&so->so_lock); 1581 so_lock_single(so); /* Set SOLOCKED */ 1582 } else { 1583 ASSERT(MUTEX_HELD(&so->so_lock)); 1584 ASSERT(so->so_flag & SOLOCKED); 1585 } 1586 1587 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1588 error = EINVAL; 1589 eprintsoline(so, error); 1590 goto done; 1591 } 1592 1593 mutex_exit(&so->so_lock); 1594 /* 1595 * Flush the write side (unless this is a listener) 1596 * and then send down a T_DISCON_REQ. 1597 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1598 * and other messages.) 1599 */ 1600 if (!(so->so_state & SS_ACCEPTCONN)) 1601 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1602 1603 discon_req.PRIM_type = T_DISCON_REQ; 1604 discon_req.SEQ_number = seqno; 1605 mp = soallocproto1(&discon_req, sizeof (discon_req), 1606 0, _ALLOC_SLEEP, CRED()); 1607 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1608 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1609 mutex_enter(&so->so_lock); 1610 if (error) { 1611 eprintsoline(so, error); 1612 goto done; 1613 } 1614 1615 error = sowaitokack(so, T_DISCON_REQ); 1616 if (error) { 1617 eprintsoline(so, error); 1618 goto done; 1619 } 1620 /* 1621 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1622 * strsock_proto while the lock was dropped above, the disconnect 1623 * is allowed to complete. However, it is not possible to 1624 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 
1625 */ 1626 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1627 SOTOTPI(so)->sti_laddr_valid = 0; 1628 SOTOTPI(so)->sti_faddr_valid = 0; 1629 done: 1630 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1631 so_unlock_single(so, SOLOCKED); 1632 mutex_exit(&so->so_lock); 1633 } else { 1634 /* If the caller held the lock don't release it here */ 1635 ASSERT(MUTEX_HELD(&so->so_lock)); 1636 ASSERT(so->so_flag & SOLOCKED); 1637 } 1638 return (error); 1639 } 1640 1641 /* ARGSUSED */ 1642 int 1643 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1644 struct sonode **nsop) 1645 { 1646 struct T_conn_ind *conn_ind; 1647 struct T_conn_res *conn_res; 1648 int error = 0; 1649 mblk_t *mp, *ack_mp; 1650 struct sonode *nso; 1651 vnode_t *nvp; 1652 void *src; 1653 t_uscalar_t srclen; 1654 void *opt; 1655 t_uscalar_t optlen; 1656 t_scalar_t PRIM_type; 1657 t_scalar_t SEQ_number; 1658 size_t sinlen; 1659 sotpi_info_t *sti = SOTOTPI(so); 1660 sotpi_info_t *nsti; 1661 1662 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1663 (void *)so, fflag, (void *)nsop, 1664 pr_state(so->so_state, so->so_mode))); 1665 1666 /* 1667 * Defer single-threading the accepting socket until 1668 * the T_CONN_IND has been received and parsed and the 1669 * new sonode has been opened. 1670 */ 1671 1672 /* Check that we are not already connected */ 1673 if ((so->so_state & SS_ACCEPTCONN) == 0) 1674 goto conn_bad; 1675 again: 1676 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1677 goto e_bad; 1678 1679 ASSERT(mp != NULL); 1680 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1681 1682 /* 1683 * Save SEQ_number for error paths. 
1684 */ 1685 SEQ_number = conn_ind->SEQ_number; 1686 1687 srclen = conn_ind->SRC_length; 1688 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1689 if (src == NULL) { 1690 error = EPROTO; 1691 freemsg(mp); 1692 eprintsoline(so, error); 1693 goto disconnect_unlocked; 1694 } 1695 optlen = conn_ind->OPT_length; 1696 switch (so->so_family) { 1697 case AF_INET: 1698 case AF_INET6: 1699 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1700 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1701 &opt, conn_ind->OPT_length); 1702 } else { 1703 /* 1704 * The transport (in this case TCP) hasn't sent up 1705 * a pointer to an instance for the accept fast-path. 1706 * Disable fast-path completely because the call to 1707 * sotpi_create() below would otherwise create an 1708 * incomplete TCP instance, which would lead to 1709 * problems when sockfs sends a normal T_CONN_RES 1710 * message down the new stream. 1711 */ 1712 if (sti->sti_direct) { 1713 int rval; 1714 /* 1715 * For consistency we inform tcp to disable 1716 * direct interface on the listener, though 1717 * we can certainly live without doing this 1718 * because no data will ever travel upstream 1719 * on the listening socket. 1720 */ 1721 sti->sti_direct = 0; 1722 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1723 0, 0, K_TO_K, cr, &rval); 1724 } 1725 opt = NULL; 1726 optlen = 0; 1727 } 1728 break; 1729 case AF_UNIX: 1730 default: 1731 if (optlen != 0) { 1732 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1733 __TPI_ALIGN_SIZE); 1734 if (opt == NULL) { 1735 error = EPROTO; 1736 freemsg(mp); 1737 eprintsoline(so, error); 1738 goto disconnect_unlocked; 1739 } 1740 } 1741 if (so->so_family == AF_UNIX) { 1742 if (!sti->sti_faddr_noxlate) { 1743 src = NULL; 1744 srclen = 0; 1745 } 1746 /* Extract src address from options */ 1747 if (optlen != 0) 1748 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1749 } 1750 break; 1751 } 1752 1753 /* 1754 * Create the new socket. 
1755 */ 1756 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1757 if (nso == NULL) { 1758 ASSERT(error != 0); 1759 /* 1760 * Accept can not fail with ENOBUFS. sotpi_create 1761 * sleeps waiting for memory until a signal is caught 1762 * so return EINTR. 1763 */ 1764 freemsg(mp); 1765 if (error == ENOBUFS) 1766 error = EINTR; 1767 goto e_disc_unl; 1768 } 1769 nvp = SOTOV(nso); 1770 nsti = SOTOTPI(nso); 1771 1772 #ifdef DEBUG 1773 /* 1774 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1775 * it's inherited early to allow debugging of the accept code itself. 1776 */ 1777 nso->so_options |= so->so_options & SO_DEBUG; 1778 #endif /* DEBUG */ 1779 1780 /* 1781 * Save the SRC address from the T_CONN_IND 1782 * for getpeername to work on AF_UNIX and on transports that do not 1783 * support TI_GETPEERNAME. 1784 * 1785 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1786 * copyin_name(). 1787 */ 1788 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1789 error = EINVAL; 1790 freemsg(mp); 1791 eprintsoline(so, error); 1792 goto disconnect_vp_unlocked; 1793 } 1794 nsti->sti_faddr_len = (socklen_t)srclen; 1795 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1796 bcopy(src, nsti->sti_faddr_sa, srclen); 1797 nsti->sti_faddr_valid = 1; 1798 1799 /* 1800 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1801 */ 1802 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1803 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1804 cred_t *cr; 1805 pid_t cpid; 1806 1807 cr = msg_getcred(mp, &cpid); 1808 if (cr != NULL) { 1809 crhold(cr); 1810 nso->so_peercred = cr; 1811 nso->so_cpid = cpid; 1812 } 1813 freemsg(mp); 1814 1815 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1816 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1817 if (mp == NULL) { 1818 /* 1819 * Accept can not fail with ENOBUFS. 1820 * A signal was caught so return EINTR. 
1821 */ 1822 error = EINTR; 1823 eprintsoline(so, error); 1824 goto disconnect_vp_unlocked; 1825 } 1826 conn_res = (struct T_conn_res *)mp->b_rptr; 1827 } else { 1828 /* 1829 * For efficency reasons we use msg_extractcred; no crhold 1830 * needed since db_credp is cleared (i.e., we move the cred 1831 * from the message to so_peercred. 1832 */ 1833 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1834 1835 mp->b_rptr = DB_BASE(mp); 1836 conn_res = (struct T_conn_res *)mp->b_rptr; 1837 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1838 1839 mblk_setcred(mp, cr, curproc->p_pid); 1840 } 1841 1842 /* 1843 * New socket must be bound at least in sockfs and, except for AF_INET, 1844 * (or AF_INET6) it also has to be bound in the transport provider. 1845 * We set the local address in the sonode from the T_OK_ACK of the 1846 * T_CONN_RES. For this reason the address we bind to here isn't 1847 * important. 1848 */ 1849 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1850 /*CONSTCOND*/ 1851 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1852 /* 1853 * Optimization for AF_INET{,6} transports 1854 * that can handle a T_CONN_RES without being bound. 1855 */ 1856 mutex_enter(&nso->so_lock); 1857 so_automatic_bind(nso); 1858 mutex_exit(&nso->so_lock); 1859 } else { 1860 /* Perform NULL bind with the transport provider. */ 1861 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1862 cr)) != 0) { 1863 ASSERT(error != ENOBUFS); 1864 freemsg(mp); 1865 eprintsoline(nso, error); 1866 goto disconnect_vp_unlocked; 1867 } 1868 } 1869 1870 /* 1871 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1872 * so that any data arriving on the new socket will cause the 1873 * appropriate signals to be delivered for the new socket. 1874 * 1875 * No other thread (except strsock_proto and strsock_misc) 1876 * can access the new socket thus we relax the locking. 
1877 */ 1878 nso->so_pgrp = so->so_pgrp; 1879 nso->so_state |= so->so_state & SS_ASYNC; 1880 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1881 1882 if (nso->so_pgrp != 0) { 1883 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1884 eprintsoline(nso, error); 1885 error = 0; 1886 nso->so_pgrp = 0; 1887 } 1888 } 1889 1890 /* 1891 * Make note of the socket level options. TCP and IP level options 1892 * are already inherited. We could do all this after accept is 1893 * successful but doing it here simplifies code and no harm done 1894 * for error case. 1895 */ 1896 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1897 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1898 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1899 nso->so_sndbuf = so->so_sndbuf; 1900 nso->so_rcvbuf = so->so_rcvbuf; 1901 if (nso->so_options & SO_LINGER) 1902 nso->so_linger = so->so_linger; 1903 1904 /* 1905 * Note that the following sti_direct code path should be 1906 * removed once we are confident that the direct sockets 1907 * do not result in any degradation. 
1908 */ 1909 if (sti->sti_direct) { 1910 1911 ASSERT(opt != NULL); 1912 1913 conn_res->OPT_length = optlen; 1914 conn_res->OPT_offset = MBLKL(mp); 1915 bcopy(&opt, mp->b_wptr, optlen); 1916 mp->b_wptr += optlen; 1917 conn_res->PRIM_type = T_CONN_RES; 1918 conn_res->ACCEPTOR_id = 0; 1919 PRIM_type = T_CONN_RES; 1920 1921 /* Send down the T_CONN_RES on acceptor STREAM */ 1922 error = kstrputmsg(SOTOV(nso), mp, NULL, 1923 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1924 if (error) { 1925 mutex_enter(&so->so_lock); 1926 so_lock_single(so); 1927 eprintsoline(so, error); 1928 goto disconnect_vp; 1929 } 1930 mutex_enter(&nso->so_lock); 1931 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1932 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1933 if (error) { 1934 mutex_exit(&nso->so_lock); 1935 mutex_enter(&so->so_lock); 1936 so_lock_single(so); 1937 eprintsoline(so, error); 1938 goto disconnect_vp; 1939 } 1940 if (nso->so_family == AF_INET) { 1941 sin_t *sin; 1942 1943 sin = (sin_t *)(ack_mp->b_rptr + 1944 sizeof (struct T_ok_ack)); 1945 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 1946 nsti->sti_laddr_len = sizeof (sin_t); 1947 } else { 1948 sin6_t *sin6; 1949 1950 sin6 = (sin6_t *)(ack_mp->b_rptr + 1951 sizeof (struct T_ok_ack)); 1952 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 1953 nsti->sti_laddr_len = sizeof (sin6_t); 1954 } 1955 freemsg(ack_mp); 1956 1957 nso->so_state |= SS_ISCONNECTED; 1958 nso->so_proto_handle = (sock_lower_handle_t)opt; 1959 nsti->sti_laddr_valid = 1; 1960 1961 if (sti->sti_nl7c_flags & NL7C_ENABLED) { 1962 /* 1963 * A NL7C marked listen()er so the new socket 1964 * inherits the listen()er's NL7C state, except 1965 * for NL7C_POLLIN. 1966 * 1967 * Only call NL7C to process the new socket if 1968 * the listen socket allows blocking i/o. 
1969 */ 1970 nsti->sti_nl7c_flags = 1971 sti->sti_nl7c_flags & (~NL7C_POLLIN); 1972 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1973 /* 1974 * Nonblocking accept() just make it 1975 * persist to defer processing to the 1976 * read-side syscall (e.g. read). 1977 */ 1978 nsti->sti_nl7c_flags |= NL7C_SOPERSIST; 1979 } else if (nl7c_process(nso, B_FALSE)) { 1980 /* 1981 * NL7C has completed processing on the 1982 * socket, close the socket and back to 1983 * the top to await the next T_CONN_IND. 1984 */ 1985 mutex_exit(&nso->so_lock); 1986 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1987 cr, NULL); 1988 VN_RELE(nvp); 1989 goto again; 1990 } 1991 /* Pass the new socket out */ 1992 } 1993 1994 mutex_exit(&nso->so_lock); 1995 1996 /* 1997 * It's possible, through the use of autopush for example, 1998 * that the acceptor stream may not support sti_direct 1999 * semantics. If the new socket does not support sti_direct 2000 * we issue a _SIOCSOCKFALLBACK to inform the transport 2001 * as we would in the I_PUSH case. 2002 */ 2003 if (nsti->sti_direct == 0) { 2004 int rval; 2005 2006 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 2007 0, 0, K_TO_K, cr, &rval)) != 0) { 2008 mutex_enter(&so->so_lock); 2009 so_lock_single(so); 2010 eprintsoline(so, error); 2011 goto disconnect_vp; 2012 } 2013 } 2014 2015 /* 2016 * Pass out new socket. 2017 */ 2018 if (nsop != NULL) 2019 *nsop = nso; 2020 2021 return (0); 2022 } 2023 2024 /* 2025 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 2026 * which don't support the FireEngine accept fast-path. It is also 2027 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 2028 * again. Neither sockfs nor TCP attempt to find out if some other 2029 * random module has been inserted in between (in which case we 2030 * should follow TLI accept behaviour). We blindly assume the worst 2031 * case and revert back to old behaviour i.e. 
TCP will not send us 2032 * any option (eager) and the accept should happen on the listener 2033 * queue. Any queued T_conn_ind have already got their options removed 2034 * by so_sock2_stream() when "sockmod" was I_POP'd. 2035 */ 2036 /* 2037 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 2038 */ 2039 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 2040 #ifdef _ILP32 2041 queue_t *q; 2042 2043 /* 2044 * Find read queue in driver 2045 * Can safely do this since we "own" nso/nvp. 2046 */ 2047 q = strvp2wq(nvp)->q_next; 2048 while (SAMESTR(q)) 2049 q = q->q_next; 2050 q = RD(q); 2051 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 2052 #else 2053 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 2054 #endif /* _ILP32 */ 2055 conn_res->PRIM_type = O_T_CONN_RES; 2056 PRIM_type = O_T_CONN_RES; 2057 } else { 2058 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 2059 conn_res->PRIM_type = T_CONN_RES; 2060 PRIM_type = T_CONN_RES; 2061 } 2062 conn_res->SEQ_number = SEQ_number; 2063 conn_res->OPT_length = 0; 2064 conn_res->OPT_offset = 0; 2065 2066 mutex_enter(&so->so_lock); 2067 so_lock_single(so); /* Set SOLOCKED */ 2068 mutex_exit(&so->so_lock); 2069 2070 error = kstrputmsg(SOTOV(so), mp, NULL, 2071 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2072 mutex_enter(&so->so_lock); 2073 if (error) { 2074 eprintsoline(so, error); 2075 goto disconnect_vp; 2076 } 2077 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2078 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2079 if (error) { 2080 eprintsoline(so, error); 2081 goto disconnect_vp; 2082 } 2083 mutex_exit(&so->so_lock); 2084 /* 2085 * If there is a sin/sin6 appended onto the T_OK_ACK use 2086 * that to set the local address. If this is not present 2087 * then we zero out the address and don't set the 2088 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2089 * the pathname from the listening socket. 
2090 * In the case where this is TCP or an AF_UNIX socket the 2091 * client side may have queued data or a T_ORDREL in the 2092 * transport. Having now sent the T_CONN_RES we may receive 2093 * those queued messages at any time. Hold the acceptor 2094 * so_lock until its state and laddr are finalized. 2095 */ 2096 mutex_enter(&nso->so_lock); 2097 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 2098 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 2099 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2100 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2101 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2102 nsti->sti_laddr_len = sinlen; 2103 nsti->sti_laddr_valid = 1; 2104 } else if (nso->so_family == AF_UNIX) { 2105 ASSERT(so->so_family == AF_UNIX); 2106 nsti->sti_laddr_len = sti->sti_laddr_len; 2107 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2108 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2109 nsti->sti_laddr_len); 2110 nsti->sti_laddr_valid = 1; 2111 } else { 2112 nsti->sti_laddr_len = sti->sti_laddr_len; 2113 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2114 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2115 nsti->sti_laddr_sa->sa_family = nso->so_family; 2116 } 2117 nso->so_state |= SS_ISCONNECTED; 2118 mutex_exit(&nso->so_lock); 2119 2120 freemsg(ack_mp); 2121 2122 mutex_enter(&so->so_lock); 2123 so_unlock_single(so, SOLOCKED); 2124 mutex_exit(&so->so_lock); 2125 2126 /* 2127 * Pass out new socket. 
2128 */ 2129 if (nsop != NULL) 2130 *nsop = nso; 2131 2132 return (0); 2133 2134 2135 eproto_disc_unl: 2136 error = EPROTO; 2137 e_disc_unl: 2138 eprintsoline(so, error); 2139 goto disconnect_unlocked; 2140 2141 pr_disc_vp_unl: 2142 eprintsoline(so, error); 2143 disconnect_vp_unlocked: 2144 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2145 VN_RELE(nvp); 2146 disconnect_unlocked: 2147 (void) sodisconnect(so, SEQ_number, 0); 2148 return (error); 2149 2150 pr_disc_vp: 2151 eprintsoline(so, error); 2152 disconnect_vp: 2153 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2154 so_unlock_single(so, SOLOCKED); 2155 mutex_exit(&so->so_lock); 2156 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2157 VN_RELE(nvp); 2158 return (error); 2159 2160 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2161 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2162 ? EOPNOTSUPP : EINVAL; 2163 e_bad: 2164 eprintsoline(so, error); 2165 return (error); 2166 } 2167 2168 /* 2169 * connect a socket. 2170 * 2171 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2172 * unconnect (by specifying a null address). 2173 */ 2174 int 2175 sotpi_connect(struct sonode *so, 2176 struct sockaddr *name, 2177 socklen_t namelen, 2178 int fflag, 2179 int flags, 2180 struct cred *cr) 2181 { 2182 struct T_conn_req conn_req; 2183 int error = 0; 2184 mblk_t *mp; 2185 void *src; 2186 socklen_t srclen; 2187 void *addr; 2188 socklen_t addrlen; 2189 boolean_t need_unlock; 2190 sotpi_info_t *sti = SOTOTPI(so); 2191 2192 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2193 (void *)so, (void *)name, namelen, fflag, flags, 2194 pr_state(so->so_state, so->so_mode))); 2195 2196 /* 2197 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2198 * avoid sleeping for memory with SOLOCKED held. 2199 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2200 * + sizeof (struct T_opthdr). 
2201 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2202 * exceed sti_faddr_maxlen). 2203 */ 2204 mp = soallocproto(sizeof (struct T_conn_req) + 2205 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 2206 cr); 2207 if (mp == NULL) { 2208 /* 2209 * Connect can not fail with ENOBUFS. A signal was 2210 * caught so return EINTR. 2211 */ 2212 error = EINTR; 2213 eprintsoline(so, error); 2214 return (error); 2215 } 2216 2217 mutex_enter(&so->so_lock); 2218 /* 2219 * Make sure there is a preallocated T_unbind_req message 2220 * before any binding. This message is allocated when the 2221 * socket is created. Since another thread can consume 2222 * so_unbind_mp by the time we return from so_lock_single(), 2223 * we should check the availability of so_unbind_mp after 2224 * we return from so_lock_single(). 2225 */ 2226 2227 so_lock_single(so); /* Set SOLOCKED */ 2228 need_unlock = B_TRUE; 2229 2230 if (sti->sti_unbind_mp == NULL) { 2231 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2232 /* NOTE: holding so_lock while sleeping */ 2233 sti->sti_unbind_mp = 2234 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2235 if (sti->sti_unbind_mp == NULL) { 2236 error = EINTR; 2237 goto done; 2238 } 2239 } 2240 2241 /* 2242 * Can't have done a listen before connecting. 2243 */ 2244 if (so->so_state & SS_ACCEPTCONN) { 2245 error = EOPNOTSUPP; 2246 goto done; 2247 } 2248 2249 /* 2250 * Must be bound with the transport 2251 */ 2252 if (!(so->so_state & SS_ISBOUND)) { 2253 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2254 /*CONSTCOND*/ 2255 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2256 /* 2257 * Optimization for AF_INET{,6} transports 2258 * that can handle a T_CONN_REQ without being bound. 
2259 */ 2260 so_automatic_bind(so); 2261 } else { 2262 error = sotpi_bind(so, NULL, 0, 2263 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2264 if (error) 2265 goto done; 2266 } 2267 ASSERT(so->so_state & SS_ISBOUND); 2268 flags |= _SOCONNECT_DID_BIND; 2269 } 2270 2271 /* 2272 * Handle a connect to a name parameter of type AF_UNSPEC like a 2273 * connect to a null address. This is the portable method to 2274 * unconnect a socket. 2275 */ 2276 if ((namelen >= sizeof (sa_family_t)) && 2277 (name->sa_family == AF_UNSPEC)) { 2278 name = NULL; 2279 namelen = 0; 2280 } 2281 2282 /* 2283 * Check that we are not already connected. 2284 * A connection-oriented socket cannot be reconnected. 2285 * A connected connection-less socket can be 2286 * - connected to a different address by a subsequent connect 2287 * - "unconnected" by a connect to the NULL address 2288 */ 2289 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2290 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2291 if (so->so_mode & SM_CONNREQUIRED) { 2292 /* Connection-oriented socket */ 2293 error = so->so_state & SS_ISCONNECTED ? 2294 EISCONN : EALREADY; 2295 goto done; 2296 } 2297 /* Connection-less socket */ 2298 if (name == NULL) { 2299 /* 2300 * Remove the connected state and clear SO_DGRAM_ERRIND 2301 * since it was set when the socket was connected. 2302 * If this is UDP also send down a T_DISCON_REQ. 2303 */ 2304 int val; 2305 2306 if ((so->so_family == AF_INET || 2307 so->so_family == AF_INET6) && 2308 (so->so_type == SOCK_DGRAM || 2309 so->so_type == SOCK_RAW) && 2310 /*CONSTCOND*/ 2311 !soconnect_tpi_udp) { 2312 /* XXX What about implicitly unbinding here? 
*/ 2313 error = sodisconnect(so, -1, 2314 _SODISCONNECT_LOCK_HELD); 2315 } else { 2316 so->so_state &= 2317 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2318 sti->sti_faddr_valid = 0; 2319 sti->sti_faddr_len = 0; 2320 } 2321 2322 /* Remove SOLOCKED since setsockopt will grab it */ 2323 so_unlock_single(so, SOLOCKED); 2324 mutex_exit(&so->so_lock); 2325 2326 val = 0; 2327 (void) sotpi_setsockopt(so, SOL_SOCKET, 2328 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2329 cr); 2330 2331 mutex_enter(&so->so_lock); 2332 so_lock_single(so); /* Set SOLOCKED */ 2333 goto done; 2334 } 2335 } 2336 ASSERT(so->so_state & SS_ISBOUND); 2337 2338 if (name == NULL || namelen == 0) { 2339 error = EINVAL; 2340 goto done; 2341 } 2342 /* 2343 * Mark the socket if sti_faddr_sa represents the transport level 2344 * address. 2345 */ 2346 if (flags & _SOCONNECT_NOXLATE) { 2347 struct sockaddr_ux *soaddr_ux; 2348 2349 ASSERT(so->so_family == AF_UNIX); 2350 if (namelen != sizeof (struct sockaddr_ux)) { 2351 error = EINVAL; 2352 goto done; 2353 } 2354 soaddr_ux = (struct sockaddr_ux *)name; 2355 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2356 namelen = sizeof (soaddr_ux->sou_addr); 2357 sti->sti_faddr_noxlate = 1; 2358 } 2359 2360 /* 2361 * Length and family checks. 2362 */ 2363 error = so_addr_verify(so, name, namelen); 2364 if (error) 2365 goto bad; 2366 2367 /* 2368 * Save foreign address. Needed for AF_UNIX as well as 2369 * transport providers that do not support TI_GETPEERNAME. 2370 * Also used for cached foreign address for TCP and UDP. 2371 */ 2372 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2373 error = EINVAL; 2374 goto done; 2375 } 2376 sti->sti_faddr_len = (socklen_t)namelen; 2377 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2378 bcopy(name, sti->sti_faddr_sa, namelen); 2379 sti->sti_faddr_valid = 1; 2380 2381 if (so->so_family == AF_UNIX) { 2382 if (sti->sti_faddr_noxlate) { 2383 /* 2384 * Already have a transport internal address. 
Do not 2385 * pass any (transport internal) source address. 2386 */ 2387 addr = sti->sti_faddr_sa; 2388 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2389 src = NULL; 2390 srclen = 0; 2391 } else { 2392 /* 2393 * Pass the sockaddr_un source address as an option 2394 * and translate the remote address. 2395 * Holding so_lock thus sti_laddr_sa can not change. 2396 */ 2397 src = sti->sti_laddr_sa; 2398 srclen = (t_uscalar_t)sti->sti_laddr_len; 2399 dprintso(so, 1, 2400 ("sotpi_connect UNIX: srclen %d, src %p\n", 2401 srclen, src)); 2402 error = so_ux_addr_xlate(so, 2403 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2404 (flags & _SOCONNECT_XPG4_2), 2405 &addr, &addrlen); 2406 if (error) 2407 goto bad; 2408 } 2409 } else { 2410 addr = sti->sti_faddr_sa; 2411 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2412 src = NULL; 2413 srclen = 0; 2414 } 2415 /* 2416 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2417 * option which asks the transport provider to send T_UDERR_IND 2418 * messages. These T_UDERR_IND messages are used to return connected 2419 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2420 * 2421 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2422 * we send down a T_CONN_REQ. This is needed to let the 2423 * transport assign a local address that is consistent with 2424 * the remote address. Applications depend on a getsockname() 2425 * after a connect() to retrieve the "source" IP address for 2426 * the connected socket. Invalidate the cached local address 2427 * to force getsockname() to enquire of the transport. 2428 */ 2429 if (!(so->so_mode & SM_CONNREQUIRED)) { 2430 /* 2431 * Datagram socket. 
2432 */ 2433 int32_t val; 2434 2435 so_unlock_single(so, SOLOCKED); 2436 mutex_exit(&so->so_lock); 2437 2438 val = 1; 2439 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2440 &val, (t_uscalar_t)sizeof (val), cr); 2441 2442 mutex_enter(&so->so_lock); 2443 so_lock_single(so); /* Set SOLOCKED */ 2444 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2445 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2446 soconnect_tpi_udp) { 2447 soisconnected(so); 2448 goto done; 2449 } 2450 /* 2451 * Send down T_CONN_REQ etc. 2452 * Clear fflag to avoid returning EWOULDBLOCK. 2453 */ 2454 fflag = 0; 2455 ASSERT(so->so_family != AF_UNIX); 2456 sti->sti_laddr_valid = 0; 2457 } else if (sti->sti_laddr_len != 0) { 2458 /* 2459 * If the local address or port was "any" then it may be 2460 * changed by the transport as a result of the 2461 * connect. Invalidate the cached version if we have one. 2462 */ 2463 switch (so->so_family) { 2464 case AF_INET: 2465 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2466 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2467 INADDR_ANY || 2468 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2469 sti->sti_laddr_valid = 0; 2470 break; 2471 2472 case AF_INET6: 2473 ASSERT(sti->sti_laddr_len == 2474 (socklen_t)sizeof (sin6_t)); 2475 if (IN6_IS_ADDR_UNSPECIFIED( 2476 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2477 IN6_IS_ADDR_V4MAPPED_ANY( 2478 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2479 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2480 sti->sti_laddr_valid = 0; 2481 break; 2482 2483 default: 2484 break; 2485 } 2486 } 2487 2488 /* 2489 * Check for failure of an earlier call 2490 */ 2491 if (so->so_error != 0) 2492 goto so_bad; 2493 2494 /* 2495 * Send down T_CONN_REQ. Message was allocated above. 
2496 */ 2497 conn_req.PRIM_type = T_CONN_REQ; 2498 conn_req.DEST_length = addrlen; 2499 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2500 if (srclen == 0) { 2501 conn_req.OPT_length = 0; 2502 conn_req.OPT_offset = 0; 2503 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2504 soappendmsg(mp, addr, addrlen); 2505 } else { 2506 /* 2507 * There is a AF_UNIX sockaddr_un to include as a source 2508 * address option. 2509 */ 2510 struct T_opthdr toh; 2511 2512 toh.level = SOL_SOCKET; 2513 toh.name = SO_SRCADDR; 2514 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2515 toh.status = 0; 2516 conn_req.OPT_length = 2517 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2518 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2519 _TPI_ALIGN_TOPT(addrlen)); 2520 2521 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2522 soappendmsg(mp, addr, addrlen); 2523 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2524 soappendmsg(mp, &toh, sizeof (toh)); 2525 soappendmsg(mp, src, srclen); 2526 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2527 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2528 } 2529 /* 2530 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2531 * in order to have the right state when the T_CONN_CON shows up. 
2532 */ 2533 soisconnecting(so); 2534 mutex_exit(&so->so_lock); 2535 2536 if (AU_AUDITING()) 2537 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2538 2539 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2540 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2541 mp = NULL; 2542 mutex_enter(&so->so_lock); 2543 if (error != 0) 2544 goto bad; 2545 2546 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2547 goto bad; 2548 2549 /* Allow other threads to access the socket */ 2550 so_unlock_single(so, SOLOCKED); 2551 need_unlock = B_FALSE; 2552 2553 /* 2554 * Wait until we get a T_CONN_CON or an error 2555 */ 2556 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2557 so_lock_single(so); /* Set SOLOCKED */ 2558 need_unlock = B_TRUE; 2559 } 2560 2561 done: 2562 freemsg(mp); 2563 switch (error) { 2564 case EINPROGRESS: 2565 case EALREADY: 2566 case EISCONN: 2567 case EINTR: 2568 /* Non-fatal errors */ 2569 sti->sti_laddr_valid = 0; 2570 /* FALLTHRU */ 2571 case 0: 2572 break; 2573 default: 2574 ASSERT(need_unlock); 2575 /* 2576 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2577 * and invalidate local-address cache 2578 */ 2579 so->so_state &= ~SS_ISCONNECTING; 2580 sti->sti_laddr_valid = 0; 2581 /* A discon_ind might have already unbound us */ 2582 if ((flags & _SOCONNECT_DID_BIND) && 2583 (so->so_state & SS_ISBOUND)) { 2584 int err; 2585 2586 err = sotpi_unbind(so, 0); 2587 /* LINTED - statement has no conseq */ 2588 if (err) { 2589 eprintsoline(so, err); 2590 } 2591 } 2592 break; 2593 } 2594 if (need_unlock) 2595 so_unlock_single(so, SOLOCKED); 2596 mutex_exit(&so->so_lock); 2597 return (error); 2598 2599 so_bad: error = sogeterr(so, B_TRUE); 2600 bad: eprintsoline(so, error); 2601 goto done; 2602 } 2603 2604 /* ARGSUSED */ 2605 int 2606 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2607 { 2608 struct T_ordrel_req ordrel_req; 2609 mblk_t *mp; 2610 uint_t old_state, state_change; 2611 int error = 0; 2612 sotpi_info_t *sti = SOTOTPI(so); 2613 2614 
dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2615 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2616 2617 mutex_enter(&so->so_lock); 2618 so_lock_single(so); /* Set SOLOCKED */ 2619 2620 /* 2621 * SunOS 4.X has no check for datagram sockets. 2622 * 5.X checks that it is connected (ENOTCONN) 2623 * X/Open requires that we check the connected state. 2624 */ 2625 if (!(so->so_state & SS_ISCONNECTED)) { 2626 if (!xnet_skip_checks) { 2627 error = ENOTCONN; 2628 if (xnet_check_print) { 2629 printf("sockfs: X/Open shutdown check " 2630 "caused ENOTCONN\n"); 2631 } 2632 } 2633 goto done; 2634 } 2635 /* 2636 * Record the current state and then perform any state changes. 2637 * Then use the difference between the old and new states to 2638 * determine which messages need to be sent. 2639 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2640 * duplicate calls to shutdown(). 2641 */ 2642 old_state = so->so_state; 2643 2644 switch (how) { 2645 case 0: 2646 socantrcvmore(so); 2647 break; 2648 case 1: 2649 socantsendmore(so); 2650 break; 2651 case 2: 2652 socantsendmore(so); 2653 socantrcvmore(so); 2654 break; 2655 default: 2656 error = EINVAL; 2657 goto done; 2658 } 2659 2660 /* 2661 * Assumes that the SS_CANT* flags are never cleared in the above code. 2662 */ 2663 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2664 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2665 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2666 2667 switch (state_change) { 2668 case 0: 2669 dprintso(so, 1, 2670 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2671 so->so_state)); 2672 goto done; 2673 2674 case SS_CANTRCVMORE: 2675 mutex_exit(&so->so_lock); 2676 strseteof(SOTOV(so), 1); 2677 /* 2678 * strseteof takes care of read side wakeups, 2679 * pollwakeups, and signals. 2680 */ 2681 /* 2682 * Get the read lock before flushing data to avoid problems 2683 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2684 */ 2685 mutex_enter(&so->so_lock); 2686 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2687 mutex_exit(&so->so_lock); 2688 2689 /* Flush read side queue */ 2690 strflushrq(SOTOV(so), FLUSHALL); 2691 2692 mutex_enter(&so->so_lock); 2693 so_unlock_read(so); /* Clear SOREADLOCKED */ 2694 break; 2695 2696 case SS_CANTSENDMORE: 2697 mutex_exit(&so->so_lock); 2698 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2699 mutex_enter(&so->so_lock); 2700 break; 2701 2702 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2703 mutex_exit(&so->so_lock); 2704 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2705 strseteof(SOTOV(so), 1); 2706 /* 2707 * strseteof takes care of read side wakeups, 2708 * pollwakeups, and signals. 2709 */ 2710 /* 2711 * Get the read lock before flushing data to avoid problems 2712 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2713 */ 2714 mutex_enter(&so->so_lock); 2715 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2716 mutex_exit(&so->so_lock); 2717 2718 /* Flush read side queue */ 2719 strflushrq(SOTOV(so), FLUSHALL); 2720 2721 mutex_enter(&so->so_lock); 2722 so_unlock_read(so); /* Clear SOREADLOCKED */ 2723 break; 2724 } 2725 2726 ASSERT(MUTEX_HELD(&so->so_lock)); 2727 2728 /* 2729 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2730 * was set due to this call and the new state has both of them set: 2731 * Send the AF_UNIX close indication 2732 * For T_COTS send a discon_ind 2733 * 2734 * If cantsend was set due to this call: 2735 * For T_COTSORD send an ordrel_ind 2736 * 2737 * Note that for T_CLTS there is no message sent here. 2738 */ 2739 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2740 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2741 /* 2742 * For SunOS 4.X compatibility we tell the other end 2743 * that we are unable to receive at this point. 
2744 */ 2745 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2746 so_unix_close(so); 2747 2748 if (sti->sti_serv_type == T_COTS) 2749 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2750 } 2751 if ((state_change & SS_CANTSENDMORE) && 2752 (sti->sti_serv_type == T_COTS_ORD)) { 2753 /* Send an orderly release */ 2754 ordrel_req.PRIM_type = T_ORDREL_REQ; 2755 2756 mutex_exit(&so->so_lock); 2757 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2758 0, _ALLOC_SLEEP, cr); 2759 /* 2760 * Send down the T_ORDREL_REQ even if there is flow control. 2761 * This prevents shutdown from blocking. 2762 * Note that there is no T_OK_ACK for ordrel_req. 2763 */ 2764 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2765 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2766 mutex_enter(&so->so_lock); 2767 if (error) { 2768 eprintsoline(so, error); 2769 goto done; 2770 } 2771 } 2772 2773 done: 2774 so_unlock_single(so, SOLOCKED); 2775 mutex_exit(&so->so_lock); 2776 return (error); 2777 } 2778 2779 /* 2780 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2781 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2782 * that we have closed. 2783 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2784 * T_UNITDATA_REQ containing the same option. 2785 * 2786 * For SOCK_DGRAM half-connections (somebody connected to this end 2787 * but this end is not connect) we don't know where to send any 2788 * SO_UNIX_CLOSE. 2789 * 2790 * We have to ignore stream head errors just in case there has been 2791 * a shutdown(output). 2792 * Ignore any flow control to try to get the message more quickly to the peer. 2793 * While locally ignoring flow control solves the problem when there 2794 * is only the loopback transport on the stream it would not provide 2795 * the correct AF_UNIX socket semantics when one or more modules have 2796 * been pushed. 
2797 */ 2798 void 2799 so_unix_close(struct sonode *so) 2800 { 2801 int error; 2802 struct T_opthdr toh; 2803 mblk_t *mp; 2804 sotpi_info_t *sti = SOTOTPI(so); 2805 2806 ASSERT(MUTEX_HELD(&so->so_lock)); 2807 2808 ASSERT(so->so_family == AF_UNIX); 2809 2810 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2811 (SS_ISCONNECTED|SS_ISBOUND)) 2812 return; 2813 2814 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2815 (void *)so, pr_state(so->so_state, so->so_mode))); 2816 2817 toh.level = SOL_SOCKET; 2818 toh.name = SO_UNIX_CLOSE; 2819 2820 /* zero length + header */ 2821 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2822 toh.status = 0; 2823 2824 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2825 struct T_optdata_req tdr; 2826 2827 tdr.PRIM_type = T_OPTDATA_REQ; 2828 tdr.DATA_flag = 0; 2829 2830 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2831 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2832 2833 /* NOTE: holding so_lock while sleeping */ 2834 mp = soallocproto2(&tdr, sizeof (tdr), 2835 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2836 } else { 2837 struct T_unitdata_req tudr; 2838 void *addr; 2839 socklen_t addrlen; 2840 void *src; 2841 socklen_t srclen; 2842 struct T_opthdr toh2; 2843 t_scalar_t size; 2844 2845 /* Connecteded DGRAM socket */ 2846 2847 /* 2848 * For AF_UNIX the destination address is translated to 2849 * an internal name and the source address is passed as 2850 * an option. 2851 */ 2852 /* 2853 * Length and family checks. 2854 */ 2855 error = so_addr_verify(so, sti->sti_faddr_sa, 2856 (t_uscalar_t)sti->sti_faddr_len); 2857 if (error) { 2858 eprintsoline(so, error); 2859 return; 2860 } 2861 if (sti->sti_faddr_noxlate) { 2862 /* 2863 * Already have a transport internal address. Do not 2864 * pass any (transport internal) source address. 
2865 */ 2866 addr = sti->sti_faddr_sa; 2867 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2868 src = NULL; 2869 srclen = 0; 2870 } else { 2871 /* 2872 * Pass the sockaddr_un source address as an option 2873 * and translate the remote address. 2874 * Holding so_lock thus sti_laddr_sa can not change. 2875 */ 2876 src = sti->sti_laddr_sa; 2877 srclen = (socklen_t)sti->sti_laddr_len; 2878 dprintso(so, 1, 2879 ("so_ux_close: srclen %d, src %p\n", 2880 srclen, src)); 2881 error = so_ux_addr_xlate(so, 2882 sti->sti_faddr_sa, 2883 (socklen_t)sti->sti_faddr_len, 0, 2884 &addr, &addrlen); 2885 if (error) { 2886 eprintsoline(so, error); 2887 return; 2888 } 2889 } 2890 tudr.PRIM_type = T_UNITDATA_REQ; 2891 tudr.DEST_length = addrlen; 2892 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2893 if (srclen == 0) { 2894 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2895 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2896 _TPI_ALIGN_TOPT(addrlen)); 2897 2898 size = tudr.OPT_offset + tudr.OPT_length; 2899 /* NOTE: holding so_lock while sleeping */ 2900 mp = soallocproto2(&tudr, sizeof (tudr), 2901 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2902 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2903 soappendmsg(mp, &toh, sizeof (toh)); 2904 } else { 2905 /* 2906 * There is a AF_UNIX sockaddr_un to include as a 2907 * source address option. 
2908 */ 2909 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2910 _TPI_ALIGN_TOPT(srclen)); 2911 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2912 _TPI_ALIGN_TOPT(addrlen)); 2913 2914 toh2.level = SOL_SOCKET; 2915 toh2.name = SO_SRCADDR; 2916 toh2.len = (t_uscalar_t)(srclen + 2917 sizeof (struct T_opthdr)); 2918 toh2.status = 0; 2919 2920 size = tudr.OPT_offset + tudr.OPT_length; 2921 2922 /* NOTE: holding so_lock while sleeping */ 2923 mp = soallocproto2(&tudr, sizeof (tudr), 2924 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2925 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2926 soappendmsg(mp, &toh, sizeof (toh)); 2927 soappendmsg(mp, &toh2, sizeof (toh2)); 2928 soappendmsg(mp, src, srclen); 2929 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2930 } 2931 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2932 } 2933 mutex_exit(&so->so_lock); 2934 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2935 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2936 mutex_enter(&so->so_lock); 2937 } 2938 2939 /* 2940 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2941 * In addition, the caller typically verifies that there is some 2942 * potential state to clear by checking 2943 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2944 * before calling this routine. 2945 * Note that such a check can be made without holding so_lock since 2946 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2947 * decrements sti_oobsigcnt. 2948 * 2949 * When data is read *after* the point that all pending 2950 * oob data has been consumed the oob indication is cleared. 2951 * 2952 * This logic keeps select/poll returning POLLRDBAND and 2953 * SIOCATMARK returning true until we have read past 2954 * the mark. 
2955 */ 2956 static void 2957 sorecv_update_oobstate(struct sonode *so) 2958 { 2959 sotpi_info_t *sti = SOTOTPI(so); 2960 2961 mutex_enter(&so->so_lock); 2962 ASSERT(so_verify_oobstate(so)); 2963 dprintso(so, 1, 2964 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2965 sti->sti_oobsigcnt, 2966 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 2967 if (sti->sti_oobsigcnt == 0) { 2968 /* No more pending oob indications */ 2969 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2970 freemsg(so->so_oobmsg); 2971 so->so_oobmsg = NULL; 2972 } 2973 ASSERT(so_verify_oobstate(so)); 2974 mutex_exit(&so->so_lock); 2975 } 2976 2977 /* 2978 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2979 */ 2980 static int 2981 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2982 { 2983 sotpi_info_t *sti = SOTOTPI(so); 2984 int error = 0; 2985 mblk_t *tmp = NULL; 2986 mblk_t *pmp = NULL; 2987 mblk_t *nmp = sti->sti_nl7c_rcv_mp; 2988 2989 ASSERT(nmp != NULL); 2990 2991 while (nmp != NULL && uiop->uio_resid > 0) { 2992 ssize_t n; 2993 2994 if (DB_TYPE(nmp) == M_DATA) { 2995 /* 2996 * We have some data, uiomove up to resid bytes. 2997 */ 2998 n = MIN(MBLKL(nmp), uiop->uio_resid); 2999 if (n > 0) 3000 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 3001 nmp->b_rptr += n; 3002 if (nmp->b_rptr == nmp->b_wptr) { 3003 pmp = nmp; 3004 nmp = nmp->b_cont; 3005 } 3006 if (error) 3007 break; 3008 } else { 3009 /* 3010 * We only handle data, save for caller to handle. 
3011 */ 3012 if (pmp != NULL) { 3013 pmp->b_cont = nmp->b_cont; 3014 } 3015 nmp->b_cont = NULL; 3016 if (*rmp == NULL) { 3017 *rmp = nmp; 3018 } else { 3019 tmp->b_cont = nmp; 3020 } 3021 nmp = nmp->b_cont; 3022 tmp = nmp; 3023 } 3024 } 3025 if (pmp != NULL) { 3026 /* Free any mblk_t(s) which we have consumed */ 3027 pmp->b_cont = NULL; 3028 freemsg(sti->sti_nl7c_rcv_mp); 3029 } 3030 if ((sti->sti_nl7c_rcv_mp = nmp) == NULL) { 3031 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 3032 if (error == 0) { 3033 rval_t *p = (rval_t *)&sti->sti_nl7c_rcv_rval; 3034 3035 error = p->r_v.r_v2; 3036 p->r_v.r_v2 = 0; 3037 } 3038 rp->r_vals = sti->sti_nl7c_rcv_rval; 3039 sti->sti_nl7c_rcv_rval = 0; 3040 } else { 3041 /* More mblk_t(s) to process so no rval to return */ 3042 rp->r_vals = 0; 3043 } 3044 return (error); 3045 } 3046 /* 3047 * Receive the next message on the queue. 3048 * If msg_controllen is non-zero when called the caller is interested in 3049 * any received control info (options). 3050 * If msg_namelen is non-zero when called the caller is interested in 3051 * any received source address. 3052 * The routine returns with msg_control and msg_name pointing to 3053 * kmem_alloc'ed memory which the caller has to free. 
3054 */ 3055 /* ARGSUSED */ 3056 int 3057 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 3058 struct cred *cr) 3059 { 3060 union T_primitives *tpr; 3061 mblk_t *mp; 3062 uchar_t pri; 3063 int pflag, opflag; 3064 void *control; 3065 t_uscalar_t controllen; 3066 t_uscalar_t namelen; 3067 int so_state = so->so_state; /* Snapshot */ 3068 ssize_t saved_resid; 3069 rval_t rval; 3070 int flags; 3071 clock_t timout; 3072 int error = 0; 3073 sotpi_info_t *sti = SOTOTPI(so); 3074 3075 flags = msg->msg_flags; 3076 msg->msg_flags = 0; 3077 3078 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 3079 (void *)so, (void *)msg, flags, 3080 pr_state(so->so_state, so->so_mode), so->so_error)); 3081 3082 if (so->so_version == SOV_STREAM) { 3083 so_update_attrs(so, SOACC); 3084 /* The imaginary "sockmod" has been popped - act as a stream */ 3085 return (strread(SOTOV(so), uiop, cr)); 3086 } 3087 3088 /* 3089 * If we are not connected because we have never been connected 3090 * we return ENOTCONN. If we have been connected (but are no longer 3091 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 3092 * the EOF. 3093 * 3094 * An alternative would be to post an ENOTCONN error in stream head 3095 * (read+write) and clear it when we're connected. However, that error 3096 * would cause incorrect poll/select behavior! 3097 */ 3098 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 3099 (so->so_mode & SM_CONNREQUIRED)) { 3100 return (ENOTCONN); 3101 } 3102 3103 /* 3104 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 3105 * after checking that the read queue is empty) and returns zero. 3106 * This implementation will sleep (in kstrgetmsg) even if uio_resid 3107 * is zero. 
3108 */ 3109 3110 if (flags & MSG_OOB) { 3111 /* Check that the transport supports OOB */ 3112 if (!(so->so_mode & SM_EXDATA)) 3113 return (EOPNOTSUPP); 3114 so_update_attrs(so, SOACC); 3115 return (sorecvoob(so, msg, uiop, flags, 3116 (so->so_options & SO_OOBINLINE))); 3117 } 3118 3119 so_update_attrs(so, SOACC); 3120 3121 /* 3122 * Set msg_controllen and msg_namelen to zero here to make it 3123 * simpler in the cases that no control or name is returned. 3124 */ 3125 controllen = msg->msg_controllen; 3126 namelen = msg->msg_namelen; 3127 msg->msg_controllen = 0; 3128 msg->msg_namelen = 0; 3129 3130 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 3131 namelen, controllen)); 3132 3133 mutex_enter(&so->so_lock); 3134 /* 3135 * If an NL7C enabled socket and not waiting for write data. 3136 */ 3137 if ((sti->sti_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 3138 NL7C_ENABLED) { 3139 if (sti->sti_nl7c_uri) { 3140 /* Close uri processing for a previous request */ 3141 nl7c_close(so); 3142 } 3143 if ((so_state & SS_CANTRCVMORE) && 3144 sti->sti_nl7c_rcv_mp == NULL) { 3145 /* Nothing to process, EOF */ 3146 mutex_exit(&so->so_lock); 3147 return (0); 3148 } else if (sti->sti_nl7c_flags & NL7C_SOPERSIST) { 3149 /* Persistent NL7C socket, try to process request */ 3150 boolean_t ret; 3151 3152 ret = nl7c_process(so, 3153 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3154 rval.r_vals = sti->sti_nl7c_rcv_rval; 3155 error = rval.r_v.r_v2; 3156 if (error) { 3157 /* Error of some sort, return it */ 3158 mutex_exit(&so->so_lock); 3159 return (error); 3160 } 3161 if (sti->sti_nl7c_flags && 3162 ! (sti->sti_nl7c_flags & NL7C_WAITWRITE)) { 3163 /* 3164 * Still an NL7C socket and no data 3165 * to pass up to the caller. 3166 */ 3167 mutex_exit(&so->so_lock); 3168 if (ret) { 3169 /* EOF */ 3170 return (0); 3171 } else { 3172 /* Need more data */ 3173 return (EAGAIN); 3174 } 3175 } 3176 } else { 3177 /* 3178 * Not persistent so no further NL7C processing. 
3179 */ 3180 sti->sti_nl7c_flags = 0; 3181 } 3182 } 3183 /* 3184 * Only one reader is allowed at any given time. This is needed 3185 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3186 * 3187 * This is slightly different that BSD behavior in that it fails with 3188 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3189 * is single-threaded using sblock(), which is dropped while waiting 3190 * for data to appear. The difference shows up e.g. if one 3191 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3192 * does use nonblocking io and different threads are reading each 3193 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3194 * in this case as long as the read queue doesn't get empty. 3195 * In this implementation the thread using nonblocking io can 3196 * get an EWOULDBLOCK error due to the blocking thread executing 3197 * e.g. in the uiomove in kstrgetmsg. 3198 * This difference is not believed to be significant. 3199 */ 3200 /* Set SOREADLOCKED */ 3201 error = so_lock_read_intr(so, 3202 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3203 mutex_exit(&so->so_lock); 3204 if (error) 3205 return (error); 3206 3207 /* 3208 * Tell kstrgetmsg to not inspect the stream head errors until all 3209 * queued data has been consumed. 3210 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3211 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3212 * 3213 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3214 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3215 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 
3216 */ 3217 pflag = MSG_ANY | MSG_DELAYERROR; 3218 if (flags & MSG_PEEK) { 3219 pflag |= MSG_IPEEK; 3220 flags &= ~MSG_WAITALL; 3221 } 3222 if (so->so_mode & SM_ATOMIC) 3223 pflag |= MSG_DISCARDTAIL; 3224 3225 if (flags & MSG_DONTWAIT) 3226 timout = 0; 3227 else if (so->so_rcvtimeo != 0) 3228 timout = TICK_TO_MSEC(so->so_rcvtimeo); 3229 else 3230 timout = -1; 3231 opflag = pflag; 3232 retry: 3233 saved_resid = uiop->uio_resid; 3234 pri = 0; 3235 mp = NULL; 3236 if (sti->sti_nl7c_rcv_mp != NULL) { 3237 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3238 error = nl7c_sorecv(so, &mp, uiop, &rval); 3239 } else { 3240 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3241 timout, &rval); 3242 } 3243 if (error != 0) { 3244 /* kstrgetmsg returns ETIME when timeout expires */ 3245 if (error == ETIME) 3246 error = EWOULDBLOCK; 3247 goto out; 3248 } 3249 /* 3250 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3251 * For non-datagrams MOREDATA is used to set MSG_EOR. 3252 */ 3253 ASSERT(!(rval.r_val1 & MORECTL)); 3254 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3255 msg->msg_flags |= MSG_TRUNC; 3256 3257 if (mp == NULL) { 3258 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3259 /* 3260 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3261 * The draft Posix socket spec states that the mark should 3262 * not be cleared when peeking. We follow the latter. 3263 */ 3264 if ((so->so_state & 3265 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3266 (uiop->uio_resid != saved_resid) && 3267 !(flags & MSG_PEEK)) { 3268 sorecv_update_oobstate(so); 3269 } 3270 3271 mutex_enter(&so->so_lock); 3272 /* Set MSG_EOR based on MOREDATA */ 3273 if (!(rval.r_val1 & MOREDATA)) { 3274 if (so->so_state & SS_SAVEDEOR) { 3275 msg->msg_flags |= MSG_EOR; 3276 so->so_state &= ~SS_SAVEDEOR; 3277 } 3278 } 3279 /* 3280 * If some data was received (i.e. not EOF) and the 3281 * read/recv* has not been satisfied wait for some more. 
3282 */ 3283 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3284 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3285 mutex_exit(&so->so_lock); 3286 pflag = opflag | MSG_NOMARK; 3287 goto retry; 3288 } 3289 goto out_locked; 3290 } 3291 3292 /* strsock_proto has already verified length and alignment */ 3293 tpr = (union T_primitives *)mp->b_rptr; 3294 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3295 3296 switch (tpr->type) { 3297 case T_DATA_IND: { 3298 if ((so->so_state & 3299 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3300 (uiop->uio_resid != saved_resid) && 3301 !(flags & MSG_PEEK)) { 3302 sorecv_update_oobstate(so); 3303 } 3304 3305 /* 3306 * Set msg_flags to MSG_EOR based on 3307 * MORE_flag and MOREDATA. 3308 */ 3309 mutex_enter(&so->so_lock); 3310 so->so_state &= ~SS_SAVEDEOR; 3311 if (!(tpr->data_ind.MORE_flag & 1)) { 3312 if (!(rval.r_val1 & MOREDATA)) 3313 msg->msg_flags |= MSG_EOR; 3314 else 3315 so->so_state |= SS_SAVEDEOR; 3316 } 3317 freemsg(mp); 3318 /* 3319 * If some data was received (i.e. not EOF) and the 3320 * read/recv* has not been satisfied wait for some more. 
3321 */ 3322 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3323 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3324 mutex_exit(&so->so_lock); 3325 pflag = opflag | MSG_NOMARK; 3326 goto retry; 3327 } 3328 goto out_locked; 3329 } 3330 case T_UNITDATA_IND: { 3331 void *addr; 3332 t_uscalar_t addrlen; 3333 void *abuf; 3334 t_uscalar_t optlen; 3335 void *opt; 3336 3337 if ((so->so_state & 3338 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3339 (uiop->uio_resid != saved_resid) && 3340 !(flags & MSG_PEEK)) { 3341 sorecv_update_oobstate(so); 3342 } 3343 3344 if (namelen != 0) { 3345 /* Caller wants source address */ 3346 addrlen = tpr->unitdata_ind.SRC_length; 3347 addr = sogetoff(mp, 3348 tpr->unitdata_ind.SRC_offset, 3349 addrlen, 1); 3350 if (addr == NULL) { 3351 freemsg(mp); 3352 error = EPROTO; 3353 eprintsoline(so, error); 3354 goto out; 3355 } 3356 if (so->so_family == AF_UNIX) { 3357 /* 3358 * Can not use the transport level address. 3359 * If there is a SO_SRCADDR option carrying 3360 * the socket level address it will be 3361 * extracted below. 3362 */ 3363 addr = NULL; 3364 addrlen = 0; 3365 } 3366 } 3367 optlen = tpr->unitdata_ind.OPT_length; 3368 if (optlen != 0) { 3369 t_uscalar_t ncontrollen; 3370 3371 /* 3372 * Extract any source address option. 3373 * Determine how large cmsg buffer is needed. 3374 */ 3375 opt = sogetoff(mp, 3376 tpr->unitdata_ind.OPT_offset, 3377 optlen, __TPI_ALIGN_SIZE); 3378 3379 if (opt == NULL) { 3380 freemsg(mp); 3381 error = EPROTO; 3382 eprintsoline(so, error); 3383 goto out; 3384 } 3385 if (so->so_family == AF_UNIX) 3386 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3387 ncontrollen = so_cmsglen(mp, opt, optlen, 3388 !(flags & MSG_XPG4_2)); 3389 if (controllen != 0) 3390 controllen = ncontrollen; 3391 else if (ncontrollen != 0) 3392 msg->msg_flags |= MSG_CTRUNC; 3393 } else { 3394 controllen = 0; 3395 } 3396 3397 if (namelen != 0) { 3398 /* 3399 * Return address to caller. 
3400 * Caller handles truncation if length 3401 * exceeds msg_namelen. 3402 * NOTE: AF_UNIX NUL termination is ensured by 3403 * the sender's copyin_name(). 3404 */ 3405 abuf = kmem_alloc(addrlen, KM_SLEEP); 3406 3407 bcopy(addr, abuf, addrlen); 3408 msg->msg_name = abuf; 3409 msg->msg_namelen = addrlen; 3410 } 3411 3412 if (controllen != 0) { 3413 /* 3414 * Return control msg to caller. 3415 * Caller handles truncation if length 3416 * exceeds msg_controllen. 3417 */ 3418 control = kmem_zalloc(controllen, KM_SLEEP); 3419 3420 error = so_opt2cmsg(mp, opt, optlen, 3421 !(flags & MSG_XPG4_2), 3422 control, controllen); 3423 if (error) { 3424 freemsg(mp); 3425 if (msg->msg_namelen != 0) 3426 kmem_free(msg->msg_name, 3427 msg->msg_namelen); 3428 kmem_free(control, controllen); 3429 eprintsoline(so, error); 3430 goto out; 3431 } 3432 msg->msg_control = control; 3433 msg->msg_controllen = controllen; 3434 } 3435 3436 freemsg(mp); 3437 goto out; 3438 } 3439 case T_OPTDATA_IND: { 3440 struct T_optdata_req *tdr; 3441 void *opt; 3442 t_uscalar_t optlen; 3443 3444 if ((so->so_state & 3445 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3446 (uiop->uio_resid != saved_resid) && 3447 !(flags & MSG_PEEK)) { 3448 sorecv_update_oobstate(so); 3449 } 3450 3451 tdr = (struct T_optdata_req *)mp->b_rptr; 3452 optlen = tdr->OPT_length; 3453 if (optlen != 0) { 3454 t_uscalar_t ncontrollen; 3455 /* 3456 * Determine how large cmsg buffer is needed. 3457 */ 3458 opt = sogetoff(mp, 3459 tpr->optdata_ind.OPT_offset, 3460 optlen, __TPI_ALIGN_SIZE); 3461 3462 if (opt == NULL) { 3463 freemsg(mp); 3464 error = EPROTO; 3465 eprintsoline(so, error); 3466 goto out; 3467 } 3468 3469 ncontrollen = so_cmsglen(mp, opt, optlen, 3470 !(flags & MSG_XPG4_2)); 3471 if (controllen != 0) 3472 controllen = ncontrollen; 3473 else if (ncontrollen != 0) 3474 msg->msg_flags |= MSG_CTRUNC; 3475 } else { 3476 controllen = 0; 3477 } 3478 3479 if (controllen != 0) { 3480 /* 3481 * Return control msg to caller. 
3482 * Caller handles truncation if length 3483 * exceeds msg_controllen. 3484 */ 3485 control = kmem_zalloc(controllen, KM_SLEEP); 3486 3487 error = so_opt2cmsg(mp, opt, optlen, 3488 !(flags & MSG_XPG4_2), 3489 control, controllen); 3490 if (error) { 3491 freemsg(mp); 3492 kmem_free(control, controllen); 3493 eprintsoline(so, error); 3494 goto out; 3495 } 3496 msg->msg_control = control; 3497 msg->msg_controllen = controllen; 3498 } 3499 3500 /* 3501 * Set msg_flags to MSG_EOR based on 3502 * DATA_flag and MOREDATA. 3503 */ 3504 mutex_enter(&so->so_lock); 3505 so->so_state &= ~SS_SAVEDEOR; 3506 if (!(tpr->data_ind.MORE_flag & 1)) { 3507 if (!(rval.r_val1 & MOREDATA)) 3508 msg->msg_flags |= MSG_EOR; 3509 else 3510 so->so_state |= SS_SAVEDEOR; 3511 } 3512 freemsg(mp); 3513 /* 3514 * If some data was received (i.e. not EOF) and the 3515 * read/recv* has not been satisfied wait for some more. 3516 * Not possible to wait if control info was received. 3517 */ 3518 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3519 controllen == 0 && 3520 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3521 mutex_exit(&so->so_lock); 3522 pflag = opflag | MSG_NOMARK; 3523 goto retry; 3524 } 3525 goto out_locked; 3526 } 3527 case T_EXDATA_IND: { 3528 dprintso(so, 1, 3529 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3530 "state %s\n", 3531 sti->sti_oobsigcnt, sti->sti_oobcnt, 3532 saved_resid - uiop->uio_resid, 3533 pr_state(so->so_state, so->so_mode))); 3534 /* 3535 * kstrgetmsg handles MSGMARK so there is nothing to 3536 * inspect in the T_EXDATA_IND. 3537 * strsock_proto makes the stream head queue the T_EXDATA_IND 3538 * as a separate message with no M_DATA component. Furthermore, 3539 * the stream head does not consolidate M_DATA messages onto 3540 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3541 * remains a message by itself. This is needed since MSGMARK 3542 * marks both the whole message as well as the last byte 3543 * of the message. 
3544 */ 3545 freemsg(mp); 3546 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3547 if (flags & MSG_PEEK) { 3548 /* 3549 * Even though we are peeking we consume the 3550 * T_EXDATA_IND thereby moving the mark information 3551 * to SS_RCVATMARK. Then the oob code below will 3552 * retry the peeking kstrgetmsg. 3553 * Note that the stream head read queue is 3554 * never flushed without holding SOREADLOCKED 3555 * thus the T_EXDATA_IND can not disappear 3556 * underneath us. 3557 */ 3558 dprintso(so, 1, 3559 ("sotpi_recvmsg: consume EXDATA_IND " 3560 "counts %d/%d state %s\n", 3561 sti->sti_oobsigcnt, 3562 sti->sti_oobcnt, 3563 pr_state(so->so_state, so->so_mode))); 3564 3565 pflag = MSG_ANY | MSG_DELAYERROR; 3566 if (so->so_mode & SM_ATOMIC) 3567 pflag |= MSG_DISCARDTAIL; 3568 3569 pri = 0; 3570 mp = NULL; 3571 3572 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3573 &pri, &pflag, (clock_t)-1, &rval); 3574 ASSERT(uiop->uio_resid == saved_resid); 3575 3576 if (error) { 3577 #ifdef SOCK_DEBUG 3578 if (error != EWOULDBLOCK && error != EINTR) { 3579 eprintsoline(so, error); 3580 } 3581 #endif /* SOCK_DEBUG */ 3582 goto out; 3583 } 3584 ASSERT(mp); 3585 tpr = (union T_primitives *)mp->b_rptr; 3586 ASSERT(tpr->type == T_EXDATA_IND); 3587 freemsg(mp); 3588 } /* end "if (flags & MSG_PEEK)" */ 3589 3590 /* 3591 * Decrement the number of queued and pending oob. 3592 * 3593 * SS_RCVATMARK is cleared when we read past a mark. 3594 * SS_HAVEOOBDATA is cleared when we've read past the 3595 * last mark. 3596 * SS_OOBPEND is cleared if we've read past the last 3597 * mark and no (new) SIGURG has been posted. 
3598 */ 3599 mutex_enter(&so->so_lock); 3600 ASSERT(so_verify_oobstate(so)); 3601 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3602 ASSERT(sti->sti_oobsigcnt > 0); 3603 sti->sti_oobsigcnt--; 3604 ASSERT(sti->sti_oobcnt > 0); 3605 sti->sti_oobcnt--; 3606 /* 3607 * Since the T_EXDATA_IND has been removed from the stream 3608 * head, but we have not read data past the mark, 3609 * sockfs needs to track that the socket is still at the mark. 3610 * 3611 * Since no data was received call kstrgetmsg again to wait 3612 * for data. 3613 */ 3614 so->so_state |= SS_RCVATMARK; 3615 mutex_exit(&so->so_lock); 3616 dprintso(so, 1, 3617 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3618 sti->sti_oobsigcnt, sti->sti_oobcnt, 3619 pr_state(so->so_state, so->so_mode))); 3620 pflag = opflag; 3621 goto retry; 3622 } 3623 default: 3624 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3625 (void *)so, tpr->type, (void *)mp); 3626 ASSERT(0); 3627 freemsg(mp); 3628 error = EPROTO; 3629 eprintsoline(so, error); 3630 goto out; 3631 } 3632 /* NOTREACHED */ 3633 out: 3634 mutex_enter(&so->so_lock); 3635 out_locked: 3636 so_unlock_read(so); /* Clear SOREADLOCKED */ 3637 mutex_exit(&so->so_lock); 3638 return (error); 3639 } 3640 3641 /* 3642 * Sending data with options on a datagram socket. 3643 * Assumes caller has verified that SS_ISBOUND etc. are set. 
3644 */ 3645 static int 3646 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3647 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3648 { 3649 struct T_unitdata_req tudr; 3650 mblk_t *mp; 3651 int error; 3652 void *addr; 3653 socklen_t addrlen; 3654 void *src; 3655 socklen_t srclen; 3656 ssize_t len; 3657 int size; 3658 struct T_opthdr toh; 3659 struct fdbuf *fdbuf; 3660 t_uscalar_t optlen; 3661 void *fds; 3662 int fdlen; 3663 sotpi_info_t *sti = SOTOTPI(so); 3664 3665 ASSERT(name && namelen); 3666 ASSERT(control && controllen); 3667 3668 len = uiop->uio_resid; 3669 if (len > (ssize_t)sti->sti_tidu_size) { 3670 return (EMSGSIZE); 3671 } 3672 3673 /* 3674 * For AF_UNIX the destination address is translated to an internal 3675 * name and the source address is passed as an option. 3676 * Also, file descriptors are passed as file pointers in an 3677 * option. 3678 */ 3679 3680 /* 3681 * Length and family checks. 3682 */ 3683 error = so_addr_verify(so, name, namelen); 3684 if (error) { 3685 eprintsoline(so, error); 3686 return (error); 3687 } 3688 if (so->so_family == AF_UNIX) { 3689 if (sti->sti_faddr_noxlate) { 3690 /* 3691 * Already have a transport internal address. Do not 3692 * pass any (transport internal) source address. 3693 */ 3694 addr = name; 3695 addrlen = namelen; 3696 src = NULL; 3697 srclen = 0; 3698 } else { 3699 /* 3700 * Pass the sockaddr_un source address as an option 3701 * and translate the remote address. 3702 * 3703 * Note that this code does not prevent sti_laddr_sa 3704 * from changing while it is being used. Thus 3705 * if an unbind+bind occurs concurrently with this 3706 * send the peer might see a partially new and a 3707 * partially old "from" address. 
3708 */ 3709 src = sti->sti_laddr_sa; 3710 srclen = (t_uscalar_t)sti->sti_laddr_len; 3711 dprintso(so, 1, 3712 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3713 srclen, src)); 3714 error = so_ux_addr_xlate(so, name, namelen, 3715 (flags & MSG_XPG4_2), 3716 &addr, &addrlen); 3717 if (error) { 3718 eprintsoline(so, error); 3719 return (error); 3720 } 3721 } 3722 } else { 3723 addr = name; 3724 addrlen = namelen; 3725 src = NULL; 3726 srclen = 0; 3727 } 3728 optlen = so_optlen(control, controllen, 3729 !(flags & MSG_XPG4_2)); 3730 tudr.PRIM_type = T_UNITDATA_REQ; 3731 tudr.DEST_length = addrlen; 3732 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3733 if (srclen != 0) 3734 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3735 _TPI_ALIGN_TOPT(srclen)); 3736 else 3737 tudr.OPT_length = optlen; 3738 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3739 _TPI_ALIGN_TOPT(addrlen)); 3740 3741 size = tudr.OPT_offset + tudr.OPT_length; 3742 3743 /* 3744 * File descriptors only when SM_FDPASSING set. 3745 */ 3746 error = so_getfdopt(control, controllen, 3747 !(flags & MSG_XPG4_2), &fds, &fdlen); 3748 if (error) 3749 return (error); 3750 if (fdlen != -1) { 3751 if (!(so->so_mode & SM_FDPASSING)) 3752 return (EOPNOTSUPP); 3753 3754 error = fdbuf_create(fds, fdlen, &fdbuf); 3755 if (error) 3756 return (error); 3757 3758 /* 3759 * Pre-allocate enough additional space for lower level modules 3760 * to append an option (e.g. see tl_unitdata). The following 3761 * is enough extra space for the largest option we might append. 3762 */ 3763 size += sizeof (struct T_opthdr) + ucredsize; 3764 mp = fdbuf_allocmsg(size, fdbuf); 3765 } else { 3766 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3767 if (mp == NULL) { 3768 /* 3769 * Caught a signal waiting for memory. 3770 * Let send* return EINTR. 
3771 */ 3772 return (EINTR); 3773 } 3774 } 3775 soappendmsg(mp, &tudr, sizeof (tudr)); 3776 soappendmsg(mp, addr, addrlen); 3777 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3778 3779 if (fdlen != -1) { 3780 ASSERT(fdbuf != NULL); 3781 toh.level = SOL_SOCKET; 3782 toh.name = SO_FILEP; 3783 toh.len = fdbuf->fd_size + 3784 (t_uscalar_t)sizeof (struct T_opthdr); 3785 toh.status = 0; 3786 soappendmsg(mp, &toh, sizeof (toh)); 3787 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3788 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3789 } 3790 if (srclen != 0) { 3791 /* 3792 * There is a AF_UNIX sockaddr_un to include as a source 3793 * address option. 3794 */ 3795 toh.level = SOL_SOCKET; 3796 toh.name = SO_SRCADDR; 3797 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3798 toh.status = 0; 3799 soappendmsg(mp, &toh, sizeof (toh)); 3800 soappendmsg(mp, src, srclen); 3801 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3802 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3803 } 3804 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3805 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3806 /* 3807 * Normally at most 3 bytes left in the message, but we might have 3808 * allowed for extra space if we're passing fd's through. 3809 */ 3810 ASSERT(MBLKL(mp) <= (ssize_t)size); 3811 3812 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3813 if (AU_AUDITING()) 3814 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3815 3816 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3817 #ifdef SOCK_DEBUG 3818 if (error) { 3819 eprintsoline(so, error); 3820 } 3821 #endif /* SOCK_DEBUG */ 3822 return (error); 3823 } 3824 3825 /* 3826 * Sending data with options on a connected stream socket. 3827 * Assumes caller has verified that SS_ISCONNECTED is set. 
3828 */ 3829 static int 3830 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3831 t_uscalar_t controllen, int flags) 3832 { 3833 struct T_optdata_req tdr; 3834 mblk_t *mp; 3835 int error; 3836 ssize_t iosize; 3837 int size; 3838 struct fdbuf *fdbuf; 3839 t_uscalar_t optlen; 3840 void *fds; 3841 int fdlen; 3842 struct T_opthdr toh; 3843 sotpi_info_t *sti = SOTOTPI(so); 3844 3845 dprintso(so, 1, 3846 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3847 3848 /* 3849 * Has to be bound and connected. However, since no locks are 3850 * held the state could have changed after sotpi_sendmsg checked it 3851 * thus it is not possible to ASSERT on the state. 3852 */ 3853 3854 /* Options on connection-oriented only when SM_OPTDATA set. */ 3855 if (!(so->so_mode & SM_OPTDATA)) 3856 return (EOPNOTSUPP); 3857 3858 do { 3859 /* 3860 * Set the MORE flag if uio_resid does not fit in this 3861 * message or if the caller passed in "more". 3862 * Error for transports with zero tidu_size. 3863 */ 3864 tdr.PRIM_type = T_OPTDATA_REQ; 3865 iosize = sti->sti_tidu_size; 3866 if (iosize <= 0) 3867 return (EMSGSIZE); 3868 if (uiop->uio_resid > iosize) { 3869 tdr.DATA_flag = 1; 3870 } else { 3871 if (more) 3872 tdr.DATA_flag = 1; 3873 else 3874 tdr.DATA_flag = 0; 3875 iosize = uiop->uio_resid; 3876 } 3877 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3878 tdr.DATA_flag, iosize)); 3879 3880 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3881 tdr.OPT_length = optlen; 3882 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3883 3884 size = (int)sizeof (tdr) + optlen; 3885 /* 3886 * File descriptors only when SM_FDPASSING set. 
3887 */ 3888 error = so_getfdopt(control, controllen, 3889 !(flags & MSG_XPG4_2), &fds, &fdlen); 3890 if (error) 3891 return (error); 3892 if (fdlen != -1) { 3893 if (!(so->so_mode & SM_FDPASSING)) 3894 return (EOPNOTSUPP); 3895 3896 error = fdbuf_create(fds, fdlen, &fdbuf); 3897 if (error) 3898 return (error); 3899 3900 /* 3901 * Pre-allocate enough additional space for lower level 3902 * modules to append an option (e.g. see tl_unitdata). 3903 * The following is enough extra space for the largest 3904 * option we might append. 3905 */ 3906 size += sizeof (struct T_opthdr) + ucredsize; 3907 mp = fdbuf_allocmsg(size, fdbuf); 3908 } else { 3909 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3910 if (mp == NULL) { 3911 /* 3912 * Caught a signal waiting for memory. 3913 * Let send* return EINTR. 3914 */ 3915 return (EINTR); 3916 } 3917 } 3918 soappendmsg(mp, &tdr, sizeof (tdr)); 3919 3920 if (fdlen != -1) { 3921 ASSERT(fdbuf != NULL); 3922 toh.level = SOL_SOCKET; 3923 toh.name = SO_FILEP; 3924 toh.len = fdbuf->fd_size + 3925 (t_uscalar_t)sizeof (struct T_opthdr); 3926 toh.status = 0; 3927 soappendmsg(mp, &toh, sizeof (toh)); 3928 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3929 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3930 } 3931 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3932 /* 3933 * Normally at most 3 bytes left in the message, but we might 3934 * have allowed for extra space if we're passing fd's through. 3935 */ 3936 ASSERT(MBLKL(mp) <= (ssize_t)size); 3937 3938 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3939 3940 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3941 0, MSG_BAND, 0); 3942 if (error) { 3943 eprintsoline(so, error); 3944 return (error); 3945 } 3946 control = NULL; 3947 if (uiop->uio_resid > 0) { 3948 /* 3949 * Recheck for fatal errors. Fail write even though 3950 * some data have been written. This is consistent 3951 * with strwrite semantics and BSD sockets semantics. 
3952 */ 3953 if (so->so_state & SS_CANTSENDMORE) { 3954 eprintsoline(so, error); 3955 return (EPIPE); 3956 } 3957 if (so->so_error != 0) { 3958 mutex_enter(&so->so_lock); 3959 error = sogeterr(so, B_TRUE); 3960 mutex_exit(&so->so_lock); 3961 if (error != 0) { 3962 eprintsoline(so, error); 3963 return (error); 3964 } 3965 } 3966 } 3967 } while (uiop->uio_resid > 0); 3968 return (0); 3969 } 3970 3971 /* 3972 * Sending data on a datagram socket. 3973 * Assumes caller has verified that SS_ISBOUND etc. are set. 3974 * 3975 * For AF_UNIX the destination address is translated to an internal 3976 * name and the source address is passed as an option. 3977 */ 3978 int 3979 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3980 struct uio *uiop, int flags) 3981 { 3982 struct T_unitdata_req tudr; 3983 mblk_t *mp; 3984 int error; 3985 void *addr; 3986 socklen_t addrlen; 3987 void *src; 3988 socklen_t srclen; 3989 ssize_t len; 3990 sotpi_info_t *sti = SOTOTPI(so); 3991 3992 ASSERT(name != NULL && namelen != 0); 3993 3994 len = uiop->uio_resid; 3995 if (len > sti->sti_tidu_size) { 3996 error = EMSGSIZE; 3997 goto done; 3998 } 3999 4000 /* Length and family checks */ 4001 error = so_addr_verify(so, name, namelen); 4002 if (error != 0) 4003 goto done; 4004 4005 if (sti->sti_direct) 4006 return (sodgram_direct(so, name, namelen, uiop, flags)); 4007 4008 if (so->so_family == AF_UNIX) { 4009 if (sti->sti_faddr_noxlate) { 4010 /* 4011 * Already have a transport internal address. Do not 4012 * pass any (transport internal) source address. 4013 */ 4014 addr = name; 4015 addrlen = namelen; 4016 src = NULL; 4017 srclen = 0; 4018 } else { 4019 /* 4020 * Pass the sockaddr_un source address as an option 4021 * and translate the remote address. 4022 * 4023 * Note that this code does not prevent sti_laddr_sa 4024 * from changing while it is being used. 
Thus 4025 * if an unbind+bind occurs concurrently with this 4026 * send the peer might see a partially new and a 4027 * partially old "from" address. 4028 */ 4029 src = sti->sti_laddr_sa; 4030 srclen = (socklen_t)sti->sti_laddr_len; 4031 dprintso(so, 1, 4032 ("sosend_dgram UNIX: srclen %d, src %p\n", 4033 srclen, src)); 4034 error = so_ux_addr_xlate(so, name, namelen, 4035 (flags & MSG_XPG4_2), 4036 &addr, &addrlen); 4037 if (error) { 4038 eprintsoline(so, error); 4039 goto done; 4040 } 4041 } 4042 } else { 4043 addr = name; 4044 addrlen = namelen; 4045 src = NULL; 4046 srclen = 0; 4047 } 4048 tudr.PRIM_type = T_UNITDATA_REQ; 4049 tudr.DEST_length = addrlen; 4050 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4051 if (srclen == 0) { 4052 tudr.OPT_length = 0; 4053 tudr.OPT_offset = 0; 4054 4055 mp = soallocproto2(&tudr, sizeof (tudr), 4056 addr, addrlen, 0, _ALLOC_INTR, CRED()); 4057 if (mp == NULL) { 4058 /* 4059 * Caught a signal waiting for memory. 4060 * Let send* return EINTR. 4061 */ 4062 error = EINTR; 4063 goto done; 4064 } 4065 } else { 4066 /* 4067 * There is a AF_UNIX sockaddr_un to include as a source 4068 * address option. 4069 */ 4070 struct T_opthdr toh; 4071 ssize_t size; 4072 4073 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4074 _TPI_ALIGN_TOPT(srclen)); 4075 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4076 _TPI_ALIGN_TOPT(addrlen)); 4077 4078 toh.level = SOL_SOCKET; 4079 toh.name = SO_SRCADDR; 4080 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4081 toh.status = 0; 4082 4083 size = tudr.OPT_offset + tudr.OPT_length; 4084 mp = soallocproto2(&tudr, sizeof (tudr), 4085 addr, addrlen, size, _ALLOC_INTR, CRED()); 4086 if (mp == NULL) { 4087 /* 4088 * Caught a signal waiting for memory. 4089 * Let send* return EINTR. 
4090 */ 4091 error = EINTR; 4092 goto done; 4093 } 4094 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4095 soappendmsg(mp, &toh, sizeof (toh)); 4096 soappendmsg(mp, src, srclen); 4097 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4098 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4099 } 4100 4101 if (AU_AUDITING()) 4102 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4103 4104 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4105 done: 4106 #ifdef SOCK_DEBUG 4107 if (error) { 4108 eprintsoline(so, error); 4109 } 4110 #endif /* SOCK_DEBUG */ 4111 return (error); 4112 } 4113 4114 /* 4115 * Sending data on a connected stream socket. 4116 * Assumes caller has verified that SS_ISCONNECTED is set. 4117 */ 4118 int 4119 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 4120 int sflag) 4121 { 4122 struct T_data_req tdr; 4123 mblk_t *mp; 4124 int error; 4125 ssize_t iosize; 4126 sotpi_info_t *sti = SOTOTPI(so); 4127 4128 dprintso(so, 1, 4129 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4130 (void *)so, uiop->uio_resid, prim, sflag)); 4131 4132 /* 4133 * Has to be bound and connected. However, since no locks are 4134 * held the state could have changed after sotpi_sendmsg checked it 4135 * thus it is not possible to ASSERT on the state. 4136 */ 4137 4138 do { 4139 /* 4140 * Set the MORE flag if uio_resid does not fit in this 4141 * message or if the caller passed in "more". 4142 * Error for transports with zero tidu_size. 
4143 */ 4144 tdr.PRIM_type = prim; 4145 iosize = sti->sti_tidu_size; 4146 if (iosize <= 0) 4147 return (EMSGSIZE); 4148 if (uiop->uio_resid > iosize) { 4149 tdr.MORE_flag = 1; 4150 } else { 4151 if (more) 4152 tdr.MORE_flag = 1; 4153 else 4154 tdr.MORE_flag = 0; 4155 iosize = uiop->uio_resid; 4156 } 4157 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4158 prim, tdr.MORE_flag, iosize)); 4159 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4160 if (mp == NULL) { 4161 /* 4162 * Caught a signal waiting for memory. 4163 * Let send* return EINTR. 4164 */ 4165 return (EINTR); 4166 } 4167 4168 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4169 0, sflag | MSG_BAND, 0); 4170 if (error) { 4171 eprintsoline(so, error); 4172 return (error); 4173 } 4174 if (uiop->uio_resid > 0) { 4175 /* 4176 * Recheck for fatal errors. Fail write even though 4177 * some data have been written. This is consistent 4178 * with strwrite semantics and BSD sockets semantics. 4179 */ 4180 if (so->so_state & SS_CANTSENDMORE) { 4181 eprintsoline(so, error); 4182 return (EPIPE); 4183 } 4184 if (so->so_error != 0) { 4185 mutex_enter(&so->so_lock); 4186 error = sogeterr(so, B_TRUE); 4187 mutex_exit(&so->so_lock); 4188 if (error != 0) { 4189 eprintsoline(so, error); 4190 return (error); 4191 } 4192 } 4193 } 4194 } while (uiop->uio_resid > 0); 4195 return (0); 4196 } 4197 4198 /* 4199 * Check the state for errors and call the appropriate send function. 4200 * 4201 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4202 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4203 * after sending the message. 
4204 */ 4205 static int 4206 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4207 struct cred *cr) 4208 { 4209 int so_state; 4210 int so_mode; 4211 int error; 4212 struct sockaddr *name; 4213 t_uscalar_t namelen; 4214 int dontroute; 4215 int flags; 4216 sotpi_info_t *sti = SOTOTPI(so); 4217 4218 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4219 (void *)so, (void *)msg, msg->msg_flags, 4220 pr_state(so->so_state, so->so_mode), so->so_error)); 4221 4222 if (so->so_version == SOV_STREAM) { 4223 /* The imaginary "sockmod" has been popped - act as a stream */ 4224 so_update_attrs(so, SOMOD); 4225 return (strwrite(SOTOV(so), uiop, cr)); 4226 } 4227 4228 mutex_enter(&so->so_lock); 4229 so_state = so->so_state; 4230 4231 if (so_state & SS_CANTSENDMORE) { 4232 mutex_exit(&so->so_lock); 4233 return (EPIPE); 4234 } 4235 4236 if (so->so_error != 0) { 4237 error = sogeterr(so, B_TRUE); 4238 if (error != 0) { 4239 mutex_exit(&so->so_lock); 4240 return (error); 4241 } 4242 } 4243 4244 name = (struct sockaddr *)msg->msg_name; 4245 namelen = msg->msg_namelen; 4246 4247 so_mode = so->so_mode; 4248 4249 if (name == NULL) { 4250 if (!(so_state & SS_ISCONNECTED)) { 4251 mutex_exit(&so->so_lock); 4252 if (so_mode & SM_CONNREQUIRED) 4253 return (ENOTCONN); 4254 else 4255 return (EDESTADDRREQ); 4256 } 4257 if (so_mode & SM_CONNREQUIRED) { 4258 name = NULL; 4259 namelen = 0; 4260 } else { 4261 /* 4262 * Note that this code does not prevent sti_faddr_sa 4263 * from changing while it is being used. Thus 4264 * if an "unconnect"+connect occurs concurrently with 4265 * this send the datagram might be delivered to a 4266 * garbaled address. 
4267 */ 4268 ASSERT(sti->sti_faddr_sa); 4269 name = sti->sti_faddr_sa; 4270 namelen = (t_uscalar_t)sti->sti_faddr_len; 4271 } 4272 } else { 4273 if (!(so_state & SS_ISCONNECTED) && 4274 (so_mode & SM_CONNREQUIRED)) { 4275 /* Required but not connected */ 4276 mutex_exit(&so->so_lock); 4277 return (ENOTCONN); 4278 } 4279 /* 4280 * Ignore the address on connection-oriented sockets. 4281 * Just like BSD this code does not generate an error for 4282 * TCP (a CONNREQUIRED socket) when sending to an address 4283 * passed in with sendto/sendmsg. Instead the data is 4284 * delivered on the connection as if no address had been 4285 * supplied. 4286 */ 4287 if ((so_state & SS_ISCONNECTED) && 4288 !(so_mode & SM_CONNREQUIRED)) { 4289 mutex_exit(&so->so_lock); 4290 return (EISCONN); 4291 } 4292 if (!(so_state & SS_ISBOUND)) { 4293 so_lock_single(so); /* Set SOLOCKED */ 4294 error = sotpi_bind(so, NULL, 0, 4295 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4296 so_unlock_single(so, SOLOCKED); 4297 if (error) { 4298 mutex_exit(&so->so_lock); 4299 eprintsoline(so, error); 4300 return (error); 4301 } 4302 } 4303 /* 4304 * Handle delayed datagram errors. These are only queued 4305 * when the application sets SO_DGRAM_ERRIND. 4306 * Return the error if we are sending to the address 4307 * that was returned in the last T_UDERROR_IND. 4308 * If sending to some other address discard the delayed 4309 * error indication. 
4310 */ 4311 if (sti->sti_delayed_error) { 4312 struct T_uderror_ind *tudi; 4313 void *addr; 4314 t_uscalar_t addrlen; 4315 boolean_t match = B_FALSE; 4316 4317 ASSERT(sti->sti_eaddr_mp); 4318 error = sti->sti_delayed_error; 4319 sti->sti_delayed_error = 0; 4320 tudi = 4321 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4322 addrlen = tudi->DEST_length; 4323 addr = sogetoff(sti->sti_eaddr_mp, 4324 tudi->DEST_offset, addrlen, 1); 4325 ASSERT(addr); /* Checked by strsock_proto */ 4326 switch (so->so_family) { 4327 case AF_INET: { 4328 /* Compare just IP address and port */ 4329 sin_t *sin1 = (sin_t *)name; 4330 sin_t *sin2 = (sin_t *)addr; 4331 4332 if (addrlen == sizeof (sin_t) && 4333 namelen == addrlen && 4334 sin1->sin_port == sin2->sin_port && 4335 sin1->sin_addr.s_addr == 4336 sin2->sin_addr.s_addr) 4337 match = B_TRUE; 4338 break; 4339 } 4340 case AF_INET6: { 4341 /* Compare just IP address and port. Not flow */ 4342 sin6_t *sin1 = (sin6_t *)name; 4343 sin6_t *sin2 = (sin6_t *)addr; 4344 4345 if (addrlen == sizeof (sin6_t) && 4346 namelen == addrlen && 4347 sin1->sin6_port == sin2->sin6_port && 4348 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4349 &sin2->sin6_addr)) 4350 match = B_TRUE; 4351 break; 4352 } 4353 case AF_UNIX: 4354 default: 4355 if (namelen == addrlen && 4356 bcmp(name, addr, namelen) == 0) 4357 match = B_TRUE; 4358 } 4359 if (match) { 4360 freemsg(sti->sti_eaddr_mp); 4361 sti->sti_eaddr_mp = NULL; 4362 mutex_exit(&so->so_lock); 4363 #ifdef DEBUG 4364 dprintso(so, 0, 4365 ("sockfs delayed error %d for %s\n", 4366 error, 4367 pr_addr(so->so_family, name, namelen))); 4368 #endif /* DEBUG */ 4369 return (error); 4370 } 4371 freemsg(sti->sti_eaddr_mp); 4372 sti->sti_eaddr_mp = NULL; 4373 } 4374 } 4375 mutex_exit(&so->so_lock); 4376 4377 flags = msg->msg_flags; 4378 dontroute = 0; 4379 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4380 uint32_t val; 4381 4382 val = 1; 4383 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4384 
&val, (t_uscalar_t)sizeof (val), cr); 4385 if (error) 4386 return (error); 4387 dontroute = 1; 4388 } 4389 4390 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4391 error = EOPNOTSUPP; 4392 goto done; 4393 } 4394 if (msg->msg_controllen != 0) { 4395 if (!(so_mode & SM_CONNREQUIRED)) { 4396 so_update_attrs(so, SOMOD); 4397 error = sosend_dgramcmsg(so, name, namelen, uiop, 4398 msg->msg_control, msg->msg_controllen, flags); 4399 } else { 4400 if (flags & MSG_OOB) { 4401 /* Can't generate T_EXDATA_REQ with options */ 4402 error = EOPNOTSUPP; 4403 goto done; 4404 } 4405 so_update_attrs(so, SOMOD); 4406 error = sosend_svccmsg(so, uiop, 4407 !(flags & MSG_EOR), 4408 msg->msg_control, msg->msg_controllen, 4409 flags); 4410 } 4411 goto done; 4412 } 4413 4414 so_update_attrs(so, SOMOD); 4415 if (!(so_mode & SM_CONNREQUIRED)) { 4416 /* 4417 * If there is no SO_DONTROUTE to turn off return immediately 4418 * from send_dgram. This can allow tail-call optimizations. 4419 */ 4420 if (!dontroute) { 4421 return (sosend_dgram(so, name, namelen, uiop, flags)); 4422 } 4423 error = sosend_dgram(so, name, namelen, uiop, flags); 4424 } else { 4425 t_scalar_t prim; 4426 int sflag; 4427 4428 /* Ignore msg_name in the connected state */ 4429 if (flags & MSG_OOB) { 4430 prim = T_EXDATA_REQ; 4431 /* 4432 * Send down T_EXDATA_REQ even if there is flow 4433 * control for data. 4434 */ 4435 sflag = MSG_IGNFLOW; 4436 } else { 4437 if (so_mode & SM_BYTESTREAM) { 4438 /* Byte stream transport - use write */ 4439 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4440 4441 /* Send M_DATA messages */ 4442 if ((sti->sti_nl7c_flags & NL7C_ENABLED) && 4443 (error = nl7c_data(so, uiop)) >= 0) { 4444 /* NL7C consumed the data */ 4445 return (error); 4446 } 4447 /* 4448 * If there is no SO_DONTROUTE to turn off, 4449 * sti_direct is on, and there is no flow 4450 * control, we can take the fast path. 
4451 */ 4452 if (!dontroute && sti->sti_direct != 0 && 4453 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4454 return (sostream_direct(so, uiop, 4455 NULL, cr)); 4456 } 4457 error = strwrite(SOTOV(so), uiop, cr); 4458 goto done; 4459 } 4460 prim = T_DATA_REQ; 4461 sflag = 0; 4462 } 4463 /* 4464 * If there is no SO_DONTROUTE to turn off return immediately 4465 * from sosend_svc. This can allow tail-call optimizations. 4466 */ 4467 if (!dontroute) 4468 return (sosend_svc(so, uiop, prim, 4469 !(flags & MSG_EOR), sflag)); 4470 error = sosend_svc(so, uiop, prim, 4471 !(flags & MSG_EOR), sflag); 4472 } 4473 ASSERT(dontroute); 4474 done: 4475 if (dontroute) { 4476 uint32_t val; 4477 4478 val = 0; 4479 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4480 &val, (t_uscalar_t)sizeof (val), cr); 4481 } 4482 return (error); 4483 } 4484 4485 /* 4486 * kstrwritemp() has very similar semantics as that of strwrite(). 4487 * The main difference is it obtains mblks from the caller and also 4488 * does not do any copy as done in strwrite() from user buffers to 4489 * kernel buffers. 4490 * 4491 * Currently, this routine is used by sendfile to send data allocated 4492 * within the kernel without any copying. This interface does not use the 4493 * synchronous stream interface as synch. stream interface implies 4494 * copying. 4495 */ 4496 int 4497 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode) 4498 { 4499 struct stdata *stp; 4500 struct queue *wqp; 4501 mblk_t *newmp; 4502 char waitflag; 4503 int tempmode; 4504 int error = 0; 4505 int done = 0; 4506 struct sonode *so; 4507 boolean_t direct; 4508 4509 ASSERT(vp->v_stream); 4510 stp = vp->v_stream; 4511 4512 so = VTOSO(vp); 4513 direct = _SOTOTPI(so)->sti_direct; 4514 4515 /* 4516 * This is the sockfs direct fast path. canputnext() need 4517 * not be accurate so we don't grab the sd_lock here. If 4518 * we get flow-controlled, we grab sd_lock just before the 4519 * do..while loop below to emulate what strwrite() does. 
4520 */ 4521 wqp = stp->sd_wrq; 4522 if (canputnext(wqp) && direct && 4523 !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) { 4524 return (sostream_direct(so, NULL, mp, CRED())); 4525 } else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { 4526 /* Fast check of flags before acquiring the lock */ 4527 mutex_enter(&stp->sd_lock); 4528 error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0); 4529 mutex_exit(&stp->sd_lock); 4530 if (error != 0) { 4531 if (!(stp->sd_flag & STPLEX) && 4532 (stp->sd_wput_opt & SW_SIGPIPE)) { 4533 error = EPIPE; 4534 } 4535 return (error); 4536 } 4537 } 4538 4539 waitflag = WRITEWAIT; 4540 if (stp->sd_flag & OLDNDELAY) 4541 tempmode = fmode & ~FNDELAY; 4542 else 4543 tempmode = fmode; 4544 4545 mutex_enter(&stp->sd_lock); 4546 do { 4547 if (canputnext(wqp)) { 4548 mutex_exit(&stp->sd_lock); 4549 if (stp->sd_wputdatafunc != NULL) { 4550 newmp = (stp->sd_wputdatafunc)(vp, mp, NULL, 4551 NULL, NULL, NULL); 4552 if (newmp == NULL) { 4553 /* The caller will free mp */ 4554 return (ECOMM); 4555 } 4556 mp = newmp; 4557 } 4558 putnext(wqp, mp); 4559 return (0); 4560 } 4561 error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1, 4562 &done); 4563 } while (error == 0 && !done); 4564 4565 mutex_exit(&stp->sd_lock); 4566 /* 4567 * EAGAIN tells the application to try again. ENOMEM 4568 * is returned only if the memory allocation size 4569 * exceeds the physical limits of the system. ENOMEM 4570 * can't be true here. 
4571 */ 4572 if (error == ENOMEM) 4573 error = EAGAIN; 4574 return (error); 4575 } 4576 4577 /* ARGSUSED */ 4578 static int 4579 sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 4580 struct cred *cr, mblk_t **mpp) 4581 { 4582 int error; 4583 4584 if (so->so_family != AF_INET && so->so_family != AF_INET6) 4585 return (EAFNOSUPPORT); 4586 4587 if (so->so_state & SS_CANTSENDMORE) 4588 return (EPIPE); 4589 4590 if (so->so_type != SOCK_STREAM) 4591 return (EOPNOTSUPP); 4592 4593 if ((so->so_state & SS_ISCONNECTED) == 0) 4594 return (ENOTCONN); 4595 4596 error = kstrwritemp(so->so_vnode, *mpp, fflag); 4597 if (error == 0) 4598 *mpp = NULL; 4599 return (error); 4600 } 4601 4602 /* 4603 * Sending data on a datagram socket. 4604 * Assumes caller has verified that SS_ISBOUND etc. are set. 4605 */ 4606 /* ARGSUSED */ 4607 static int 4608 sodgram_direct(struct sonode *so, struct sockaddr *name, 4609 socklen_t namelen, struct uio *uiop, int flags) 4610 { 4611 struct T_unitdata_req tudr; 4612 mblk_t *mp = NULL; 4613 int error = 0; 4614 void *addr; 4615 socklen_t addrlen; 4616 ssize_t len; 4617 struct stdata *stp = SOTOV(so)->v_stream; 4618 int so_state; 4619 queue_t *udp_wq; 4620 boolean_t connected; 4621 mblk_t *mpdata = NULL; 4622 sotpi_info_t *sti = SOTOTPI(so); 4623 uint32_t auditing = AU_AUDITING(); 4624 4625 ASSERT(name != NULL && namelen != 0); 4626 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4627 ASSERT(!(so->so_mode & SM_EXDATA)); 4628 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4629 ASSERT(SOTOV(so)->v_type == VSOCK); 4630 4631 /* Caller checked for proper length */ 4632 len = uiop->uio_resid; 4633 ASSERT(len <= sti->sti_tidu_size); 4634 4635 /* Length and family checks have been done by caller */ 4636 ASSERT(name->sa_family == so->so_family); 4637 ASSERT(so->so_family == AF_INET || 4638 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4639 ASSERT(so->so_family == AF_INET6 || 4640 (namelen == (socklen_t)sizeof (struct 
sockaddr_in))); 4641 4642 addr = name; 4643 addrlen = namelen; 4644 4645 if (stp->sd_sidp != NULL && 4646 (error = straccess(stp, JCWRITE)) != 0) 4647 goto done; 4648 4649 so_state = so->so_state; 4650 4651 connected = so_state & SS_ISCONNECTED; 4652 if (!connected) { 4653 tudr.PRIM_type = T_UNITDATA_REQ; 4654 tudr.DEST_length = addrlen; 4655 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4656 tudr.OPT_length = 0; 4657 tudr.OPT_offset = 0; 4658 4659 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4660 _ALLOC_INTR, CRED()); 4661 if (mp == NULL) { 4662 /* 4663 * Caught a signal waiting for memory. 4664 * Let send* return EINTR. 4665 */ 4666 error = EINTR; 4667 goto done; 4668 } 4669 } 4670 4671 /* 4672 * For UDP we don't break up the copyin into smaller pieces 4673 * as in the TCP case. That means if ENOMEM is returned by 4674 * mcopyinuio() then the uio vector has not been modified at 4675 * all and we fallback to either strwrite() or kstrputmsg() 4676 * below. Note also that we never generate priority messages 4677 * from here. 4678 */ 4679 udp_wq = stp->sd_wrq->q_next; 4680 if (canput(udp_wq) && 4681 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4682 ASSERT(DB_TYPE(mpdata) == M_DATA); 4683 ASSERT(uiop->uio_resid == 0); 4684 if (!connected) 4685 linkb(mp, mpdata); 4686 else 4687 mp = mpdata; 4688 if (auditing) 4689 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4690 4691 udp_wput(udp_wq, mp); 4692 return (0); 4693 } 4694 4695 ASSERT(mpdata == NULL); 4696 if (error != 0 && error != ENOMEM) { 4697 freemsg(mp); 4698 return (error); 4699 } 4700 4701 /* 4702 * For connected, let strwrite() handle the blocking case. 4703 * Otherwise we fall thru and use kstrputmsg(). 
4704 */ 4705 if (connected) 4706 return (strwrite(SOTOV(so), uiop, CRED())); 4707 4708 if (auditing) 4709 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4710 4711 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4712 done: 4713 #ifdef SOCK_DEBUG 4714 if (error != 0) { 4715 eprintsoline(so, error); 4716 } 4717 #endif /* SOCK_DEBUG */ 4718 return (error); 4719 } 4720 4721 int 4722 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4723 { 4724 struct stdata *stp = SOTOV(so)->v_stream; 4725 ssize_t iosize, rmax, maxblk; 4726 queue_t *tcp_wq = stp->sd_wrq->q_next; 4727 mblk_t *newmp; 4728 int error = 0, wflag = 0; 4729 4730 ASSERT(so->so_mode & SM_BYTESTREAM); 4731 ASSERT(SOTOV(so)->v_type == VSOCK); 4732 4733 if (stp->sd_sidp != NULL && 4734 (error = straccess(stp, JCWRITE)) != 0) 4735 return (error); 4736 4737 if (uiop == NULL) { 4738 /* 4739 * kstrwritemp() should have checked sd_flag and 4740 * flow-control before coming here. If we end up 4741 * here it means that we can simply pass down the 4742 * data to tcp. 
4743 */ 4744 ASSERT(mp != NULL); 4745 if (stp->sd_wputdatafunc != NULL) { 4746 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4747 NULL, NULL, NULL); 4748 if (newmp == NULL) { 4749 /* The caller will free mp */ 4750 return (ECOMM); 4751 } 4752 mp = newmp; 4753 } 4754 tcp_wput(tcp_wq, mp); 4755 return (0); 4756 } 4757 4758 /* Fallback to strwrite() to do proper error handling */ 4759 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4760 return (strwrite(SOTOV(so), uiop, cr)); 4761 4762 rmax = stp->sd_qn_maxpsz; 4763 ASSERT(rmax >= 0 || rmax == INFPSZ); 4764 if (rmax == 0 || uiop->uio_resid <= 0) 4765 return (0); 4766 4767 if (rmax == INFPSZ) 4768 rmax = uiop->uio_resid; 4769 4770 maxblk = stp->sd_maxblk; 4771 4772 for (;;) { 4773 iosize = MIN(uiop->uio_resid, rmax); 4774 4775 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4776 if (mp == NULL) { 4777 /* 4778 * Fallback to strwrite() for ENOMEM; if this 4779 * is our first time in this routine and the uio 4780 * vector has not been modified, we will end up 4781 * calling strwrite() without any flag set. 4782 */ 4783 if (error == ENOMEM) 4784 goto slow_send; 4785 else 4786 return (error); 4787 } 4788 ASSERT(uiop->uio_resid >= 0); 4789 /* 4790 * If mp is non-NULL and ENOMEM is set, it means that 4791 * mcopyinuio() was able to break down some of the user 4792 * data into one or more mblks. Send the partial data 4793 * to tcp and let the rest be handled in strwrite(). 
4794 */ 4795 ASSERT(error == 0 || error == ENOMEM); 4796 if (stp->sd_wputdatafunc != NULL) { 4797 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4798 NULL, NULL, NULL); 4799 if (newmp == NULL) { 4800 /* The caller will free mp */ 4801 return (ECOMM); 4802 } 4803 mp = newmp; 4804 } 4805 tcp_wput(tcp_wq, mp); 4806 4807 wflag |= NOINTR; 4808 4809 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4810 ASSERT(error == 0); 4811 break; 4812 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4813 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4814 slow_send: 4815 /* 4816 * We were able to send down partial data using 4817 * the direct call interface, but are now relying 4818 * on strwrite() to handle the non-fastpath cases. 4819 * If the socket is blocking we will sleep in 4820 * strwaitq() until write is permitted, otherwise, 4821 * we will need to return the amount of bytes 4822 * written so far back to the app. This is the 4823 * reason why we pass NOINTR flag to strwrite() 4824 * for non-blocking socket, because we don't want 4825 * to return EAGAIN when portion of the user data 4826 * has actually been sent down. 4827 */ 4828 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4829 } 4830 } 4831 return (0); 4832 } 4833 4834 /* 4835 * Update sti_faddr by asking the transport (unless AF_UNIX). 
4836 */ 4837 /* ARGSUSED */ 4838 int 4839 sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4840 boolean_t accept, struct cred *cr) 4841 { 4842 struct strbuf strbuf; 4843 int error = 0, res; 4844 void *addr; 4845 t_uscalar_t addrlen; 4846 k_sigset_t smask; 4847 sotpi_info_t *sti = SOTOTPI(so); 4848 4849 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4850 (void *)so, pr_state(so->so_state, so->so_mode))); 4851 4852 ASSERT(*namelen > 0); 4853 mutex_enter(&so->so_lock); 4854 so_lock_single(so); /* Set SOLOCKED */ 4855 4856 if (accept) { 4857 bcopy(sti->sti_faddr_sa, name, 4858 MIN(*namelen, sti->sti_faddr_len)); 4859 *namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len; 4860 goto done; 4861 } 4862 4863 if (!(so->so_state & SS_ISCONNECTED)) { 4864 error = ENOTCONN; 4865 goto done; 4866 } 4867 /* Added this check for X/Open */ 4868 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4869 error = EINVAL; 4870 if (xnet_check_print) { 4871 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4872 } 4873 goto done; 4874 } 4875 4876 if (sti->sti_faddr_valid) { 4877 bcopy(sti->sti_faddr_sa, name, 4878 MIN(*namelen, sti->sti_faddr_len)); 4879 *namelen = sti->sti_faddr_noxlate ? 
0: sti->sti_faddr_len; 4880 goto done; 4881 } 4882 4883 #ifdef DEBUG 4884 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4885 pr_addr(so->so_family, sti->sti_faddr_sa, 4886 (t_uscalar_t)sti->sti_faddr_len))); 4887 #endif /* DEBUG */ 4888 4889 if (so->so_family == AF_UNIX) { 4890 /* Transport has different name space - return local info */ 4891 if (sti->sti_faddr_noxlate) 4892 *namelen = 0; 4893 error = 0; 4894 goto done; 4895 } 4896 4897 ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0); 4898 4899 ASSERT(sti->sti_faddr_sa); 4900 /* Allocate local buffer to use with ioctl */ 4901 addrlen = (t_uscalar_t)sti->sti_faddr_maxlen; 4902 mutex_exit(&so->so_lock); 4903 addr = kmem_alloc(addrlen, KM_SLEEP); 4904 4905 /* 4906 * Issue TI_GETPEERNAME with signals masked. 4907 * Put the result in sti_faddr_sa so that getpeername works after 4908 * a shutdown(output). 4909 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4910 * back to the socket. 4911 */ 4912 strbuf.buf = addr; 4913 strbuf.maxlen = addrlen; 4914 strbuf.len = 0; 4915 4916 sigintr(&smask, 0); 4917 res = 0; 4918 ASSERT(cr); 4919 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4920 0, K_TO_K, cr, &res); 4921 sigunintr(&smask); 4922 4923 mutex_enter(&so->so_lock); 4924 /* 4925 * If there is an error record the error in so_error put don't fail 4926 * the getpeername. Instead fallback on the recorded 4927 * sti->sti_faddr_sa. 4928 */ 4929 if (error) { 4930 /* 4931 * Various stream head errors can be returned to the ioctl. 4932 * However, it is impossible to determine which ones of 4933 * these are really socket level errors that were incorrectly 4934 * consumed by the ioctl. Thus this code silently ignores the 4935 * error - to code explicitly does not reinstate the error 4936 * using soseterror(). 4937 * Experiments have shows that at least this set of 4938 * errors are reported and should not be reinstated on the 4939 * socket: 4940 * EINVAL E.g. 
if an I_LINK was in effect when 4941 * getpeername was called. 4942 * EPIPE The ioctl error semantics prefer the write 4943 * side error over the read side error. 4944 * ENOTCONN The transport just got disconnected but 4945 * sockfs had not yet seen the T_DISCON_IND 4946 * when issuing the ioctl. 4947 */ 4948 error = 0; 4949 } else if (res == 0 && strbuf.len > 0 && 4950 (so->so_state & SS_ISCONNECTED)) { 4951 ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen); 4952 sti->sti_faddr_len = (socklen_t)strbuf.len; 4953 bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len); 4954 sti->sti_faddr_valid = 1; 4955 4956 bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len)); 4957 *namelen = sti->sti_faddr_len; 4958 } 4959 kmem_free(addr, addrlen); 4960 #ifdef DEBUG 4961 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4962 pr_addr(so->so_family, sti->sti_faddr_sa, 4963 (t_uscalar_t)sti->sti_faddr_len))); 4964 #endif /* DEBUG */ 4965 done: 4966 so_unlock_single(so, SOLOCKED); 4967 mutex_exit(&so->so_lock); 4968 return (error); 4969 } 4970 4971 /* 4972 * Update sti_laddr by asking the transport (unless AF_UNIX). 
4973 */ 4974 int 4975 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4976 struct cred *cr) 4977 { 4978 struct strbuf strbuf; 4979 int error = 0, res; 4980 void *addr; 4981 t_uscalar_t addrlen; 4982 k_sigset_t smask; 4983 sotpi_info_t *sti = SOTOTPI(so); 4984 4985 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4986 (void *)so, pr_state(so->so_state, so->so_mode))); 4987 4988 ASSERT(*namelen > 0); 4989 mutex_enter(&so->so_lock); 4990 so_lock_single(so); /* Set SOLOCKED */ 4991 4992 #ifdef DEBUG 4993 4994 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4995 pr_addr(so->so_family, sti->sti_laddr_sa, 4996 (t_uscalar_t)sti->sti_laddr_len))); 4997 #endif /* DEBUG */ 4998 if (sti->sti_laddr_valid) { 4999 bcopy(sti->sti_laddr_sa, name, 5000 MIN(*namelen, sti->sti_laddr_len)); 5001 *namelen = sti->sti_laddr_len; 5002 goto done; 5003 } 5004 5005 if (so->so_family == AF_UNIX) { 5006 /* 5007 * Transport has different name space - return local info. If we 5008 * have enough space, let consumers know the family. 5009 */ 5010 if (*namelen >= sizeof (sa_family_t)) { 5011 name->sa_family = AF_UNIX; 5012 *namelen = sizeof (sa_family_t); 5013 } else { 5014 *namelen = 0; 5015 } 5016 error = 0; 5017 goto done; 5018 } 5019 if (!(so->so_state & SS_ISBOUND)) { 5020 /* If not bound, then nothing to return. */ 5021 error = 0; 5022 goto done; 5023 } 5024 5025 /* Allocate local buffer to use with ioctl */ 5026 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 5027 mutex_exit(&so->so_lock); 5028 addr = kmem_alloc(addrlen, KM_SLEEP); 5029 5030 /* 5031 * Issue TI_GETMYNAME with signals masked. 5032 * Put the result in sti_laddr_sa so that getsockname works after 5033 * a shutdown(output). 5034 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 5035 * back to the socket. 
5036 */ 5037 strbuf.buf = addr; 5038 strbuf.maxlen = addrlen; 5039 strbuf.len = 0; 5040 5041 sigintr(&smask, 0); 5042 res = 0; 5043 ASSERT(cr); 5044 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 5045 0, K_TO_K, cr, &res); 5046 sigunintr(&smask); 5047 5048 mutex_enter(&so->so_lock); 5049 /* 5050 * If there is an error record the error in so_error put don't fail 5051 * the getsockname. Instead fallback on the recorded 5052 * sti->sti_laddr_sa. 5053 */ 5054 if (error) { 5055 /* 5056 * Various stream head errors can be returned to the ioctl. 5057 * However, it is impossible to determine which ones of 5058 * these are really socket level errors that were incorrectly 5059 * consumed by the ioctl. Thus this code silently ignores the 5060 * error - to code explicitly does not reinstate the error 5061 * using soseterror(). 5062 * Experiments have shows that at least this set of 5063 * errors are reported and should not be reinstated on the 5064 * socket: 5065 * EINVAL E.g. if an I_LINK was in effect when 5066 * getsockname was called. 5067 * EPIPE The ioctl error semantics prefer the write 5068 * side error over the read side error. 5069 */ 5070 error = 0; 5071 } else if (res == 0 && strbuf.len > 0 && 5072 (so->so_state & SS_ISBOUND)) { 5073 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 5074 sti->sti_laddr_len = (socklen_t)strbuf.len; 5075 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 5076 sti->sti_laddr_valid = 1; 5077 5078 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 5079 *namelen = sti->sti_laddr_len; 5080 } 5081 kmem_free(addr, addrlen); 5082 #ifdef DEBUG 5083 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5084 pr_addr(so->so_family, sti->sti_laddr_sa, 5085 (t_uscalar_t)sti->sti_laddr_len))); 5086 #endif /* DEBUG */ 5087 done: 5088 so_unlock_single(so, SOLOCKED); 5089 mutex_exit(&so->so_lock); 5090 return (error); 5091 } 5092 5093 /* 5094 * Get socket options. 
For SOL_SOCKET options some options are handled 5095 * by the sockfs while others use the value recorded in the sonode as a 5096 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5097 * 5098 * On the return most *optlenp bytes are copied to optval. 5099 */ 5100 /* ARGSUSED */ 5101 int 5102 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5103 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5104 { 5105 struct T_optmgmt_req optmgmt_req; 5106 struct T_optmgmt_ack *optmgmt_ack; 5107 struct opthdr oh; 5108 struct opthdr *opt_res; 5109 mblk_t *mp = NULL; 5110 int error = 0; 5111 void *option = NULL; /* Set if fallback value */ 5112 t_uscalar_t maxlen = *optlenp; 5113 t_uscalar_t len; 5114 uint32_t value; 5115 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5116 struct timeval32 tmo_val32; 5117 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5118 5119 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5120 (void *)so, level, option_name, optval, (void *)optlenp, 5121 pr_state(so->so_state, so->so_mode))); 5122 5123 mutex_enter(&so->so_lock); 5124 so_lock_single(so); /* Set SOLOCKED */ 5125 5126 /* 5127 * Check for SOL_SOCKET options. 5128 * Certain SOL_SOCKET options are returned directly whereas 5129 * others only provide a default (fallback) value should 5130 * the T_SVR4_OPTMGMT_REQ fail. 
5131 */ 5132 if (level == SOL_SOCKET) { 5133 /* Check parameters */ 5134 switch (option_name) { 5135 case SO_TYPE: 5136 case SO_ERROR: 5137 case SO_DEBUG: 5138 case SO_ACCEPTCONN: 5139 case SO_REUSEADDR: 5140 case SO_KEEPALIVE: 5141 case SO_DONTROUTE: 5142 case SO_BROADCAST: 5143 case SO_USELOOPBACK: 5144 case SO_OOBINLINE: 5145 case SO_SNDBUF: 5146 case SO_RCVBUF: 5147 #ifdef notyet 5148 case SO_SNDLOWAT: 5149 case SO_RCVLOWAT: 5150 #endif /* notyet */ 5151 case SO_DOMAIN: 5152 case SO_DGRAM_ERRIND: 5153 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5154 error = EINVAL; 5155 eprintsoline(so, error); 5156 goto done2; 5157 } 5158 break; 5159 case SO_RCVTIMEO: 5160 case SO_SNDTIMEO: 5161 if (get_udatamodel() == DATAMODEL_NONE || 5162 get_udatamodel() == DATAMODEL_NATIVE) { 5163 if (maxlen < sizeof (struct timeval)) { 5164 error = EINVAL; 5165 eprintsoline(so, error); 5166 goto done2; 5167 } 5168 } else { 5169 if (maxlen < sizeof (struct timeval32)) { 5170 error = EINVAL; 5171 eprintsoline(so, error); 5172 goto done2; 5173 } 5174 5175 } 5176 break; 5177 case SO_LINGER: 5178 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5179 error = EINVAL; 5180 eprintsoline(so, error); 5181 goto done2; 5182 } 5183 break; 5184 case SO_SND_BUFINFO: 5185 if (maxlen < (t_uscalar_t) 5186 sizeof (struct so_snd_bufinfo)) { 5187 error = EINVAL; 5188 eprintsoline(so, error); 5189 goto done2; 5190 } 5191 break; 5192 } 5193 5194 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5195 5196 switch (option_name) { 5197 case SO_TYPE: 5198 value = so->so_type; 5199 option = &value; 5200 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5201 5202 case SO_ERROR: 5203 value = sogeterr(so, B_TRUE); 5204 option = &value; 5205 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5206 5207 case SO_ACCEPTCONN: 5208 if (so->so_state & SS_ACCEPTCONN) 5209 value = SO_ACCEPTCONN; 5210 else 5211 value = 0; 5212 #ifdef DEBUG 5213 if (value) { 5214 dprintso(so, 1, 5215 ("sotpi_getsockopt: 0x%x is 
set\n", 5216 option_name)); 5217 } else { 5218 dprintso(so, 1, 5219 ("sotpi_getsockopt: 0x%x not set\n", 5220 option_name)); 5221 } 5222 #endif /* DEBUG */ 5223 option = &value; 5224 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5225 5226 case SO_DEBUG: 5227 case SO_REUSEADDR: 5228 case SO_KEEPALIVE: 5229 case SO_DONTROUTE: 5230 case SO_BROADCAST: 5231 case SO_USELOOPBACK: 5232 case SO_OOBINLINE: 5233 case SO_DGRAM_ERRIND: 5234 value = (so->so_options & option_name); 5235 #ifdef DEBUG 5236 if (value) { 5237 dprintso(so, 1, 5238 ("sotpi_getsockopt: 0x%x is set\n", 5239 option_name)); 5240 } else { 5241 dprintso(so, 1, 5242 ("sotpi_getsockopt: 0x%x not set\n", 5243 option_name)); 5244 } 5245 #endif /* DEBUG */ 5246 option = &value; 5247 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5248 5249 /* 5250 * The following options are only returned by sockfs when the 5251 * T_SVR4_OPTMGMT_REQ fails. 5252 */ 5253 case SO_LINGER: 5254 option = &so->so_linger; 5255 len = (t_uscalar_t)sizeof (struct linger); 5256 break; 5257 case SO_SNDBUF: { 5258 ssize_t lvalue; 5259 5260 /* 5261 * If the option has not been set then get a default 5262 * value from the read queue. This value is 5263 * returned if the transport fails 5264 * the T_SVR4_OPTMGMT_REQ. 5265 */ 5266 lvalue = so->so_sndbuf; 5267 if (lvalue == 0) { 5268 mutex_exit(&so->so_lock); 5269 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5270 QHIWAT, 0, &lvalue); 5271 mutex_enter(&so->so_lock); 5272 dprintso(so, 1, 5273 ("got SO_SNDBUF %ld from q\n", lvalue)); 5274 } 5275 value = (int)lvalue; 5276 option = &value; 5277 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5278 break; 5279 } 5280 case SO_RCVBUF: { 5281 ssize_t lvalue; 5282 5283 /* 5284 * If the option has not been set then get a default 5285 * value from the read queue. This value is 5286 * returned if the transport fails 5287 * the T_SVR4_OPTMGMT_REQ. 
5288 * 5289 * XXX If SO_RCVBUF has been set and this is an 5290 * XPG 4.2 application then do not ask the transport 5291 * since the transport might adjust the value and not 5292 * return exactly what was set by the application. 5293 * For non-XPG 4.2 application we return the value 5294 * that the transport is actually using. 5295 */ 5296 lvalue = so->so_rcvbuf; 5297 if (lvalue == 0) { 5298 mutex_exit(&so->so_lock); 5299 (void) strqget(RD(strvp2wq(SOTOV(so))), 5300 QHIWAT, 0, &lvalue); 5301 mutex_enter(&so->so_lock); 5302 dprintso(so, 1, 5303 ("got SO_RCVBUF %ld from q\n", lvalue)); 5304 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5305 value = (int)lvalue; 5306 option = &value; 5307 goto copyout; /* skip asking transport */ 5308 } 5309 value = (int)lvalue; 5310 option = &value; 5311 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5312 break; 5313 } 5314 case SO_DOMAIN: 5315 value = so->so_family; 5316 option = &value; 5317 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5318 5319 #ifdef notyet 5320 /* 5321 * We do not implement the semantics of these options 5322 * thus we shouldn't implement the options either. 
5323 */ 5324 case SO_SNDLOWAT: 5325 value = so->so_sndlowat; 5326 option = &value; 5327 break; 5328 case SO_RCVLOWAT: 5329 value = so->so_rcvlowat; 5330 option = &value; 5331 break; 5332 #endif /* notyet */ 5333 case SO_SNDTIMEO: 5334 case SO_RCVTIMEO: { 5335 clock_t val; 5336 5337 if (option_name == SO_RCVTIMEO) 5338 val = drv_hztousec(so->so_rcvtimeo); 5339 else 5340 val = drv_hztousec(so->so_sndtimeo); 5341 tmo_val.tv_sec = val / (1000 * 1000); 5342 tmo_val.tv_usec = val % (1000 * 1000); 5343 if (get_udatamodel() == DATAMODEL_NONE || 5344 get_udatamodel() == DATAMODEL_NATIVE) { 5345 option = &tmo_val; 5346 len = sizeof (struct timeval); 5347 } else { 5348 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5349 option = &tmo_val32; 5350 len = sizeof (struct timeval32); 5351 } 5352 break; 5353 } 5354 case SO_SND_BUFINFO: { 5355 snd_bufinfo.sbi_wroff = 5356 (so->so_proto_props).sopp_wroff; 5357 snd_bufinfo.sbi_maxblk = 5358 (so->so_proto_props).sopp_maxblk; 5359 snd_bufinfo.sbi_maxpsz = 5360 (so->so_proto_props).sopp_maxpsz; 5361 snd_bufinfo.sbi_tail = 5362 (so->so_proto_props).sopp_tail; 5363 option = &snd_bufinfo; 5364 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5365 break; 5366 } 5367 } 5368 } 5369 5370 mutex_exit(&so->so_lock); 5371 5372 /* Send request */ 5373 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5374 optmgmt_req.MGMT_flags = T_CHECK; 5375 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5376 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5377 5378 oh.level = level; 5379 oh.name = option_name; 5380 oh.len = maxlen; 5381 5382 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5383 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5384 /* Let option management work in the presence of data flow control */ 5385 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5386 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5387 mp = NULL; 5388 mutex_enter(&so->so_lock); 5389 if (error) { 5390 eprintsoline(so, error); 5391 goto done2; 
5392 } 5393 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5394 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5395 if (error) { 5396 if (option != NULL) { 5397 /* We have a fallback value */ 5398 error = 0; 5399 goto copyout; 5400 } 5401 eprintsoline(so, error); 5402 goto done2; 5403 } 5404 ASSERT(mp); 5405 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5406 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5407 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5408 if (opt_res == NULL) { 5409 if (option != NULL) { 5410 /* We have a fallback value */ 5411 error = 0; 5412 goto copyout; 5413 } 5414 error = EPROTO; 5415 eprintsoline(so, error); 5416 goto done; 5417 } 5418 option = &opt_res[1]; 5419 5420 /* check to ensure that the option is within bounds */ 5421 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5422 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5423 if (option != NULL) { 5424 /* We have a fallback value */ 5425 error = 0; 5426 goto copyout; 5427 } 5428 error = EPROTO; 5429 eprintsoline(so, error); 5430 goto done; 5431 } 5432 5433 len = opt_res->len; 5434 5435 copyout: { 5436 t_uscalar_t size = MIN(len, maxlen); 5437 bcopy(option, optval, size); 5438 bcopy(&size, optlenp, sizeof (size)); 5439 } 5440 done: 5441 freemsg(mp); 5442 done2: 5443 so_unlock_single(so, SOLOCKED); 5444 mutex_exit(&so->so_lock); 5445 5446 return (error); 5447 } 5448 5449 /* 5450 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5451 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5452 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5453 * setsockopt has to work even if the transport does not support the option. 
5454 */ 5455 /* ARGSUSED */ 5456 int 5457 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5458 const void *optval, t_uscalar_t optlen, struct cred *cr) 5459 { 5460 struct T_optmgmt_req optmgmt_req; 5461 struct opthdr oh; 5462 mblk_t *mp; 5463 int error = 0; 5464 boolean_t handled = B_FALSE; 5465 5466 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5467 (void *)so, level, option_name, optval, optlen, 5468 pr_state(so->so_state, so->so_mode))); 5469 5470 /* X/Open requires this check */ 5471 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5472 if (xnet_check_print) 5473 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5474 return (EINVAL); 5475 } 5476 5477 mutex_enter(&so->so_lock); 5478 so_lock_single(so); /* Set SOLOCKED */ 5479 mutex_exit(&so->so_lock); 5480 5481 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5482 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5483 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5484 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5485 5486 oh.level = level; 5487 oh.name = option_name; 5488 oh.len = optlen; 5489 5490 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5491 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5492 /* Let option management work in the presence of data flow control */ 5493 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5494 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5495 mp = NULL; 5496 mutex_enter(&so->so_lock); 5497 if (error) { 5498 eprintsoline(so, error); 5499 goto done2; 5500 } 5501 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5502 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5503 if (error) { 5504 eprintsoline(so, error); 5505 goto done; 5506 } 5507 ASSERT(mp); 5508 /* No need to verify T_optmgmt_ack */ 5509 freemsg(mp); 5510 done: 5511 /* 5512 * Check for SOL_SOCKET options and record their values. 
5513 * If we know about a SOL_SOCKET parameter and the transport 5514 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5515 * EPROTO) we let the setsockopt succeed. 5516 */ 5517 if (level == SOL_SOCKET) { 5518 /* Check parameters */ 5519 switch (option_name) { 5520 case SO_DEBUG: 5521 case SO_REUSEADDR: 5522 case SO_KEEPALIVE: 5523 case SO_DONTROUTE: 5524 case SO_BROADCAST: 5525 case SO_USELOOPBACK: 5526 case SO_OOBINLINE: 5527 case SO_SNDBUF: 5528 case SO_RCVBUF: 5529 #ifdef notyet 5530 case SO_SNDLOWAT: 5531 case SO_RCVLOWAT: 5532 #endif /* notyet */ 5533 case SO_DGRAM_ERRIND: 5534 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5535 error = EINVAL; 5536 eprintsoline(so, error); 5537 goto done2; 5538 } 5539 ASSERT(optval); 5540 handled = B_TRUE; 5541 break; 5542 case SO_SNDTIMEO: 5543 case SO_RCVTIMEO: 5544 if (get_udatamodel() == DATAMODEL_NONE || 5545 get_udatamodel() == DATAMODEL_NATIVE) { 5546 if (optlen != sizeof (struct timeval)) { 5547 error = EINVAL; 5548 eprintsoline(so, error); 5549 goto done2; 5550 } 5551 } else { 5552 if (optlen != sizeof (struct timeval32)) { 5553 error = EINVAL; 5554 eprintsoline(so, error); 5555 goto done2; 5556 } 5557 } 5558 ASSERT(optval); 5559 handled = B_TRUE; 5560 break; 5561 case SO_LINGER: 5562 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5563 error = EINVAL; 5564 eprintsoline(so, error); 5565 goto done2; 5566 } 5567 ASSERT(optval); 5568 handled = B_TRUE; 5569 break; 5570 } 5571 5572 #define intvalue (*(int32_t *)optval) 5573 5574 switch (option_name) { 5575 case SO_TYPE: 5576 case SO_ERROR: 5577 case SO_ACCEPTCONN: 5578 /* Can't be set */ 5579 error = ENOPROTOOPT; 5580 goto done2; 5581 case SO_LINGER: { 5582 struct linger *l = (struct linger *)optval; 5583 5584 so->so_linger.l_linger = l->l_linger; 5585 if (l->l_onoff) { 5586 so->so_linger.l_onoff = SO_LINGER; 5587 so->so_options |= SO_LINGER; 5588 } else { 5589 so->so_linger.l_onoff = 0; 5590 so->so_options &= ~SO_LINGER; 5591 } 5592 break; 5593 } 5594 
5595 case SO_DEBUG: 5596 #ifdef SOCK_TEST 5597 if (intvalue & 2) 5598 sock_test_timelimit = 10 * hz; 5599 else 5600 sock_test_timelimit = 0; 5601 5602 if (intvalue & 4) 5603 do_useracc = 0; 5604 else 5605 do_useracc = 1; 5606 #endif /* SOCK_TEST */ 5607 /* FALLTHRU */ 5608 case SO_REUSEADDR: 5609 case SO_KEEPALIVE: 5610 case SO_DONTROUTE: 5611 case SO_BROADCAST: 5612 case SO_USELOOPBACK: 5613 case SO_OOBINLINE: 5614 case SO_DGRAM_ERRIND: 5615 if (intvalue != 0) { 5616 dprintso(so, 1, 5617 ("socket_setsockopt: setting 0x%x\n", 5618 option_name)); 5619 so->so_options |= option_name; 5620 } else { 5621 dprintso(so, 1, 5622 ("socket_setsockopt: clearing 0x%x\n", 5623 option_name)); 5624 so->so_options &= ~option_name; 5625 } 5626 break; 5627 /* 5628 * The following options are only returned by us when the 5629 * transport layer fails. 5630 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5631 * since the transport might adjust the value and not 5632 * return exactly what was set by the application. 5633 */ 5634 case SO_SNDBUF: 5635 so->so_sndbuf = intvalue; 5636 break; 5637 case SO_RCVBUF: 5638 so->so_rcvbuf = intvalue; 5639 break; 5640 case SO_RCVPSH: 5641 so->so_rcv_timer_interval = intvalue; 5642 break; 5643 #ifdef notyet 5644 /* 5645 * We do not implement the semantics of these options 5646 * thus we shouldn't implement the options either. 
5647 */ 5648 case SO_SNDLOWAT: 5649 so->so_sndlowat = intvalue; 5650 break; 5651 case SO_RCVLOWAT: 5652 so->so_rcvlowat = intvalue; 5653 break; 5654 #endif /* notyet */ 5655 case SO_SNDTIMEO: 5656 case SO_RCVTIMEO: { 5657 struct timeval tl; 5658 clock_t val; 5659 5660 if (get_udatamodel() == DATAMODEL_NONE || 5661 get_udatamodel() == DATAMODEL_NATIVE) 5662 bcopy(&tl, (struct timeval *)optval, 5663 sizeof (struct timeval)); 5664 else 5665 TIMEVAL32_TO_TIMEVAL(&tl, 5666 (struct timeval32 *)optval); 5667 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5668 if (option_name == SO_RCVTIMEO) 5669 so->so_rcvtimeo = drv_usectohz(val); 5670 else 5671 so->so_sndtimeo = drv_usectohz(val); 5672 break; 5673 } 5674 } 5675 #undef intvalue 5676 5677 if (error) { 5678 if ((error == ENOPROTOOPT || error == EPROTO || 5679 error == EINVAL) && handled) { 5680 dprintso(so, 1, 5681 ("setsockopt: ignoring error %d for 0x%x\n", 5682 error, option_name)); 5683 error = 0; 5684 } 5685 } 5686 } 5687 done2: 5688 so_unlock_single(so, SOLOCKED); 5689 mutex_exit(&so->so_lock); 5690 return (error); 5691 } 5692 5693 /* 5694 * sotpi_close() is called when the last open reference goes away. 
 *
 * With the sonode single-threaded (SOLOCKED) it shuts down NL7C if it was
 * enabled, closes the stream (dropping so_lock around strclose()), flushes
 * any pending T_DISCON_IND and finally releases the driver hold for
 * cloned opens.  Returns the result of strclose(), or 0 when there was no
 * stream to close.
 */
/* ARGSUSED */
int
sotpi_close(struct sonode *so, int flag, struct cred *cr)
{
	struct vnode *vp = SOTOV(so);
	dev_t dev;
	int error = 0;
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_close(%p, %x) %s\n",
	    (void *)vp, flag, pr_state(so->so_state, so->so_mode)));

	dev = sti->sti_dev;

	ASSERT(STREAMSTAB(getmajor(dev)));

	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	ASSERT(so_verify_oobstate(so));

	if (sti->sti_nl7c_flags & NL7C_ENABLED) {
		sti->sti_nl7c_flags = 0;
		nl7c_close(so);
	}

	if (vp->v_stream != NULL) {
		vnode_t *ux_vp;

		if (so->so_family == AF_UNIX) {
			/* Could avoid this when CANTSENDMORE for !dgram */
			so_unix_close(so);
		}

		/* so_lock is not held across the stream close below. */
		mutex_exit(&so->so_lock);
		/*
		 * Disassemble the linkage from the AF_UNIX underlying file
		 * system vnode to this socket (by atomically clearing
		 * v_stream in vn_rele_stream) before strclose clears sd_vnode
		 * and frees the stream head.
		 */
		if ((ux_vp = sti->sti_ux_bound_vp) != NULL) {
			ASSERT(ux_vp->v_stream);
			sti->sti_ux_bound_vp = NULL;
			vn_rele_stream(ux_vp);
		}
		error = strclose(vp, flag, cr);
		vp->v_stream = NULL;
		mutex_enter(&so->so_lock);
	}

	/*
	 * Flush the T_DISCON_IND on sti_discon_ind_mp.
	 */
	so_flush_discon_ind(so);

	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);

	/*
	 * Needed for STREAMs.
	 * Decrement the device driver's reference count for streams
	 * opened via the clone dip. The driver was held in clone_open().
	 * The absence of clone_close() forces this asymmetry.
	 */
	if (so->so_flag & SOCLONE)
		ddi_rele_driver(getmajor(dev));

	return (error);
}

/*
 * ioctl entry point for TPI sockets.
 *
 * SIOCSQPTR, _I_INSERT and _I_REMOVE are rejected with EOPNOTSUPP.  The
 * module-list ioctls (I_FIND, I_LIST, I_LOOK, I_POP, I_PUSH) are passed to
 * socktpi_plumbioctl() under sti_plumb_lock.  When the sonode is in
 * SOV_STREAM mode every other command goes straight to strioctl().
 * Otherwise the socket-specific commands are handled here and anything not
 * recognized is passed to strioctl(), except that STREAMS (I_*) ioctls are
 * refused on SOV_SOCKBSD sockets.
 *
 * Returns 0 on success, otherwise an errno value.
 */
static int
sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	struct vnode *vp = SOTOV(so);
	sotpi_info_t *sti = SOTOTPI(so);
	int error = 0;

	dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n",
	    cmd, arg, pr_state(so->so_state, so->so_mode)));

	switch (cmd) {
	case SIOCSQPTR:
		/*
		 * SIOCSQPTR is valid only when helper stream is created
		 * by the protocol.
		 */
	case _I_INSERT:
	case _I_REMOVE:
		/*
		 * Since there's no compelling reason to support these ioctls
		 * on sockets, and doing so would increase the complexity
		 * markedly, prevent it.
		 */
		return (EOPNOTSUPP);

	case I_FIND:
	case I_LIST:
	case I_LOOK:
	case I_POP:
	case I_PUSH:
		/*
		 * To prevent races and inconsistencies between the actual
		 * state of the stream and the state according to the sonode,
		 * we serialize all operations which modify or operate on the
		 * list of modules on the socket's stream.
		 */
		mutex_enter(&sti->sti_plumb_lock);
		error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp);
		mutex_exit(&sti->sti_plumb_lock);
		return (error);

	default:
		if (so->so_version != SOV_STREAM)
			break;

		/*
		 * The imaginary "sockmod" has been popped; act as a stream.
		 */
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}

	ASSERT(so->so_version != SOV_STREAM);

	/*
	 * Process socket-specific ioctls.
	 */
	switch (cmd) {
	case FIONBIO: {
		int32_t value;

		if (so_copyin((void *)arg, &value, sizeof (int32_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		if (value) {
			so->so_state |= SS_NDELAY;
		} else {
			so->so_state &= ~SS_NDELAY;
		}
		mutex_exit(&so->so_lock);
		return (0);
	}

	case FIOASYNC: {
		int32_t value;

		if (so_copyin((void *)arg, &value, sizeof (int32_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		/*
		 * SS_ASYNC flag not already set correctly?
		 * (!value != !(so->so_state & SS_ASYNC))
		 * but some engineers find that too hard to read.
		 */
		if (value == 0 && (so->so_state & SS_ASYNC) != 0 ||
		    value != 0 && (so->so_state & SS_ASYNC) == 0)
			error = so_flip_async(so, vp, mode, cr);
		mutex_exit(&so->so_lock);
		return (error);
	}

	case SIOCSPGRP:
	case FIOSETOWN: {
		pid_t pgrp;

		if (so_copyin((void *)arg, &pgrp, sizeof (pid_t),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);

		mutex_enter(&so->so_lock);
		dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp));
		/* Any change? */
		if (pgrp != so->so_pgrp)
			error = so_set_siggrp(so, vp, pgrp, mode, cr);
		mutex_exit(&so->so_lock);
		return (error);
	}
	case SIOCGPGRP:
	case FIOGETOWN:
		/* so_pgrp is read here without taking so_lock. */
		if (so_copyout(&so->so_pgrp, (void *)arg,
		    sizeof (pid_t), (mode & (int)FKIOCTL)))
			return (EFAULT);
		return (0);

	case SIOCATMARK: {
		int retval;
		uint_t so_state;

		/*
		 * strwaitmark has a finite timeout after which it
		 * returns -1 if the mark state is undetermined.
		 * In order to avoid any race between the mark state
		 * in sockfs and the mark state in the stream head this
		 * routine loops until the mark state can be determined
		 * (or the urgent data indication has been removed by some
		 * other thread).
		 */
		do {
			mutex_enter(&so->so_lock);
			so_state = so->so_state;
			mutex_exit(&so->so_lock);
			if (so_state & SS_RCVATMARK) {
				retval = 1;
			} else if (!(so_state & SS_OOBPEND)) {
				/*
				 * No SIGURG has been generated -- there is no
				 * pending or present urgent data. Thus can't
				 * possibly be at the mark.
				 */
				retval = 0;
			} else {
				/*
				 * Have the stream head wait until there is
				 * either some messages on the read queue, or
				 * STRATMARK or STRNOTATMARK gets set. The
				 * STRNOTATMARK flag is used so that the
				 * transport can send up a MSGNOTMARKNEXT
				 * M_DATA to indicate that it is not
				 * at the mark and additional data is not about
				 * to be sent upstream.
				 *
				 * If the mark state is undetermined this will
				 * return -1 and we will loop rechecking the
				 * socket state.
				 */
				retval = strwaitmark(vp);
			}
		} while (retval == -1);

		if (so_copyout(&retval, (void *)arg, sizeof (int),
		    (mode & (int)FKIOCTL)))
			return (EFAULT);
		return (0);
	}

	case I_FDINSERT:
	case I_SENDFD:
	case I_RECVFD:
	case I_ATMARK:
	case _SIOCSOCKFALLBACK:
		/*
		 * These ioctls do not apply to sockets. I_FDINSERT can be
		 * used to send M_PROTO messages without modifying the socket
		 * state. I_SENDFD/RECVFD should not be used for socket file
		 * descriptor passing since they assume a twisted stream.
		 * SIOCATMARK must be used instead of I_ATMARK.
		 *
		 * _SIOCSOCKFALLBACK from an application should never be
		 * processed. It is only generated by socktpi_open() or
		 * in response to I_POP or I_PUSH.
		 */
#ifdef DEBUG
		zcmn_err(getzoneid(), CE_WARN,
		    "Unsupported STREAMS ioctl 0x%x on socket. "
		    "Pid = %d\n", cmd, curproc->p_pid);
#endif /* DEBUG */
		return (EOPNOTSUPP);

	case _I_GETPEERCRED:
		/* Only accepted from in-kernel callers (FKIOCTL set). */
		if ((mode & FKIOCTL) == 0)
			return (EINVAL);

		mutex_enter(&so->so_lock);
		if ((so->so_mode & SM_CONNREQUIRED) == 0) {
			error = ENOTSUP;
		} else if ((so->so_state & SS_ISCONNECTED) == 0) {
			error = ENOTCONN;
		} else if (so->so_peercred != NULL) {
			k_peercred_t *kp = (k_peercred_t *)arg;
			kp->pc_cr = so->so_peercred;
			kp->pc_cpid = so->so_cpid;
			/* The returned credential carries its own hold. */
			crhold(so->so_peercred);
		} else {
			error = EINVAL;
		}
		mutex_exit(&so->so_lock);
		return (error);

	default:
		/*
		 * Do the higher-order bits of the ioctl cmd indicate
		 * that it is an I_* streams ioctl?
		 */
		if ((cmd & 0xffffff00U) == STR &&
		    so->so_version == SOV_SOCKBSD) {
#ifdef DEBUG
			zcmn_err(getzoneid(), CE_WARN,
			    "Unsupported STREAMS ioctl 0x%x on socket. "
			    "Pid = %d\n", cmd, curproc->p_pid);
#endif /* DEBUG */
			return (EOPNOTSUPP);
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}
}

/*
 * Handle plumbing-related ioctls.
 *
 * Called with sti_plumb_lock held for I_FIND, I_LIST, I_LOOK, I_POP and
 * I_PUSH; any other command panics.  The socket is presented as if a module
 * named "sockmod" sat on the stream at position sti_pushcnt: module lists
 * and lookups are adjusted to include it, popping it switches the sonode to
 * its STREAMS personality and pushing it back switches to the socket one.
 * SOV_SOCKBSD sockets get EOPNOTSUPP.
 *
 * Returns 0 on success, otherwise an errno value.
 */
static int
socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	static const char sockmod_name[] = "sockmod";
	struct sonode	*so = VTOSO(vp);
	char		mname[FMNAMESZ + 1];
	int		error;
	sotpi_info_t	*sti = SOTOTPI(so);

	ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));

	if (so->so_version == SOV_SOCKBSD)
		return (EOPNOTSUPP);

	if (so->so_version == SOV_STREAM) {
		/*
		 * The imaginary "sockmod" has been popped - act as a stream.
		 * If this is a push of sockmod then change back to a socket.
		 */
		if (cmd == I_PUSH) {
			/* Kernel callers pass a kernel string (FKIOCTL). */
			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
			    (void *)arg, mname, sizeof (mname), NULL);

			if (error == 0 && strcmp(mname, sockmod_name) == 0) {
				dprintso(so, 0, ("socktpi_ioctl: going to "
				    "socket version\n"));
				so_stream2sock(so);
				return (0);
			}
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
	}

	switch (cmd) {
	case I_PUSH:
		if (sti->sti_direct) {
			/*
			 * Issue _SIOCSOCKFALLBACK with the sonode
			 * single-threaded and clear sti_direct once it
			 * succeeds; the push is abandoned if it fails.
			 */
			mutex_enter(&so->so_lock);
			so_lock_single(so);
			mutex_exit(&so->so_lock);

			error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K,
			    cr, rvalp);

			mutex_enter(&so->so_lock);
			if (error == 0)
				sti->sti_direct = 0;
			so_unlock_single(so, SOLOCKED);
			mutex_exit(&so->so_lock);

			if (error != 0)
				return (error);
		}

		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error == 0)
			sti->sti_pushcnt++;
		return (error);

	case I_POP:
		if (sti->sti_pushcnt == 0) {
			/* Emulate sockmod being popped */
			dprintso(so, 0,
			    ("socktpi_ioctl: going to STREAMS version\n"));
			return (so_sock2stream(so));
		}

		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error == 0)
			sti->sti_pushcnt--;
		return (error);

	case I_LIST: {
		struct str_mlist *kmlistp, *umlistp;
		struct str_list	kstrlist;
		ssize_t		kstrlistsize;
		int		i, nmods;

		STRUCT_DECL(str_list, ustrlist);
		STRUCT_INIT(ustrlist, mode);

		if (arg == NULL) {
			error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
			if (error == 0)
				(*rvalp)++;	/* Add one for sockmod */
			return (error);
		}

		error = so_copyin((void *)arg, STRUCT_BUF(ustrlist),
		    STRUCT_SIZE(ustrlist), mode & FKIOCTL);
		if (error != 0)
			return (error);

		nmods = STRUCT_FGET(ustrlist, sl_nmods);
		if (nmods <= 0)
			return (EINVAL);
		/*
		 * Ceiling nmods at nstrpush to prevent someone from
		 * maliciously consuming lots of kernel memory.
		 */
		nmods = MIN(nmods, nstrpush);

		/* One extra slot so that sockmod can be inserted below. */
		kstrlistsize = (nmods + 1) * sizeof (struct str_mlist);
		kstrlist.sl_nmods = nmods;
		kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP);

		error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K,
		    cr, rvalp);
		if (error != 0)
			goto done;

		/*
		 * Considering the module list as a 0-based array of sl_nmods
		 * modules, sockmod should conceptually exist at slot
		 * sti_pushcnt.  Insert sockmod at this location by sliding all
		 * of the module names after sti_pushcnt over by one.  We know
		 * that there will be room to do this since we allocated
		 * sl_modlist with an additional slot.
		 */
		for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--)
			kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1];

		(void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name);
		kstrlist.sl_nmods++;

		/*
		 * Copy all of the entries out to ustrlist.
		 */
		kmlistp = kstrlist.sl_modlist;
		umlistp = STRUCT_FGETP(ustrlist, sl_modlist);
		for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) {
			error = so_copyout(kmlistp++, umlistp++,
			    sizeof (struct str_mlist), mode & FKIOCTL);
			if (error != 0)
				goto done;
		}

		/* Report the number of entries actually copied out. */
		error = so_copyout(&i, (void *)arg, sizeof (int32_t),
		    mode & FKIOCTL);
		if (error == 0)
			*rvalp = 0;
done:
		kmem_free(kstrlist.sl_modlist, kstrlistsize);
		return (error);
	}
	case I_LOOK:
		/* With nothing pushed, the top "module" is sockmod itself. */
		if (sti->sti_pushcnt == 0) {
			return (so_copyout(sockmod_name, (void *)arg,
			    sizeof (sockmod_name), mode & FKIOCTL));
		}
		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));

	case I_FIND:
		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		if (error && error != EINVAL)
			return (error);

		/* if not found and string was sockmod return 1 */
		if (*rvalp == 0 || error == EINVAL) {
			error = ((mode & FKIOCTL) ? copystr : copyinstr)(
			    (void *)arg, mname, sizeof (mname), NULL);
			if (error == ENAMETOOLONG)
				error = EINVAL;

			if (error == 0 && strcmp(mname, sockmod_name) == 0)
				*rvalp = 1;
		}
		return (error);

	default:
		panic("socktpi_plumbioctl: unknown ioctl %d", cmd);
		break;
	}

	return (0);
}

/*
 * Wrapper around the streams poll routine that implements socket poll
 * semantics.
 * The sockfs never calls pollwakeup itself - the stream head take care
 * of all pollwakeups. Since sockfs never holds so_lock when calling the
 * stream head there can never be a deadlock due to holding so_lock across
 * pollwakeup and acquiring so_lock in this routine.
 *
 * However, since the performance of VOP_POLL is critical we avoid
 * acquiring so_lock here.
 * This is based on two assumptions:
 *  - The poll implementation holds locks to serialize the VOP_POLL call
 *    and a pollwakeup for the same pollhead. This ensures that should
 *    e.g. so_state change during a socktpi_poll call the pollwakeup
 *    (which strsock_* and strrput conspire to issue) is issued after
 *    the state change. Thus the pollwakeup will block until VOP_POLL has
 *    returned and then wake up poll and have it call VOP_POLL again.
 *  - The reading of so_state without holding so_lock does not result in
 *    stale data that is older than the latest state change that has dropped
 *    so_lock. This is ensured by the mutex_exit issuing the appropriate
 *    memory barrier to force the data into the coherency domain.
 *
 * Returns 0 with the ready events stored in *reventsp, or the error
 * returned by strpoll().
 */
static int
sotpi_poll(
    struct sonode	*so,
    short		events,
    int			anyyet,
    short		*reventsp,
    struct pollhead **phpp)
{
	short origevents = events;
	struct vnode *vp = SOTOV(so);
	int error;
	int so_state = so->so_state;	/* snapshot */
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n",
	    (void *)vp, pr_state(so_state, so->so_mode), so->so_error));

	ASSERT(vp->v_type == VSOCK);
	ASSERT(vp->v_stream != NULL);

	if (so->so_version == SOV_STREAM) {
		/* The imaginary "sockmod" has been popped - act as a stream */
		return (strpoll(vp->v_stream, events, anyyet,
		    reventsp, phpp));
	}

	if (!(so_state & SS_ISCONNECTED) &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		/* Not connected yet - turn off write side events */
		events &= ~(POLLOUT|POLLWRBAND);
	}
	/*
	 * Check for errors without calling strpoll if the caller wants them.
	 * In sockets the errors are represented as input/output events
	 * and there is no need to ask the stream head for this information.
	 */
	if (so->so_error != 0 &&
	    ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) {
		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents;
		return (0);
	}
	/*
	 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages.
	 * These messages with only an M_PROTO/M_PCPROTO part and no M_DATA
	 * will not trigger a POLLIN event with POLLRDDATA set.
	 * The handling of urgent data (causing POLLRDBAND) is done by
	 * inspecting SS_OOBPEND below.
	 */
	events |= POLLRDDATA;

	/*
	 * After shutdown(output) a stream head write error is set.
	 * However, we should not return output events.
	 */
	events |= POLLNOERR;
	error = strpoll(vp->v_stream, events, anyyet,
	    reventsp, phpp);
	if (error)
		return (error);

	ASSERT(!(*reventsp & POLLERR));

	/*
	 * Notes on T_CONN_IND handling for sockets.
	 *
	 * If strpoll() returned without events, SR_POLLIN is guaranteed
	 * to be set, ensuring any subsequent strrput() runs pollwakeup().
	 *
	 * Since the so_lock is not held, soqueueconnind() may have run
	 * and a T_CONN_IND may be waiting. We now check for any queued
	 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events
	 * to ensure poll returns.
	 *
	 * However:
	 * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
	 * when strrput() does run for an arriving M_PROTO with T_CONN_IND
	 * the following actions will occur; taken together they ensure the
	 * syscall will return.
	 *
	 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if
	 *    the accept() was run on a non-blocking socket sowaitconnind()
	 *    may have already returned EWOULDBLOCK, so not be waiting to
	 *    process the message. Additionally socktpi_poll() has probably
	 *    proceeded past the sti_conn_ind_head check below.
	 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
	 *    this thread, however that could occur before poll_common()
	 *    has entered cv_wait.
	 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
	 *
	 * Before proceeding to cv_wait() in poll_common() for an event,
	 * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
	 * and if set, re-calls strpoll() to ensure the late arriving
	 * T_CONN_IND is recognized, and pollsys() returns.
	 */

	if (sti->sti_conn_ind_head != NULL)
		*reventsp |= (POLLIN|POLLRDNORM) & events;

	if (so->so_state & SS_CANTRCVMORE) {
		*reventsp |= POLLRDHUP & events;

		/* POLLHUP is reported even when it was not requested. */
		if (so->so_state & SS_CANTSENDMORE)
			*reventsp |= POLLHUP;
	}

	if (so->so_state & SS_OOBPEND)
		*reventsp |= POLLRDBAND & events;

	if (sti->sti_nl7c_rcv_mp != NULL) {
		*reventsp |= (POLLIN|POLLRDNORM) & events;
	}
	if ((sti->sti_nl7c_flags & NL7C_ENABLED) &&
	    ((POLLIN|POLLRDNORM) & *reventsp)) {
		sti->sti_nl7c_flags |= NL7C_POLLIN;
	}

	return (0);
}

/*
 * kmem cache constructor for a sotpi_sonode_t: constructs the embedded
 * sonode and then the embedded TPI info, and points so_priv at the latter.
 * Returns 0 or the error from the failing constructor.
 */
/*ARGSUSED*/
static int
socktpi_constructor(void *buf, void *cdrarg, int kmflags)
{
	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;
	int error = 0;

	error = sonode_constructor(buf, cdrarg, kmflags);
	if (error != 0)
		return (error);

	error = i_sotpi_info_constructor(&st->st_info);
	if (error != 0)
		sonode_destructor(buf, cdrarg);

	/*
	 * NOTE(review): so_priv is assigned even on the failure path above,
	 * i.e. after sonode_destructor() has run.  i_sotpi_info_constructor()
	 * as written in this file always returns 0, so this appears to be
	 * unreachable in practice -- confirm before relying on it.
	 */
	st->st_sonode.so_priv = &st->st_info;

	return (error);
}

/*
 * kmem cache destructor matching socktpi_constructor(): detaches the TPI
 * info from the sonode and destroys both in reverse order.
 */
/*ARGSUSED1*/
static void
socktpi_destructor(void *buf, void *cdrarg)
{
	sotpi_sonode_t *st = (sotpi_sonode_t *)buf;

	ASSERT(st->st_sonode.so_priv == &st->st_info);
	st->st_sonode.so_priv = NULL;

	i_sotpi_info_destructor(&st->st_info);
	sonode_destructor(buf, cdrarg);
}

/*
 * Constructor for the AF_UNIX cache: same as socktpi_constructor(), and on
 * success also links the sonode at the head of the global socklist under
 * socklist.sl_lock.
 */
static int
socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags)
{
	int retval;

	if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) {
		struct sonode *so = (struct sonode *)buf;
		sotpi_info_t *sti = SOTOTPI(so);

		mutex_enter(&socklist.sl_lock);

		sti->sti_next_so = socklist.sl_list;
		sti->sti_prev_so = NULL;
		if (sti->sti_next_so != NULL)
			SOTOTPI(sti->sti_next_so)->sti_prev_so = so;
		socklist.sl_list = so;

		mutex_exit(&socklist.sl_lock);

	}
	return (retval);
}

/*
 * Destructor for the AF_UNIX cache: unlinks the sonode from the global
 * socklist under socklist.sl_lock, then runs socktpi_destructor().
 */
static void
socktpi_unix_destructor(void *buf, void *cdrarg)
{
	struct sonode	*so = (struct sonode *)buf;
	sotpi_info_t	*sti = SOTOTPI(so);

	mutex_enter(&socklist.sl_lock);

	if (sti->sti_next_so != NULL)
		SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so;
	if (sti->sti_prev_so != NULL)
		SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so;
	else
		socklist.sl_list = sti->sti_next_so;

	mutex_exit(&socklist.sl_lock);

	socktpi_destructor(buf, cdrarg);
}

/*
 * Creates the two sonode kmem caches used by socktpi.  Always returns 0.
 */
int
socktpi_init(void)
{
	/*
	 * Create sonode caches.  We create a special one for AF_UNIX so
	 * that we can track them for netstat(1m).
	 */
	socktpi_cache = kmem_cache_create("socktpi_cache",
	    sizeof (struct sotpi_sonode), 0, socktpi_constructor,
	    socktpi_destructor, NULL, NULL, NULL, 0);

	socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache",
	    sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor,
	    socktpi_unix_destructor, NULL, NULL, NULL, 0);

	return (0);
}

/*
 * Given a non-TPI sonode, allocate and prep it to be ready for TPI.
 *
 * Caller must still update state and mode using sotpi_update_state().
 *
 * On success returns 0, stores in *qp the read side of the last queue on
 * the new stream's write chain, and sets *direct to B_TRUE when sti_direct
 * was set by sotpi_init().  On failure the TPI info is torn down again and
 * the error from sotpi_init() is returned; note that so_sockparams has
 * already been switched to newsp at that point.
 */
int
sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp,
    boolean_t *direct, queue_t **qp, struct cred *cr)
{
	sotpi_info_t *sti;
	struct sockparams *origsp = so->so_sockparams;
	sock_lower_handle_t handle = so->so_proto_handle;
	struct stdata *stp;
	struct vnode *vp;
	queue_t *q;
	int error = 0;

	ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
	    SS_FALLBACK_PENDING);
	ASSERT(SOCK_IS_NONSTR(so));

	*qp = NULL;
	*direct = B_FALSE;
	so->so_sockparams = newsp;
	/*
	 * Allocate and initialize fields required by TPI.
	 */
	(void) sotpi_info_create(so, KM_SLEEP);
	sotpi_info_init(so);

	if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) {
		sotpi_info_fini(so);
		sotpi_info_destroy(so);
		return (error);
	}
	ASSERT(handle == so->so_proto_handle);
	sti = SOTOTPI(so);
	if (sti->sti_direct != 0)
		*direct = B_TRUE;

	/*
	 * Keep the original sp around so we can properly dispose of the
	 * sonode when the socket is being closed.
	 */
	sti->sti_orig_sp = origsp;

	so_basic_strinit(so);	/* skips the T_CAPABILITY_REQ */
	so_alloc_addr(so, so->so_max_addr_len);

	/*
	 * If the application has done a SIOCSPGRP, make sure the
	 * STREAM head is aware. This needs to take place before
	 * the protocol starts sending up messages. Otherwise we
	 * might miss to generate SIGPOLL.
	 *
	 * It is possible that the application will receive duplicate
	 * signals if some were already generated for either data or
	 * connection indications.
	 */
	if (so->so_pgrp != 0) {
		if (so_set_events(so, so->so_vnode, cr) != 0)
			so->so_pgrp = 0;
	}

	/*
	 * Determine which queue to use.
	 */
	vp = SOTOV(so);
	stp = vp->v_stream;
	ASSERT(stp != NULL);
	q = stp->sd_wrq->q_next;

	/*
	 * Skip any modules that may have been auto pushed when the device
	 * was opened
	 */
	while (q->q_next != NULL)
		q = q->q_next;
	*qp = _RD(q);

	/* This is now a STREAMS socket */
	so->so_not_str = B_FALSE;

	return (error);
}

/*
 * Revert a TPI sonode. It is only allowed to revert the sonode during
 * the fallback process.
 */
void
sotpi_revert_sonode(struct sonode *so, struct cred *cr)
{
	vnode_t *vp = SOTOV(so);

	ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) ==
	    SS_FALLBACK_PENDING);
	ASSERT(!SOCK_IS_NONSTR(so));
	ASSERT(vp->v_stream != NULL);

	strclean(vp);
	(void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr);

	/*
	 * Restore the original sockparams.  The caller is responsible for
	 * dropping the ref to the new sp.
	 */
	so->so_sockparams = SOTOTPI(so)->sti_orig_sp;

	sotpi_info_fini(so);
	sotpi_info_destroy(so);

	/* This is no longer a STREAMS socket */
	so->so_not_str = B_TRUE;
}

/*
 * Applies the transport capabilities in tcap to the sonode, ORs opts into
 * so_options and caches the supplied local and foreign addresses.  An
 * address with zero length is left untouched.
 */
void
sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap,
    struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr,
    socklen_t faddrlen, short opts)
{
	sotpi_info_t *sti = SOTOTPI(so);

	so_proc_tcapability_ack(so, tcap);

	so->so_options |= opts;

	/*
	 * Determine whether the foreign and local address are valid
	 */
	if (laddrlen != 0) {
		ASSERT(laddrlen <= sti->sti_laddr_maxlen);
		sti->sti_laddr_len = laddrlen;
		bcopy(laddr, sti->sti_laddr_sa, laddrlen);
		/* The cached local address only counts once bound. */
		sti->sti_laddr_valid = (so->so_state & SS_ISBOUND);
	}

	if (faddrlen != 0) {
		ASSERT(faddrlen <= sti->sti_faddr_maxlen);
		sti->sti_faddr_len = faddrlen;
		bcopy(faddr, sti->sti_faddr_sa, faddrlen);
		/* The cached peer address only counts once connected. */
		sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED);
	}

}

/*
 * Allocate enough space to cache the local and foreign addresses.
 *
 * maxlen is rounded up to KMEM_ALIGN and both addresses share a single
 * allocation of twice that size: the local address occupies the first half
 * and the foreign address the second.
 */
void
so_alloc_addr(struct sonode *so, t_uscalar_t maxlen)
{
	sotpi_info_t *sti = SOTOTPI(so);

	ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);
	ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0);
	sti->sti_laddr_maxlen = sti->sti_faddr_maxlen =
	    P2ROUNDUP(maxlen, KMEM_ALIGN);
	so->so_max_addr_len = sti->sti_laddr_maxlen;
	sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP);
	sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa
	    + sti->sti_laddr_maxlen);

	if (so->so_family == AF_UNIX) {
		/*
		 * Initialize AF_UNIX related fields.
		 */
		bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr));
		bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr));
	}
}


/*
 * Returns the TPI info hanging off so_priv, asserting that it is present
 * and carries the expected magic number.
 */
sotpi_info_t *
sotpi_sototpi(struct sonode *so)
{
	sotpi_info_t *sti;

	ASSERT(so != NULL);

	sti = (sotpi_info_t *)so->so_priv;

	ASSERT(sti != NULL);
	ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);

	return (sti);
}

/*
 * Puts a sotpi_info_t into its constructed state: magic set, message,
 * address and NL7C pointers cleared, and the plumb lock and ack condition
 * variable initialized.  Always returns 0.
 */
static int
i_sotpi_info_constructor(sotpi_info_t *sti)
{
	sti->sti_magic		= SOTPI_INFO_MAGIC;
	sti->sti_ack_mp		= NULL;
	sti->sti_discon_ind_mp	= NULL;
	sti->sti_ux_bound_vp	= NULL;
	sti->sti_unbind_mp	= NULL;

	sti->sti_conn_ind_head	= NULL;
	sti->sti_conn_ind_tail	= NULL;

	sti->sti_laddr_sa	= NULL;
	sti->sti_faddr_sa	= NULL;

	sti->sti_nl7c_flags	= 0;
	sti->sti_nl7c_uri	= NULL;
	sti->sti_nl7c_rcv_mp	= NULL;

	mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL);

	return (0);
}

/*
 * Counterpart of i_sotpi_info_constructor(): asserts that the info is back
 * in its constructed state, then destroys the lock and condition variable.
 */
static void
i_sotpi_info_destructor(sotpi_info_t *sti)
{
	ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC);
	ASSERT(sti->sti_ack_mp == NULL);
	ASSERT(sti->sti_discon_ind_mp == NULL);
	ASSERT(sti->sti_ux_bound_vp == NULL);
	ASSERT(sti->sti_unbind_mp == NULL);

	ASSERT(sti->sti_conn_ind_head == NULL);
	ASSERT(sti->sti_conn_ind_tail == NULL);

	ASSERT(sti->sti_laddr_sa == NULL);
	ASSERT(sti->sti_faddr_sa == NULL);

	ASSERT(sti->sti_nl7c_flags == 0);
	ASSERT(sti->sti_nl7c_uri == NULL);
	ASSERT(sti->sti_nl7c_rcv_mp == NULL);

	mutex_destroy(&sti->sti_plumb_lock);
	cv_destroy(&sti->sti_ack_cv);
}

/*
 * Creates and attaches TPI information to the given sonode
 *
 * Returns B_TRUE on success, or B_FALSE if the allocation or the
 * constructor failed (in which case so_priv is left NULL).
 */
static boolean_t
sotpi_info_create(struct sonode *so, int kmflags)
{
	sotpi_info_t *sti;

	ASSERT(so->so_priv == NULL);

	if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL)
		return (B_FALSE);

	if (i_sotpi_info_constructor(sti) != 0) {
		kmem_free(sti, sizeof (*sti));
		return (B_FALSE);
	}

	so->so_priv = (void *)sti;
	return (B_TRUE);
}

/*
 * Initializes the TPI information.
 *
 * Takes the device from the sonode's sockparams (also storing it in the
 * vnode's v_rdev), stamps the three timestamps with the current time and
 * resets the remaining per-socket fields.
 */
static void
sotpi_info_init(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);
	sotpi_info_t *sti = SOTOTPI(so);
	time_t now;

	sti->sti_dev	= so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev;
	vp->v_rdev	= sti->sti_dev;

	sti->sti_orig_sp = NULL;

	sti->sti_pushcnt = 0;

	now = gethrestime_sec();
	sti->sti_atime	= now;
	sti->sti_mtime	= now;
	sti->sti_ctime	= now;

	sti->sti_eaddr_mp = NULL;
	sti->sti_delayed_error = 0;

	sti->sti_provinfo = NULL;

	sti->sti_oobcnt = 0;
	sti->sti_oobsigcnt = 0;

	ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL);

	sti->sti_laddr_sa	= 0;
	sti->sti_faddr_sa	= 0;
	sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0;
	sti->sti_laddr_len = sti->sti_faddr_len = 0;

	sti->sti_laddr_valid = 0;
	sti->sti_faddr_valid = 0;
	sti->sti_faddr_noxlate = 0;

	sti->sti_direct = 0;

	ASSERT(sti->sti_ack_mp == NULL);
	ASSERT(sti->sti_ux_bound_vp == NULL);
	ASSERT(sti->sti_unbind_mp == NULL);

	ASSERT(sti->sti_conn_ind_head == NULL);
	ASSERT(sti->sti_conn_ind_tail == NULL);
}

/*
 * Given a sonode, grab the TPI info and free any data.
6722 */ 6723 static void 6724 sotpi_info_fini(struct sonode *so) 6725 { 6726 sotpi_info_t *sti = SOTOTPI(so); 6727 mblk_t *mp; 6728 6729 ASSERT(sti->sti_discon_ind_mp == NULL); 6730 6731 if ((mp = sti->sti_conn_ind_head) != NULL) { 6732 mblk_t *mp1; 6733 6734 while (mp) { 6735 mp1 = mp->b_next; 6736 mp->b_next = NULL; 6737 freemsg(mp); 6738 mp = mp1; 6739 } 6740 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6741 } 6742 6743 /* 6744 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6745 * indirect them. It also uses so_count as a validity test. 6746 */ 6747 mutex_enter(&so->so_lock); 6748 6749 if (sti->sti_laddr_sa) { 6750 ASSERT((caddr_t)sti->sti_faddr_sa == 6751 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6752 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6753 sti->sti_laddr_valid = 0; 6754 sti->sti_faddr_valid = 0; 6755 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6756 sti->sti_laddr_sa = NULL; 6757 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6758 sti->sti_faddr_sa = NULL; 6759 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6760 } 6761 6762 mutex_exit(&so->so_lock); 6763 6764 if ((mp = sti->sti_eaddr_mp) != NULL) { 6765 freemsg(mp); 6766 sti->sti_eaddr_mp = NULL; 6767 sti->sti_delayed_error = 0; 6768 } 6769 6770 if ((mp = sti->sti_ack_mp) != NULL) { 6771 freemsg(mp); 6772 sti->sti_ack_mp = NULL; 6773 } 6774 6775 if ((mp = sti->sti_nl7c_rcv_mp) != NULL) { 6776 sti->sti_nl7c_rcv_mp = NULL; 6777 freemsg(mp); 6778 } 6779 sti->sti_nl7c_rcv_rval = 0; 6780 if (sti->sti_nl7c_uri != NULL) { 6781 nl7c_urifree(so); 6782 /* urifree() cleared nl7c_uri */ 6783 } 6784 if (sti->sti_nl7c_flags) { 6785 sti->sti_nl7c_flags = 0; 6786 } 6787 6788 ASSERT(sti->sti_ux_bound_vp == NULL); 6789 if ((mp = sti->sti_unbind_mp) != NULL) { 6790 freemsg(mp); 6791 sti->sti_unbind_mp = NULL; 6792 } 6793 } 6794 6795 /* 6796 * Destroys the TPI information attached to a sonode. 
6797 */ 6798 static void 6799 sotpi_info_destroy(struct sonode *so) 6800 { 6801 sotpi_info_t *sti = SOTOTPI(so); 6802 6803 i_sotpi_info_destructor(sti); 6804 kmem_free(sti, sizeof (*sti)); 6805 6806 so->so_priv = NULL; 6807 } 6808 6809 /* 6810 * Create the global sotpi socket module entry. It will never be freed. 6811 */ 6812 smod_info_t * 6813 sotpi_smod_create(void) 6814 { 6815 smod_info_t *smodp; 6816 6817 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 6818 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 6819 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 6820 /* 6821 * Initialize the smod_refcnt to 1 so it will never be freed. 6822 */ 6823 smodp->smod_refcnt = 1; 6824 smodp->smod_uc_version = SOCK_UC_VERSION; 6825 smodp->smod_dc_version = SOCK_DC_VERSION; 6826 smodp->smod_sock_create_func = &sotpi_create; 6827 smodp->smod_sock_destroy_func = &sotpi_destroy; 6828 return (smodp); 6829 } 6830