1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/sockio.h> 61 #include <sys/sodirect.h> 62 #include <netinet/in.h> 63 #include <sys/un.h> 64 #include <sys/strsun.h> 65 66 #include <sys/tiuser.h> 67 #define _SUN_TPI_VERSION 2 68 #include <sys/tihdr.h> 69 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 70 71 #include <c2/audit.h> 72 73 #include <inet/common.h> 74 #include <inet/ip.h> 75 #include <inet/ip6.h> 76 #include <inet/tcp.h> 77 #include <inet/udp_impl.h> 78 79 #include <sys/zone.h> 80 81 #include <fs/sockfs/nl7c.h> 82 #include <fs/sockfs/nl7curi.h> 83 84 #include <inet/kssl/ksslapi.h> 85 86 /* 87 * Possible failures when memory can't be allocated. The documented behavior: 88 * 89 * 5.5: 4.X: XNET: 90 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 91 * EINTR 92 * (4.X does not document EINTR but returns it) 93 * bind: ENOSR - ENOBUFS/ENOSR 94 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 95 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 96 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 97 * (4.X getpeername and getsockname do not fail in practice) 98 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 99 * listen: - - ENOBUFS 100 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 101 * EINTR 102 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 103 * EINTR 104 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 105 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 106 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 107 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 108 * 109 * Resolution. When allocation fails: 110 * recv: return EINTR 111 * send: return EINTR 112 * connect, accept: EINTR 113 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 114 * socket, socketpair: ENOBUFS 115 * getpeername, getsockname: sleep 116 * getsockopt, setsockopt: sleep 117 */ 118 119 #ifdef SOCK_TEST 120 /* 121 * Variables that make sockfs do something other than the standard TPI 122 * for the AF_INET transports. 123 * 124 * solisten_tpi_tcp: 125 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 126 * the transport is already bound. This is needed to avoid loosing the 127 * port number should listen() do a T_UNBIND_REQ followed by a 128 * O_T_BIND_REQ. 129 * 130 * soconnect_tpi_udp: 131 * UDP and ICMP can handle a T_CONN_REQ. 132 * This is needed to make the sequence of connect(), getsockname() 133 * return the local IP address used to send packets to the connected to 134 * destination. 135 * 136 * soconnect_tpi_tcp: 137 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 138 * Set this to non-zero to send TPI conformant messages to TCP in this 139 * respect. This is a performance optimization. 140 * 141 * soaccept_tpi_tcp: 142 * TCP can handle a T_CONN_REQ without the acceptor being bound. 143 * This is a performance optimization that has been picked up in XTI. 144 * 145 * soaccept_tpi_multioptions: 146 * When inheriting SOL_SOCKET options from the listener to the accepting 147 * socket send them as a single message for AF_INET{,6}. 148 */ 149 int solisten_tpi_tcp = 0; 150 int soconnect_tpi_udp = 0; 151 int soconnect_tpi_tcp = 0; 152 int soaccept_tpi_tcp = 0; 153 int soaccept_tpi_multioptions = 1; 154 #else /* SOCK_TEST */ 155 #define soconnect_tpi_tcp 0 156 #define soconnect_tpi_udp 0 157 #define solisten_tpi_tcp 0 158 #define soaccept_tpi_tcp 0 159 #define soaccept_tpi_multioptions 1 160 #endif /* SOCK_TEST */ 161 162 #ifdef SOCK_TEST 163 extern int do_useracc; 164 extern clock_t sock_test_timelimit; 165 #endif /* SOCK_TEST */ 166 167 /* 168 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 169 * applications working. Turn on this flag to disable these checks. 170 */ 171 int xnet_skip_checks = 0; 172 int xnet_check_print = 0; 173 int xnet_truncate_print = 0; 174 175 extern void sigintr(k_sigset_t *, int); 176 extern void sigunintr(k_sigset_t *); 177 178 extern void *nl7c_lookup_addr(void *, t_uscalar_t); 179 extern void *nl7c_add_addr(void *, t_uscalar_t); 180 extern void nl7c_listener_addr(void *, struct sonode *); 181 182 /* Sockets acting as an in-kernel SSL proxy */ 183 extern mblk_t *strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *, 184 strsigset_t *, strsigset_t *, strpollset_t *); 185 extern mblk_t *strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *, 186 strsigset_t *, strsigset_t *, strpollset_t *); 187 188 static int sotpi_unbind(struct sonode *, int); 189 190 extern int sodput(sodirect_t *, mblk_t *); 191 extern void sodwakeup(sodirect_t *); 192 193 /* TPI sockfs sonode operations */ 194 static int sotpi_accept(struct sonode *, int, struct sonode **); 195 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 196 int); 197 static int sotpi_connect(struct sonode *, const struct sockaddr *, 198 socklen_t, int, int); 199 static int sotpi_listen(struct sonode *, int); 200 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 201 struct uio *); 202 static int sotpi_shutdown(struct sonode *, int); 203 static int sotpi_getsockname(struct sonode *); 204 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 205 struct uio *, void *, t_uscalar_t, int); 206 static int sodgram_direct(struct sonode *, struct sockaddr *, 207 socklen_t, struct uio *, int); 208 209 sonodeops_t sotpi_sonodeops = { 210 sotpi_accept, /* sop_accept */ 211 sotpi_bind, /* sop_bind */ 212 sotpi_listen, /* sop_listen */ 213 sotpi_connect, /* sop_connect */ 214 sotpi_recvmsg, /* sop_recvmsg */ 215 sotpi_sendmsg, /* sop_sendmsg */ 216 sotpi_getpeername, /* sop_getpeername */ 217 sotpi_getsockname, /* sop_getsockname */ 218 sotpi_shutdown, /* sop_shutdown */ 219 sotpi_getsockopt, /* sop_getsockopt */ 220 sotpi_setsockopt /* sop_setsockopt */ 221 }; 222 223 /* 224 * Common create code for socket and accept. If tso is set the values 225 * from that node is used instead of issuing a T_INFO_REQ. 226 * 227 * Assumes that the caller has a VN_HOLD on accessvp. 228 * The VN_RELE will occur either when sotpi_create() fails or when 229 * the returned sonode is freed. 230 */ 231 struct sonode * 232 sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version, 233 struct sonode *tso, int *errorp) 234 { 235 struct sonode *so; 236 vnode_t *vp; 237 int flags, error; 238 239 ASSERT(accessvp != NULL); 240 vp = makesockvp(accessvp, domain, type, protocol); 241 ASSERT(vp != NULL); 242 so = VTOSO(vp); 243 244 flags = FREAD|FWRITE; 245 246 if ((type == SOCK_STREAM || type == SOCK_DGRAM) && 247 (domain == AF_INET || domain == AF_INET6) && 248 (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 249 protocol == IPPROTO_IP)) { 250 /* Tell tcp or udp that it's talking to sockets */ 251 flags |= SO_SOCKSTR; 252 253 /* 254 * Here we indicate to socktpi_open() our attempt to 255 * make direct calls between sockfs and transport. 256 * The final decision is left to socktpi_open(). 257 */ 258 so->so_state |= SS_DIRECT; 259 260 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 261 if (so->so_type == SOCK_STREAM && tso != NULL) { 262 if (tso->so_state & SS_DIRECT) { 263 /* 264 * Inherit SS_DIRECT from listener and pass 265 * SO_ACCEPTOR open flag to tcp, indicating 266 * that this is an accept fast-path instance. 267 */ 268 flags |= SO_ACCEPTOR; 269 } else { 270 /* 271 * SS_DIRECT is not set on listener, meaning 272 * that the listener has been converted from 273 * a socket to a stream. Ensure that the 274 * acceptor inherits these settings. 275 */ 276 so->so_state &= ~SS_DIRECT; 277 flags &= ~SO_SOCKSTR; 278 } 279 } 280 } 281 282 /* 283 * Tell local transport that it is talking to sockets. 284 */ 285 if (so->so_family == AF_UNIX) { 286 flags |= SO_SOCKSTR; 287 } 288 289 /* Initialize the kernel SSL proxy fields */ 290 so->so_kssl_type = KSSL_NO_PROXY; 291 so->so_kssl_ent = NULL; 292 so->so_kssl_ctx = NULL; 293 294 if (error = socktpi_open(&vp, flags, CRED(), NULL)) { 295 VN_RELE(vp); 296 *errorp = error; 297 return (NULL); 298 } 299 300 if (error = so_strinit(so, tso)) { 301 (void) VOP_CLOSE(vp, 0, 1, 0, CRED(), NULL); 302 VN_RELE(vp); 303 *errorp = error; 304 return (NULL); 305 } 306 307 if (version == SOV_DEFAULT) 308 version = so_default_version; 309 310 so->so_version = (short)version; 311 312 return (so); 313 } 314 315 /* 316 * Bind the socket to an unspecified address in sockfs only. 317 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 318 * required in all cases. 319 */ 320 static void 321 so_automatic_bind(struct sonode *so) 322 { 323 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 324 325 ASSERT(MUTEX_HELD(&so->so_lock)); 326 ASSERT(!(so->so_state & SS_ISBOUND)); 327 ASSERT(so->so_unbind_mp); 328 329 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 330 bzero(so->so_laddr_sa, so->so_laddr_len); 331 so->so_laddr_sa->sa_family = so->so_family; 332 so->so_state |= SS_ISBOUND; 333 } 334 335 336 /* 337 * bind the socket. 338 * 339 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 340 * are passed in we allow rebinding. Note that for backwards compatibility 341 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 342 * Thus the rebinding code is currently not executed. 343 * 344 * The constraints for rebinding are: 345 * - it is a SOCK_DGRAM, or 346 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 347 * and no listen() has been done. 348 * This rebinding code was added based on some language in the XNET book 349 * about not returning EINVAL it the protocol allows rebinding. However, 350 * this language is not present in the Posix socket draft. Thus maybe the 351 * rebinding logic should be deleted from the source. 352 * 353 * A null "name" can be used to unbind the socket if: 354 * - it is a SOCK_DGRAM, or 355 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 356 * and no listen() has been done. 357 */ 358 static int 359 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 360 socklen_t namelen, int backlog, int flags) 361 { 362 struct T_bind_req bind_req; 363 struct T_bind_ack *bind_ack; 364 int error = 0; 365 mblk_t *mp; 366 void *addr; 367 t_uscalar_t addrlen; 368 int unbind_on_err = 1; 369 boolean_t clear_acceptconn_on_err = B_FALSE; 370 boolean_t restore_backlog_on_err = B_FALSE; 371 int save_so_backlog; 372 t_scalar_t PRIM_type = O_T_BIND_REQ; 373 boolean_t tcp_udp_xport; 374 void *nl7c = NULL; 375 376 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 377 (void *)so, (void *)name, namelen, backlog, flags, 378 pr_state(so->so_state, so->so_mode))); 379 380 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 381 382 if (!(flags & _SOBIND_LOCK_HELD)) { 383 mutex_enter(&so->so_lock); 384 so_lock_single(so); /* Set SOLOCKED */ 385 } else { 386 ASSERT(MUTEX_HELD(&so->so_lock)); 387 ASSERT(so->so_flag & SOLOCKED); 388 } 389 390 /* 391 * Make sure that there is a preallocated unbind_req message 392 * before binding. This message allocated when the socket is 393 * created but it might be have been consumed. 394 */ 395 if (so->so_unbind_mp == NULL) { 396 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 397 /* NOTE: holding so_lock while sleeping */ 398 so->so_unbind_mp = 399 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 400 } 401 402 if (flags & _SOBIND_REBIND) { 403 /* 404 * Called from solisten after doing an sotpi_unbind() or 405 * potentially without the unbind (latter for AF_INET{,6}). 406 */ 407 ASSERT(name == NULL && namelen == 0); 408 409 if (so->so_family == AF_UNIX) { 410 ASSERT(so->so_ux_bound_vp); 411 addr = &so->so_ux_laddr; 412 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 413 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 414 "addr 0x%p, vp %p\n", 415 addrlen, 416 (void *)((struct so_ux_addr *)addr)->soua_vp, 417 (void *)so->so_ux_bound_vp)); 418 } else { 419 addr = so->so_laddr_sa; 420 addrlen = (t_uscalar_t)so->so_laddr_len; 421 } 422 } else if (flags & _SOBIND_UNSPEC) { 423 ASSERT(name == NULL && namelen == 0); 424 425 /* 426 * The caller checked SS_ISBOUND but not necessarily 427 * under so_lock 428 */ 429 if (so->so_state & SS_ISBOUND) { 430 /* No error */ 431 goto done; 432 } 433 434 /* Set an initial local address */ 435 switch (so->so_family) { 436 case AF_UNIX: 437 /* 438 * Use an address with same size as struct sockaddr 439 * just like BSD. 440 */ 441 so->so_laddr_len = 442 (socklen_t)sizeof (struct sockaddr); 443 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 444 bzero(so->so_laddr_sa, so->so_laddr_len); 445 so->so_laddr_sa->sa_family = so->so_family; 446 447 /* 448 * Pass down an address with the implicit bind 449 * magic number and the rest all zeros. 450 * The transport will return a unique address. 451 */ 452 so->so_ux_laddr.soua_vp = NULL; 453 so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 454 addr = &so->so_ux_laddr; 455 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 456 break; 457 458 case AF_INET: 459 case AF_INET6: 460 /* 461 * An unspecified bind in TPI has a NULL address. 462 * Set the address in sockfs to have the sa_family. 463 */ 464 so->so_laddr_len = (so->so_family == AF_INET) ? 465 (socklen_t)sizeof (sin_t) : 466 (socklen_t)sizeof (sin6_t); 467 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 468 bzero(so->so_laddr_sa, so->so_laddr_len); 469 so->so_laddr_sa->sa_family = so->so_family; 470 addr = NULL; 471 addrlen = 0; 472 break; 473 474 default: 475 /* 476 * An unspecified bind in TPI has a NULL address. 477 * Set the address in sockfs to be zero length. 478 * 479 * Can not assume there is a sa_family for all 480 * protocol families. For example, AF_X25 does not 481 * have a family field. 482 */ 483 bzero(so->so_laddr_sa, so->so_laddr_len); 484 so->so_laddr_len = 0; /* XXX correct? */ 485 addr = NULL; 486 addrlen = 0; 487 break; 488 } 489 490 } else { 491 if (so->so_state & SS_ISBOUND) { 492 /* 493 * If it is ok to rebind the socket, first unbind 494 * with the transport. A rebind to the NULL address 495 * is interpreted as an unbind. 496 * Note that a bind to NULL in BSD does unbind the 497 * socket but it fails with EINVAL. 498 * Note that regular sockets set SOV_SOCKBSD i.e. 499 * _SOBIND_SOCKBSD gets set here hence no type of 500 * socket does currently allow rebinding. 501 * 502 * If the name is NULL just do an unbind. 503 */ 504 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 505 name != NULL) { 506 error = EINVAL; 507 unbind_on_err = 0; 508 eprintsoline(so, error); 509 goto done; 510 } 511 if ((so->so_mode & SM_CONNREQUIRED) && 512 (so->so_state & SS_CANTREBIND)) { 513 error = EINVAL; 514 unbind_on_err = 0; 515 eprintsoline(so, error); 516 goto done; 517 } 518 error = sotpi_unbind(so, 0); 519 if (error) { 520 eprintsoline(so, error); 521 goto done; 522 } 523 ASSERT(!(so->so_state & SS_ISBOUND)); 524 if (name == NULL) { 525 so->so_state &= 526 ~(SS_ISCONNECTED|SS_ISCONNECTING); 527 goto done; 528 } 529 } 530 /* X/Open requires this check */ 531 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 532 if (xnet_check_print) { 533 printf("sockfs: X/Open bind state check " 534 "caused EINVAL\n"); 535 } 536 error = EINVAL; 537 goto done; 538 } 539 540 switch (so->so_family) { 541 case AF_UNIX: 542 /* 543 * All AF_UNIX addresses are nul terminated 544 * when copied (copyin_name) in so the minimum 545 * length is 3 bytes. 546 */ 547 if (name == NULL || 548 (ssize_t)namelen <= sizeof (short) + 1) { 549 error = EISDIR; 550 eprintsoline(so, error); 551 goto done; 552 } 553 /* 554 * Verify so_family matches the bound family. 555 * BSD does not check this for AF_UNIX resulting 556 * in funny mknods. 557 */ 558 if (name->sa_family != so->so_family) { 559 error = EAFNOSUPPORT; 560 goto done; 561 } 562 break; 563 case AF_INET: 564 if (name == NULL) { 565 error = EINVAL; 566 eprintsoline(so, error); 567 goto done; 568 } 569 if ((size_t)namelen != sizeof (sin_t)) { 570 error = name->sa_family != so->so_family ? 571 EAFNOSUPPORT : EINVAL; 572 eprintsoline(so, error); 573 goto done; 574 } 575 if ((flags & _SOBIND_XPG4_2) && 576 (name->sa_family != so->so_family)) { 577 /* 578 * This check has to be made for X/Open 579 * sockets however application failures have 580 * been observed when it is applied to 581 * all sockets. 582 */ 583 error = EAFNOSUPPORT; 584 eprintsoline(so, error); 585 goto done; 586 } 587 /* 588 * Force a zero sa_family to match so_family. 589 * 590 * Some programs like inetd(1M) don't set the 591 * family field. Other programs leave 592 * sin_family set to garbage - SunOS 4.X does 593 * not check the family field on a bind. 594 * We use the family field that 595 * was passed in to the socket() call. 596 */ 597 name->sa_family = so->so_family; 598 break; 599 600 case AF_INET6: { 601 #ifdef DEBUG 602 sin6_t *sin6 = (sin6_t *)name; 603 #endif /* DEBUG */ 604 605 if (name == NULL) { 606 error = EINVAL; 607 eprintsoline(so, error); 608 goto done; 609 } 610 if ((size_t)namelen != sizeof (sin6_t)) { 611 error = name->sa_family != so->so_family ? 612 EAFNOSUPPORT : EINVAL; 613 eprintsoline(so, error); 614 goto done; 615 } 616 if (name->sa_family != so->so_family) { 617 /* 618 * With IPv6 we require the family to match 619 * unlike in IPv4. 620 */ 621 error = EAFNOSUPPORT; 622 eprintsoline(so, error); 623 goto done; 624 } 625 #ifdef DEBUG 626 /* 627 * Verify that apps don't forget to clear 628 * sin6_scope_id etc 629 */ 630 if (sin6->sin6_scope_id != 0 && 631 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 632 zcmn_err(getzoneid(), CE_WARN, 633 "bind with uninitialized sin6_scope_id " 634 "(%d) on socket. Pid = %d\n", 635 (int)sin6->sin6_scope_id, 636 (int)curproc->p_pid); 637 } 638 if (sin6->__sin6_src_id != 0) { 639 zcmn_err(getzoneid(), CE_WARN, 640 "bind with uninitialized __sin6_src_id " 641 "(%d) on socket. Pid = %d\n", 642 (int)sin6->__sin6_src_id, 643 (int)curproc->p_pid); 644 } 645 #endif /* DEBUG */ 646 break; 647 } 648 default: 649 /* 650 * Don't do any length or sa_family check to allow 651 * non-sockaddr style addresses. 652 */ 653 if (name == NULL) { 654 error = EINVAL; 655 eprintsoline(so, error); 656 goto done; 657 } 658 break; 659 } 660 661 if (namelen > (t_uscalar_t)so->so_laddr_maxlen) { 662 error = ENAMETOOLONG; 663 eprintsoline(so, error); 664 goto done; 665 } 666 /* 667 * Save local address. 668 */ 669 so->so_laddr_len = (socklen_t)namelen; 670 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 671 bcopy(name, so->so_laddr_sa, namelen); 672 673 addr = so->so_laddr_sa; 674 addrlen = (t_uscalar_t)so->so_laddr_len; 675 switch (so->so_family) { 676 case AF_INET6: 677 case AF_INET: 678 break; 679 case AF_UNIX: { 680 struct sockaddr_un *soun = 681 (struct sockaddr_un *)so->so_laddr_sa; 682 struct vnode *vp; 683 struct vattr vattr; 684 685 ASSERT(so->so_ux_bound_vp == NULL); 686 /* 687 * Create vnode for the specified path name. 688 * Keep vnode held with a reference in so_ux_bound_vp. 689 * Use the vnode pointer as the address used in the 690 * bind with the transport. 691 * 692 * Use the same mode as in BSD. In particular this does 693 * not observe the umask. 694 */ 695 /* MAXPATHLEN + soun_family + nul termination */ 696 if (so->so_laddr_len > 697 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 698 error = ENAMETOOLONG; 699 eprintsoline(so, error); 700 goto done; 701 } 702 vattr.va_type = VSOCK; 703 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 704 vattr.va_mask = AT_TYPE|AT_MODE; 705 /* NOTE: holding so_lock */ 706 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 707 EXCL, 0, &vp, CRMKNOD, 0, 0); 708 if (error) { 709 if (error == EEXIST) 710 error = EADDRINUSE; 711 eprintsoline(so, error); 712 goto done; 713 } 714 /* 715 * Establish pointer from the underlying filesystem 716 * vnode to the socket node. 717 * so_ux_bound_vp and v_stream->sd_vnode form the 718 * cross-linkage between the underlying filesystem 719 * node and the socket node. 720 */ 721 ASSERT(SOTOV(so)->v_stream); 722 mutex_enter(&vp->v_lock); 723 vp->v_stream = SOTOV(so)->v_stream; 724 so->so_ux_bound_vp = vp; 725 mutex_exit(&vp->v_lock); 726 727 /* 728 * Use the vnode pointer value as a unique address 729 * (together with the magic number to avoid conflicts 730 * with implicit binds) in the transport provider. 731 */ 732 so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp; 733 so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 734 addr = &so->so_ux_laddr; 735 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 736 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 737 addrlen, 738 ((struct so_ux_addr *)addr)->soua_vp)); 739 break; 740 } 741 } /* end switch (so->so_family) */ 742 } 743 744 /* 745 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 746 * the transport can start passing up T_CONN_IND messages 747 * as soon as it receives the bind req and strsock_proto() 748 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 749 */ 750 if (flags & _SOBIND_LISTEN) { 751 if ((so->so_state & SS_ACCEPTCONN) == 0) 752 clear_acceptconn_on_err = B_TRUE; 753 save_so_backlog = so->so_backlog; 754 restore_backlog_on_err = B_TRUE; 755 so->so_state |= SS_ACCEPTCONN; 756 so->so_backlog = backlog; 757 } 758 759 /* 760 * If NL7C addr(s) have been configured check for addr/port match, 761 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 762 * 763 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 764 * family sockets only. If match mark as such. 765 */ 766 if (nl7c_enabled && ((addr != NULL && 767 (so->so_family == AF_INET || so->so_family == AF_INET6) && 768 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 769 so->so_nl7c_flags == NL7C_AF_NCA)) { 770 /* 771 * NL7C is not supported in non-global zones, 772 * we enforce this restriction here. 773 */ 774 if (so->so_zoneid == GLOBAL_ZONEID) { 775 /* An NL7C socket, mark it */ 776 so->so_nl7c_flags |= NL7C_ENABLED; 777 if (nl7c == NULL) { 778 /* 779 * Was an AF_NCA bind() so add it to the 780 * addr list for reporting purposes. 781 */ 782 nl7c = nl7c_add_addr(addr, addrlen); 783 } 784 } else 785 nl7c = NULL; 786 } 787 /* 788 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 789 * for other transports we will send in a O_T_BIND_REQ. 790 */ 791 if (tcp_udp_xport && 792 (so->so_family == AF_INET || so->so_family == AF_INET6)) 793 PRIM_type = T_BIND_REQ; 794 795 bind_req.PRIM_type = PRIM_type; 796 bind_req.ADDR_length = addrlen; 797 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 798 bind_req.CONIND_number = backlog; 799 /* NOTE: holding so_lock while sleeping */ 800 mp = soallocproto2(&bind_req, sizeof (bind_req), 801 addr, addrlen, 0, _ALLOC_SLEEP); 802 so->so_state &= ~SS_LADDR_VALID; 803 804 /* Done using so_laddr_sa - can drop the lock */ 805 mutex_exit(&so->so_lock); 806 807 /* 808 * Intercept the bind_req message here to check if this <address/port> 809 * was configured as an SSL proxy server, or if another endpoint was 810 * already configured to act as a proxy for us. 811 * 812 * Note, only if NL7C not enabled for this socket. 813 */ 814 if (nl7c == NULL && 815 (so->so_family == AF_INET || so->so_family == AF_INET6) && 816 so->so_type == SOCK_STREAM) { 817 818 if (so->so_kssl_ent != NULL) { 819 kssl_release_ent(so->so_kssl_ent, so, so->so_kssl_type); 820 so->so_kssl_ent = NULL; 821 } 822 823 so->so_kssl_type = kssl_check_proxy(mp, so, &so->so_kssl_ent); 824 switch (so->so_kssl_type) { 825 case KSSL_NO_PROXY: 826 break; 827 828 case KSSL_HAS_PROXY: 829 mutex_enter(&so->so_lock); 830 goto skip_transport; 831 832 case KSSL_IS_PROXY: 833 break; 834 } 835 } 836 837 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 838 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 839 if (error) { 840 eprintsoline(so, error); 841 mutex_enter(&so->so_lock); 842 goto done; 843 } 844 845 mutex_enter(&so->so_lock); 846 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 847 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 848 if (error) { 849 eprintsoline(so, error); 850 goto done; 851 } 852 skip_transport: 853 ASSERT(mp); 854 /* 855 * Even if some TPI message (e.g. T_DISCON_IND) was received in 856 * strsock_proto while the lock was dropped above, the bind 857 * is allowed to complete. 858 */ 859 860 /* Mark as bound. This will be undone if we detect errors below. */ 861 if (flags & _SOBIND_NOXLATE) { 862 ASSERT(so->so_family == AF_UNIX); 863 so->so_state |= SS_FADDR_NOXLATE; 864 } 865 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 866 so->so_state |= SS_ISBOUND; 867 ASSERT(so->so_unbind_mp); 868 869 /* note that we've already set SS_ACCEPTCONN above */ 870 871 /* 872 * Recompute addrlen - an unspecied bind sent down an 873 * address of length zero but we expect the appropriate length 874 * in return. 875 */ 876 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 877 sizeof (so->so_ux_laddr) : so->so_laddr_len); 878 879 bind_ack = (struct T_bind_ack *)mp->b_rptr; 880 /* 881 * The alignment restriction is really too strict but 882 * we want enough alignment to inspect the fields of 883 * a sockaddr_in. 884 */ 885 addr = sogetoff(mp, bind_ack->ADDR_offset, 886 bind_ack->ADDR_length, 887 __TPI_ALIGN_SIZE); 888 if (addr == NULL) { 889 freemsg(mp); 890 error = EPROTO; 891 eprintsoline(so, error); 892 goto done; 893 } 894 if (!(flags & _SOBIND_UNSPEC)) { 895 /* 896 * Verify that the transport didn't return something we 897 * did not want e.g. an address other than what we asked for. 898 * 899 * NOTE: These checks would go away if/when we switch to 900 * using the new TPI (in which the transport would fail 901 * the request instead of assigning a different address). 902 * 903 * NOTE2: For protocols that we don't know (i.e. any 904 * other than AF_INET6, AF_INET and AF_UNIX), we 905 * cannot know if the transport should be expected to 906 * return the same address as that requested. 907 * 908 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 909 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 910 * 911 * For example, in the case of netatalk it may be 912 * inappropriate for the transport to return the 913 * requested address (as it may have allocated a local 914 * port number in behaviour similar to that of an 915 * AF_INET bind request with a port number of zero). 916 * 917 * Given the definition of O_T_BIND_REQ, where the 918 * transport may bind to an address other than the 919 * requested address, it's not possible to determine 920 * whether a returned address that differs from the 921 * requested address is a reason to fail (because the 922 * requested address was not available) or succeed 923 * (because the transport allocated an appropriate 924 * address and/or port). 925 * 926 * sockfs currently requires that the transport return 927 * the requested address in the T_BIND_ACK, unless 928 * there is code here to allow for any discrepancy. 929 * Such code exists for AF_INET and AF_INET6. 930 * 931 * Netatalk chooses to return the requested address 932 * rather than the (correct) allocated address. This 933 * means that netatalk violates the TPI specification 934 * (and would not function correctly if used from a 935 * TLI application), but it does mean that it works 936 * with sockfs. 937 * 938 * As noted above, using the newer XTI bind primitive 939 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 940 * allow sockfs to be more sure about whether or not 941 * the bind request had succeeded (as transports are 942 * not permitted to bind to a different address than 943 * that requested - they must return failure). 944 * Unfortunately, support for T_BIND_REQ may not be 945 * present in all transport implementations (netatalk, 946 * for example, doesn't have it), making the 947 * transition difficult. 948 */ 949 if (bind_ack->ADDR_length != addrlen) { 950 /* Assumes that the requested address was in use */ 951 freemsg(mp); 952 error = EADDRINUSE; 953 eprintsoline(so, error); 954 goto done; 955 } 956 957 switch (so->so_family) { 958 case AF_INET6: 959 case AF_INET: { 960 sin_t *rname, *aname; 961 962 rname = (sin_t *)addr; 963 aname = (sin_t *)so->so_laddr_sa; 964 965 /* 966 * Take advantage of the alignment 967 * of sin_port and sin6_port which fall 968 * in the same place in their data structures. 969 * Just use sin_port for either address family. 970 * 971 * This may become a problem if (heaven forbid) 972 * there's a separate ipv6port_reserved... :-P 973 * 974 * Binding to port 0 has the semantics of letting 975 * the transport bind to any port. 976 * 977 * If the transport is TCP or UDP since we had sent 978 * a T_BIND_REQ we would not get a port other than 979 * what we asked for. 980 */ 981 if (tcp_udp_xport) { 982 /* 983 * Pick up the new port number if we bound to 984 * port 0. 985 */ 986 if (aname->sin_port == 0) 987 aname->sin_port = rname->sin_port; 988 so->so_state |= SS_LADDR_VALID; 989 break; 990 } 991 if (aname->sin_port != 0 && 992 aname->sin_port != rname->sin_port) { 993 freemsg(mp); 994 error = EADDRINUSE; 995 eprintsoline(so, error); 996 goto done; 997 } 998 /* 999 * Pick up the new port number if we bound to port 0. 1000 */ 1001 aname->sin_port = rname->sin_port; 1002 1003 /* 1004 * Unfortunately, addresses aren't _quite_ the same. 1005 */ 1006 if (so->so_family == AF_INET) { 1007 if (aname->sin_addr.s_addr != 1008 rname->sin_addr.s_addr) { 1009 freemsg(mp); 1010 error = EADDRNOTAVAIL; 1011 eprintsoline(so, error); 1012 goto done; 1013 } 1014 } else { 1015 sin6_t *rname6 = (sin6_t *)rname; 1016 sin6_t *aname6 = (sin6_t *)aname; 1017 1018 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1019 &rname6->sin6_addr)) { 1020 freemsg(mp); 1021 error = EADDRNOTAVAIL; 1022 eprintsoline(so, error); 1023 goto done; 1024 } 1025 } 1026 break; 1027 } 1028 case AF_UNIX: 1029 if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) { 1030 freemsg(mp); 1031 error = EADDRINUSE; 1032 eprintsoline(so, error); 1033 eprintso(so, 1034 ("addrlen %d, addr 0x%x, vp %p\n", 1035 addrlen, *((int *)addr), 1036 (void *)so->so_ux_bound_vp)); 1037 goto done; 1038 } 1039 so->so_state |= SS_LADDR_VALID; 1040 break; 1041 default: 1042 /* 1043 * NOTE: This assumes that addresses can be 1044 * byte-compared for equivalence. 1045 */ 1046 if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) { 1047 freemsg(mp); 1048 error = EADDRINUSE; 1049 eprintsoline(so, error); 1050 goto done; 1051 } 1052 /* 1053 * Don't mark SS_LADDR_VALID, as we cannot be 1054 * sure that the returned address is the real 1055 * bound address when talking to an unknown 1056 * transport. 1057 */ 1058 break; 1059 } 1060 } else { 1061 /* 1062 * Save for returned address for getsockname. 1063 * Needed for unspecific bind unless transport supports 1064 * the TI_GETMYNAME ioctl. 1065 * Do this for AF_INET{,6} even though they do, as 1066 * caching info here is much better performance than 1067 * a TPI/STREAMS trip to the transport for getsockname. 1068 * Any which can't for some reason _must_ _not_ set 1069 * LADDR_VALID here for the caching version of getsockname 1070 * to not break; 1071 */ 1072 switch (so->so_family) { 1073 case AF_UNIX: 1074 /* 1075 * Record the address bound with the transport 1076 * for use by socketpair. 1077 */ 1078 bcopy(addr, &so->so_ux_laddr, addrlen); 1079 so->so_state |= SS_LADDR_VALID; 1080 break; 1081 case AF_INET: 1082 case AF_INET6: 1083 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 1084 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 1085 so->so_state |= SS_LADDR_VALID; 1086 break; 1087 default: 1088 /* 1089 * Don't mark SS_LADDR_VALID, as we cannot be 1090 * sure that the returned address is the real 1091 * bound address when talking to an unknown 1092 * transport. 1093 */ 1094 break; 1095 } 1096 } 1097 1098 if (nl7c != NULL) { 1099 /* Register listen()er sonode pointer with NL7C */ 1100 nl7c_listener_addr(nl7c, so); 1101 } 1102 1103 freemsg(mp); 1104 1105 done: 1106 if (error) { 1107 /* reset state & backlog to values held on entry */ 1108 if (clear_acceptconn_on_err == B_TRUE) 1109 so->so_state &= ~SS_ACCEPTCONN; 1110 if (restore_backlog_on_err == B_TRUE) 1111 so->so_backlog = save_so_backlog; 1112 1113 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1114 int err; 1115 1116 err = sotpi_unbind(so, 0); 1117 /* LINTED - statement has no consequent: if */ 1118 if (err) { 1119 eprintsoline(so, error); 1120 } else { 1121 ASSERT(!(so->so_state & SS_ISBOUND)); 1122 } 1123 } 1124 } 1125 if (!(flags & _SOBIND_LOCK_HELD)) { 1126 so_unlock_single(so, SOLOCKED); 1127 mutex_exit(&so->so_lock); 1128 } else { 1129 /* If the caller held the lock don't release it here */ 1130 ASSERT(MUTEX_HELD(&so->so_lock)); 1131 ASSERT(so->so_flag & SOLOCKED); 1132 } 1133 return (error); 1134 } 1135 1136 /* bind the socket */ 1137 static int 1138 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1139 int flags) 1140 { 1141 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1142 return (sotpi_bindlisten(so, name, namelen, 0, flags)); 1143 1144 flags &= ~_SOBIND_SOCKETPAIR; 1145 return (sotpi_bindlisten(so, name, namelen, 1, flags)); 1146 } 1147 1148 /* 1149 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1150 * address, or when listen needs to unbind and bind. 1151 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1152 * so that a sobind can pick them up. 1153 */ 1154 static int 1155 sotpi_unbind(struct sonode *so, int flags) 1156 { 1157 struct T_unbind_req unbind_req; 1158 int error = 0; 1159 mblk_t *mp; 1160 1161 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1162 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1163 1164 ASSERT(MUTEX_HELD(&so->so_lock)); 1165 ASSERT(so->so_flag & SOLOCKED); 1166 1167 if (!(so->so_state & SS_ISBOUND)) { 1168 error = EINVAL; 1169 eprintsoline(so, error); 1170 goto done; 1171 } 1172 1173 mutex_exit(&so->so_lock); 1174 1175 /* 1176 * Flush the read and write side (except stream head read queue) 1177 * and send down T_UNBIND_REQ. 1178 */ 1179 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1180 1181 unbind_req.PRIM_type = T_UNBIND_REQ; 1182 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1183 0, _ALLOC_SLEEP); 1184 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1185 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1186 mutex_enter(&so->so_lock); 1187 if (error) { 1188 eprintsoline(so, error); 1189 goto done; 1190 } 1191 1192 error = sowaitokack(so, T_UNBIND_REQ); 1193 if (error) { 1194 eprintsoline(so, error); 1195 goto done; 1196 } 1197 1198 /* 1199 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1200 * strsock_proto while the lock was dropped above, the unbind 1201 * is allowed to complete. 1202 */ 1203 if (!(flags & _SOUNBIND_REBIND)) { 1204 /* 1205 * Clear out bound address. 1206 */ 1207 vnode_t *vp; 1208 1209 if ((vp = so->so_ux_bound_vp) != NULL) { 1210 1211 /* Undo any SSL proxy setup */ 1212 if ((so->so_family == AF_INET || 1213 so->so_family == AF_INET6) && 1214 (so->so_type == SOCK_STREAM) && 1215 (so->so_kssl_ent != NULL)) { 1216 kssl_release_ent(so->so_kssl_ent, so, 1217 so->so_kssl_type); 1218 so->so_kssl_ent = NULL; 1219 so->so_kssl_type = KSSL_NO_PROXY; 1220 } 1221 1222 so->so_ux_bound_vp = NULL; 1223 vn_rele_stream(vp); 1224 } 1225 /* Clear out address */ 1226 so->so_laddr_len = 0; 1227 } 1228 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1229 1230 done: 1231 1232 /* If the caller held the lock don't release it here */ 1233 ASSERT(MUTEX_HELD(&so->so_lock)); 1234 ASSERT(so->so_flag & SOLOCKED); 1235 1236 return (error); 1237 } 1238 1239 /* 1240 * listen on the socket. 1241 * For TPI conforming transports this has to first unbind with the transport 1242 * and then bind again using the new backlog. 1243 */ 1244 int 1245 sotpi_listen(struct sonode *so, int backlog) 1246 { 1247 int error = 0; 1248 1249 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1250 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1251 1252 if (so->so_serv_type == T_CLTS) 1253 return (EOPNOTSUPP); 1254 1255 /* 1256 * If the socket is ready to accept connections already, then 1257 * return without doing anything. This avoids a problem where 1258 * a second listen() call fails if a connection is pending and 1259 * leaves the socket unbound. Only when we are not unbinding 1260 * with the transport can we safely increase the backlog. 1261 */ 1262 if (so->so_state & SS_ACCEPTCONN && 1263 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1264 /*CONSTCOND*/ 1265 !solisten_tpi_tcp)) 1266 return (0); 1267 1268 if (so->so_state & SS_ISCONNECTED) 1269 return (EINVAL); 1270 1271 mutex_enter(&so->so_lock); 1272 so_lock_single(so); /* Set SOLOCKED */ 1273 1274 if (backlog < 0) 1275 backlog = 0; 1276 /* 1277 * Use the same qlimit as in BSD. BSD checks the qlimit 1278 * before queuing the next connection implying that a 1279 * listen(sock, 0) allows one connection to be queued. 1280 * BSD also uses 1.5 times the requested backlog. 1281 * 1282 * XNS Issue 4 required a strict interpretation of the backlog. 1283 * This has been waived subsequently for Issue 4 and the change 1284 * incorporated in XNS Issue 5. So we aren't required to do 1285 * anything special for XPG apps. 1286 */ 1287 if (backlog >= (INT_MAX - 1) / 3) 1288 backlog = INT_MAX; 1289 else 1290 backlog = backlog * 3 / 2 + 1; 1291 1292 /* 1293 * If the listen doesn't change the backlog we do nothing. 1294 * This avoids an EPROTO error from the transport. 1295 */ 1296 if ((so->so_state & SS_ACCEPTCONN) && 1297 so->so_backlog == backlog) 1298 goto done; 1299 1300 if (!(so->so_state & SS_ISBOUND)) { 1301 /* 1302 * Must have been explicitly bound in the UNIX domain. 1303 */ 1304 if (so->so_family == AF_UNIX) { 1305 error = EINVAL; 1306 goto done; 1307 } 1308 error = sotpi_bindlisten(so, NULL, 0, backlog, 1309 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1310 } else if (backlog > 0) { 1311 /* 1312 * AF_INET{,6} hack to avoid losing the port. 1313 * Assumes that all AF_INET{,6} transports can handle a 1314 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1315 * has already bound thus it is possible to avoid the unbind. 1316 */ 1317 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1318 /*CONSTCOND*/ 1319 !solisten_tpi_tcp)) { 1320 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1321 if (error) 1322 goto done; 1323 } 1324 error = sotpi_bindlisten(so, NULL, 0, backlog, 1325 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1326 } else { 1327 so->so_state |= SS_ACCEPTCONN; 1328 so->so_backlog = backlog; 1329 } 1330 if (error) 1331 goto done; 1332 ASSERT(so->so_state & SS_ACCEPTCONN); 1333 done: 1334 so_unlock_single(so, SOLOCKED); 1335 mutex_exit(&so->so_lock); 1336 return (error); 1337 } 1338 1339 /* 1340 * Disconnect either a specified seqno or all (-1). 1341 * The former is used on listening sockets only. 1342 * 1343 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1344 * the current use of sodisconnect(seqno == -1) is only for shutdown 1345 * so there is no point (and potentially incorrect) to unbind. 1346 */ 1347 int 1348 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1349 { 1350 struct T_discon_req discon_req; 1351 int error = 0; 1352 mblk_t *mp; 1353 1354 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1355 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1356 1357 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1358 mutex_enter(&so->so_lock); 1359 so_lock_single(so); /* Set SOLOCKED */ 1360 } else { 1361 ASSERT(MUTEX_HELD(&so->so_lock)); 1362 ASSERT(so->so_flag & SOLOCKED); 1363 } 1364 1365 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1366 error = EINVAL; 1367 eprintsoline(so, error); 1368 goto done; 1369 } 1370 1371 mutex_exit(&so->so_lock); 1372 /* 1373 * Flush the write side (unless this is a listener) 1374 * and then send down a T_DISCON_REQ. 1375 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1376 * and other messages.) 1377 */ 1378 if (!(so->so_state & SS_ACCEPTCONN)) 1379 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1380 1381 discon_req.PRIM_type = T_DISCON_REQ; 1382 discon_req.SEQ_number = seqno; 1383 mp = soallocproto1(&discon_req, sizeof (discon_req), 1384 0, _ALLOC_SLEEP); 1385 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1386 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1387 mutex_enter(&so->so_lock); 1388 if (error) { 1389 eprintsoline(so, error); 1390 goto done; 1391 } 1392 1393 error = sowaitokack(so, T_DISCON_REQ); 1394 if (error) { 1395 eprintsoline(so, error); 1396 goto done; 1397 } 1398 /* 1399 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1400 * strsock_proto while the lock was dropped above, the disconnect 1401 * is allowed to complete. However, it is not possible to 1402 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1403 */ 1404 so->so_state &= 1405 ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID); 1406 done: 1407 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1408 so_unlock_single(so, SOLOCKED); 1409 mutex_exit(&so->so_lock); 1410 } else { 1411 /* If the caller held the lock don't release it here */ 1412 ASSERT(MUTEX_HELD(&so->so_lock)); 1413 ASSERT(so->so_flag & SOLOCKED); 1414 } 1415 return (error); 1416 } 1417 1418 int 1419 sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop) 1420 { 1421 struct T_conn_ind *conn_ind; 1422 struct T_conn_res *conn_res; 1423 int error = 0; 1424 mblk_t *mp, *ctxmp, *ack_mp; 1425 struct sonode *nso; 1426 vnode_t *nvp; 1427 void *src; 1428 t_uscalar_t srclen; 1429 void *opt; 1430 t_uscalar_t optlen; 1431 t_scalar_t PRIM_type; 1432 t_scalar_t SEQ_number; 1433 size_t sinlen; 1434 1435 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1436 (void *)so, fflag, (void *)nsop, 1437 pr_state(so->so_state, so->so_mode))); 1438 1439 /* 1440 * Defer single-threading the accepting socket until 1441 * the T_CONN_IND has been received and parsed and the 1442 * new sonode has been opened. 1443 */ 1444 1445 /* Check that we are not already connected */ 1446 if ((so->so_state & SS_ACCEPTCONN) == 0) 1447 goto conn_bad; 1448 again: 1449 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1450 goto e_bad; 1451 1452 ASSERT(mp); 1453 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1454 ctxmp = mp->b_cont; 1455 1456 /* 1457 * Save SEQ_number for error paths. 1458 */ 1459 SEQ_number = conn_ind->SEQ_number; 1460 1461 srclen = conn_ind->SRC_length; 1462 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1463 if (src == NULL) { 1464 error = EPROTO; 1465 freemsg(mp); 1466 eprintsoline(so, error); 1467 goto disconnect_unlocked; 1468 } 1469 optlen = conn_ind->OPT_length; 1470 switch (so->so_family) { 1471 case AF_INET: 1472 case AF_INET6: 1473 if ((optlen == sizeof (intptr_t)) && 1474 ((so->so_state & SS_DIRECT) != 0)) { 1475 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1476 &opt, conn_ind->OPT_length); 1477 } else { 1478 /* 1479 * The transport (in this case TCP) hasn't sent up 1480 * a pointer to an instance for the accept fast-path. 1481 * Disable fast-path completely because the call to 1482 * sotpi_create() below would otherwise create an 1483 * incomplete TCP instance, which would lead to 1484 * problems when sockfs sends a normal T_CONN_RES 1485 * message down the new stream. 1486 */ 1487 if (so->so_state & SS_DIRECT) { 1488 int rval; 1489 /* 1490 * For consistency we inform tcp to disable 1491 * direct interface on the listener, though 1492 * we can certainly live without doing this 1493 * because no data will ever travel upstream 1494 * on the listening socket. 1495 */ 1496 so->so_state &= ~SS_DIRECT; 1497 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1498 0, 0, K_TO_K, CRED(), &rval); 1499 } 1500 opt = NULL; 1501 optlen = 0; 1502 } 1503 break; 1504 case AF_UNIX: 1505 default: 1506 if (optlen != 0) { 1507 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1508 __TPI_ALIGN_SIZE); 1509 if (opt == NULL) { 1510 error = EPROTO; 1511 freemsg(mp); 1512 eprintsoline(so, error); 1513 goto disconnect_unlocked; 1514 } 1515 } 1516 if (so->so_family == AF_UNIX) { 1517 if (!(so->so_state & SS_FADDR_NOXLATE)) { 1518 src = NULL; 1519 srclen = 0; 1520 } 1521 /* Extract src address from options */ 1522 if (optlen != 0) 1523 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1524 } 1525 break; 1526 } 1527 1528 /* 1529 * Create the new socket. 1530 */ 1531 VN_HOLD(so->so_accessvp); 1532 nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type, 1533 so->so_protocol, so->so_version, so, &error); 1534 if (nso == NULL) { 1535 ASSERT(error != 0); 1536 /* 1537 * Accept can not fail with ENOBUFS. sotpi_create 1538 * sleeps waiting for memory until a signal is caught 1539 * so return EINTR. 1540 */ 1541 freemsg(mp); 1542 if (error == ENOBUFS) 1543 error = EINTR; 1544 goto e_disc_unl; 1545 } 1546 nvp = SOTOV(nso); 1547 1548 /* 1549 * If the transport sent up an SSL connection context, then attach 1550 * it the new socket, and set the (sd_wputdatafunc)() and 1551 * (sd_rputdatafunc)() stream head hooks to intercept and process 1552 * SSL records. 1553 */ 1554 if (ctxmp != NULL) { 1555 /* 1556 * This kssl_ctx_t is already held for us by the transport. 1557 * So, we don't need to do a kssl_hold_ctx() here. 1558 */ 1559 nso->so_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1560 freemsg(ctxmp); 1561 mp->b_cont = NULL; 1562 strsetrwputdatahooks(nvp, strsock_kssl_input, 1563 strsock_kssl_output); 1564 } 1565 #ifdef DEBUG 1566 /* 1567 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1568 * it's inherited early to allow debugging of the accept code itself. 1569 */ 1570 nso->so_options |= so->so_options & SO_DEBUG; 1571 #endif /* DEBUG */ 1572 1573 /* 1574 * Save the SRC address from the T_CONN_IND 1575 * for getpeername to work on AF_UNIX and on transports that do not 1576 * support TI_GETPEERNAME. 1577 * 1578 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1579 * copyin_name(). 1580 */ 1581 if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) { 1582 error = EINVAL; 1583 freemsg(mp); 1584 eprintsoline(so, error); 1585 goto disconnect_vp_unlocked; 1586 } 1587 nso->so_faddr_len = (socklen_t)srclen; 1588 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 1589 bcopy(src, nso->so_faddr_sa, srclen); 1590 nso->so_state |= SS_FADDR_VALID; 1591 1592 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1593 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1594 cred_t *cr; 1595 1596 if ((cr = DB_CRED(mp)) != NULL) { 1597 crhold(cr); 1598 nso->so_peercred = cr; 1599 nso->so_cpid = DB_CPID(mp); 1600 } 1601 freemsg(mp); 1602 1603 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1604 sizeof (intptr_t), 0, _ALLOC_INTR); 1605 if (mp == NULL) { 1606 /* 1607 * Accept can not fail with ENOBUFS. 1608 * A signal was caught so return EINTR. 1609 */ 1610 error = EINTR; 1611 eprintsoline(so, error); 1612 goto disconnect_vp_unlocked; 1613 } 1614 conn_res = (struct T_conn_res *)mp->b_rptr; 1615 } else { 1616 nso->so_peercred = DB_CRED(mp); 1617 nso->so_cpid = DB_CPID(mp); 1618 DB_CRED(mp) = NULL; 1619 1620 mp->b_rptr = DB_BASE(mp); 1621 conn_res = (struct T_conn_res *)mp->b_rptr; 1622 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1623 } 1624 1625 /* 1626 * New socket must be bound at least in sockfs and, except for AF_INET, 1627 * (or AF_INET6) it also has to be bound in the transport provider. 1628 * We set the local address in the sonode from the T_OK_ACK of the 1629 * T_CONN_RES. For this reason the address we bind to here isn't 1630 * important. 1631 */ 1632 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1633 /*CONSTCOND*/ 1634 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1635 /* 1636 * Optimization for AF_INET{,6} transports 1637 * that can handle a T_CONN_RES without being bound. 1638 */ 1639 mutex_enter(&nso->so_lock); 1640 so_automatic_bind(nso); 1641 mutex_exit(&nso->so_lock); 1642 } else { 1643 /* Perform NULL bind with the transport provider. */ 1644 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) { 1645 ASSERT(error != ENOBUFS); 1646 freemsg(mp); 1647 eprintsoline(nso, error); 1648 goto disconnect_vp_unlocked; 1649 } 1650 } 1651 1652 /* 1653 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1654 * so that any data arriving on the new socket will cause the 1655 * appropriate signals to be delivered for the new socket. 1656 * 1657 * No other thread (except strsock_proto and strsock_misc) 1658 * can access the new socket thus we relax the locking. 1659 */ 1660 nso->so_pgrp = so->so_pgrp; 1661 nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE); 1662 1663 if (nso->so_pgrp != 0) { 1664 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1665 eprintsoline(nso, error); 1666 error = 0; 1667 nso->so_pgrp = 0; 1668 } 1669 } 1670 1671 /* 1672 * Make note of the socket level options. TCP and IP level options 1673 * are already inherited. We could do all this after accept is 1674 * successful but doing it here simplifies code and no harm done 1675 * for error case. 1676 */ 1677 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1678 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1679 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1680 nso->so_sndbuf = so->so_sndbuf; 1681 nso->so_rcvbuf = so->so_rcvbuf; 1682 if (nso->so_options & SO_LINGER) 1683 nso->so_linger = so->so_linger; 1684 1685 if ((so->so_state & SS_DIRECT) != 0) { 1686 1687 ASSERT(opt != NULL); 1688 1689 conn_res->OPT_length = optlen; 1690 conn_res->OPT_offset = MBLKL(mp); 1691 bcopy(&opt, mp->b_wptr, optlen); 1692 mp->b_wptr += optlen; 1693 conn_res->PRIM_type = T_CONN_RES; 1694 conn_res->ACCEPTOR_id = 0; 1695 PRIM_type = T_CONN_RES; 1696 1697 /* Send down the T_CONN_RES on acceptor STREAM */ 1698 error = kstrputmsg(SOTOV(nso), mp, NULL, 1699 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1700 if (error) { 1701 mutex_enter(&so->so_lock); 1702 so_lock_single(so); 1703 eprintsoline(so, error); 1704 goto disconnect_vp; 1705 } 1706 mutex_enter(&nso->so_lock); 1707 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1708 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1709 if (error) { 1710 mutex_exit(&nso->so_lock); 1711 mutex_enter(&so->so_lock); 1712 so_lock_single(so); 1713 eprintsoline(so, error); 1714 goto disconnect_vp; 1715 } 1716 if (nso->so_family == AF_INET) { 1717 sin_t *sin; 1718 1719 sin = (sin_t *)(ack_mp->b_rptr + 1720 sizeof (struct T_ok_ack)); 1721 bcopy(sin, nso->so_laddr_sa, sizeof (sin_t)); 1722 nso->so_laddr_len = sizeof (sin_t); 1723 } else { 1724 sin6_t *sin6; 1725 1726 sin6 = (sin6_t *)(ack_mp->b_rptr + 1727 sizeof (struct T_ok_ack)); 1728 bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t)); 1729 nso->so_laddr_len = sizeof (sin6_t); 1730 } 1731 freemsg(ack_mp); 1732 1733 nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID; 1734 nso->so_priv = opt; 1735 1736 if (so->so_nl7c_flags & NL7C_ENABLED) { 1737 /* 1738 * A NL7C marked listen()er so the new socket 1739 * inherits the listen()er's NL7C state, except 1740 * for NL7C_POLLIN. 1741 * 1742 * Only call NL7C to process the new socket if 1743 * the listen socket allows blocking i/o. 1744 */ 1745 nso->so_nl7c_flags = so->so_nl7c_flags & (~NL7C_POLLIN); 1746 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1747 /* 1748 * Nonblocking accept() just make it 1749 * persist to defer processing to the 1750 * read-side syscall (e.g. read). 1751 */ 1752 nso->so_nl7c_flags |= NL7C_SOPERSIST; 1753 } else if (nl7c_process(nso, B_FALSE)) { 1754 /* 1755 * NL7C has completed processing on the 1756 * socket, close the socket and back to 1757 * the top to await the next T_CONN_IND. 1758 */ 1759 mutex_exit(&nso->so_lock); 1760 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1761 CRED(), NULL); 1762 VN_RELE(nvp); 1763 goto again; 1764 } 1765 /* Pass the new socket out */ 1766 } 1767 1768 mutex_exit(&nso->so_lock); 1769 1770 /* 1771 * It's possible, through the use of autopush for example, 1772 * that the acceptor stream may not support SS_DIRECT 1773 * semantics. If the new socket does not support SS_DIRECT 1774 * we issue a _SIOCSOCKFALLBACK to inform the transport 1775 * as we would in the I_PUSH case. 1776 */ 1777 if (!(nso->so_state & SS_DIRECT)) { 1778 int rval; 1779 1780 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 1781 0, 0, K_TO_K, CRED(), &rval)) != 0) { 1782 mutex_enter(&so->so_lock); 1783 so_lock_single(so); 1784 eprintsoline(so, error); 1785 goto disconnect_vp; 1786 } 1787 } 1788 1789 /* 1790 * Pass out new socket. 1791 */ 1792 if (nsop != NULL) 1793 *nsop = nso; 1794 1795 return (0); 1796 } 1797 1798 /* 1799 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1800 * which don't support the FireEngine accept fast-path. It is also 1801 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1802 * again. Neither sockfs nor TCP attempt to find out if some other 1803 * random module has been inserted in between (in which case we 1804 * should follow TLI accept behaviour). We blindly assume the worst 1805 * case and revert back to old behaviour i.e. TCP will not send us 1806 * any option (eager) and the accept should happen on the listener 1807 * queue. Any queued T_conn_ind have already got their options removed 1808 * by so_sock2_stream() when "sockmod" was I_POP'd. 1809 */ 1810 /* 1811 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1812 */ 1813 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1814 #ifdef _ILP32 1815 queue_t *q; 1816 1817 /* 1818 * Find read queue in driver 1819 * Can safely do this since we "own" nso/nvp. 1820 */ 1821 q = strvp2wq(nvp)->q_next; 1822 while (SAMESTR(q)) 1823 q = q->q_next; 1824 q = RD(q); 1825 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1826 #else 1827 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1828 #endif /* _ILP32 */ 1829 conn_res->PRIM_type = O_T_CONN_RES; 1830 PRIM_type = O_T_CONN_RES; 1831 } else { 1832 conn_res->ACCEPTOR_id = nso->so_acceptor_id; 1833 conn_res->PRIM_type = T_CONN_RES; 1834 PRIM_type = T_CONN_RES; 1835 } 1836 conn_res->SEQ_number = SEQ_number; 1837 conn_res->OPT_length = 0; 1838 conn_res->OPT_offset = 0; 1839 1840 mutex_enter(&so->so_lock); 1841 so_lock_single(so); /* Set SOLOCKED */ 1842 mutex_exit(&so->so_lock); 1843 1844 error = kstrputmsg(SOTOV(so), mp, NULL, 1845 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1846 mutex_enter(&so->so_lock); 1847 if (error) { 1848 eprintsoline(so, error); 1849 goto disconnect_vp; 1850 } 1851 error = sowaitprim(so, PRIM_type, T_OK_ACK, 1852 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1853 if (error) { 1854 eprintsoline(so, error); 1855 goto disconnect_vp; 1856 } 1857 /* 1858 * If there is a sin/sin6 appended onto the T_OK_ACK use 1859 * that to set the local address. If this is not present 1860 * then we zero out the address and don't set the 1861 * SS_LADDR_VALID bit. For AF_UNIX endpoints we copy over 1862 * the pathname from the listening socket. 1863 */ 1864 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 1865 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 1866 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 1867 ack_mp->b_rptr += sizeof (struct T_ok_ack); 1868 bcopy(ack_mp->b_rptr, nso->so_laddr_sa, sinlen); 1869 nso->so_laddr_len = sinlen; 1870 nso->so_state |= SS_LADDR_VALID; 1871 } else if (nso->so_family == AF_UNIX) { 1872 ASSERT(so->so_family == AF_UNIX); 1873 nso->so_laddr_len = so->so_laddr_len; 1874 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1875 bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len); 1876 nso->so_state |= SS_LADDR_VALID; 1877 } else { 1878 nso->so_laddr_len = so->so_laddr_len; 1879 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1880 bzero(nso->so_laddr_sa, nso->so_addr_size); 1881 nso->so_laddr_sa->sa_family = nso->so_family; 1882 } 1883 freemsg(ack_mp); 1884 1885 so_unlock_single(so, SOLOCKED); 1886 mutex_exit(&so->so_lock); 1887 1888 nso->so_state |= SS_ISCONNECTED; 1889 1890 /* 1891 * Pass out new socket. 1892 */ 1893 if (nsop != NULL) 1894 *nsop = nso; 1895 1896 return (0); 1897 1898 1899 eproto_disc_unl: 1900 error = EPROTO; 1901 e_disc_unl: 1902 eprintsoline(so, error); 1903 goto disconnect_unlocked; 1904 1905 pr_disc_vp_unl: 1906 eprintsoline(so, error); 1907 disconnect_vp_unlocked: 1908 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 1909 VN_RELE(nvp); 1910 disconnect_unlocked: 1911 (void) sodisconnect(so, SEQ_number, 0); 1912 return (error); 1913 1914 pr_disc_vp: 1915 eprintsoline(so, error); 1916 disconnect_vp: 1917 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 1918 so_unlock_single(so, SOLOCKED); 1919 mutex_exit(&so->so_lock); 1920 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 1921 VN_RELE(nvp); 1922 return (error); 1923 1924 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 1925 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 1926 ? EOPNOTSUPP : EINVAL; 1927 e_bad: 1928 eprintsoline(so, error); 1929 return (error); 1930 } 1931 1932 /* 1933 * connect a socket. 1934 * 1935 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 1936 * unconnect (by specifying a null address). 1937 */ 1938 int 1939 sotpi_connect(struct sonode *so, 1940 const struct sockaddr *name, 1941 socklen_t namelen, 1942 int fflag, 1943 int flags) 1944 { 1945 struct T_conn_req conn_req; 1946 int error = 0; 1947 mblk_t *mp; 1948 void *src; 1949 socklen_t srclen; 1950 void *addr; 1951 socklen_t addrlen; 1952 boolean_t need_unlock; 1953 1954 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 1955 (void *)so, (void *)name, namelen, fflag, flags, 1956 pr_state(so->so_state, so->so_mode))); 1957 1958 /* 1959 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 1960 * avoid sleeping for memory with SOLOCKED held. 1961 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen 1962 * + sizeof (struct T_opthdr). 1963 * (the AF_UNIX so_ux_addr_xlate() does not make the address 1964 * exceed so_faddr_maxlen). 1965 */ 1966 mp = soallocproto(sizeof (struct T_conn_req) + 1967 2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR); 1968 if (mp == NULL) { 1969 /* 1970 * Connect can not fail with ENOBUFS. A signal was 1971 * caught so return EINTR. 1972 */ 1973 error = EINTR; 1974 eprintsoline(so, error); 1975 return (error); 1976 } 1977 1978 mutex_enter(&so->so_lock); 1979 /* 1980 * Make sure there is a preallocated T_unbind_req message 1981 * before any binding. This message is allocated when the 1982 * socket is created. Since another thread can consume 1983 * so_unbind_mp by the time we return from so_lock_single(), 1984 * we should check the availability of so_unbind_mp after 1985 * we return from so_lock_single(). 1986 */ 1987 1988 so_lock_single(so); /* Set SOLOCKED */ 1989 need_unlock = B_TRUE; 1990 1991 if (so->so_unbind_mp == NULL) { 1992 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 1993 /* NOTE: holding so_lock while sleeping */ 1994 so->so_unbind_mp = 1995 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR); 1996 if (so->so_unbind_mp == NULL) { 1997 error = EINTR; 1998 goto done; 1999 } 2000 } 2001 2002 /* 2003 * Can't have done a listen before connecting. 2004 */ 2005 if (so->so_state & SS_ACCEPTCONN) { 2006 error = EOPNOTSUPP; 2007 goto done; 2008 } 2009 2010 /* 2011 * Must be bound with the transport 2012 */ 2013 if (!(so->so_state & SS_ISBOUND)) { 2014 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2015 /*CONSTCOND*/ 2016 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2017 /* 2018 * Optimization for AF_INET{,6} transports 2019 * that can handle a T_CONN_REQ without being bound. 2020 */ 2021 so_automatic_bind(so); 2022 } else { 2023 error = sotpi_bind(so, NULL, 0, 2024 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 2025 if (error) 2026 goto done; 2027 } 2028 ASSERT(so->so_state & SS_ISBOUND); 2029 flags |= _SOCONNECT_DID_BIND; 2030 } 2031 2032 /* 2033 * Handle a connect to a name parameter of type AF_UNSPEC like a 2034 * connect to a null address. This is the portable method to 2035 * unconnect a socket. 2036 */ 2037 if ((namelen >= sizeof (sa_family_t)) && 2038 (name->sa_family == AF_UNSPEC)) { 2039 name = NULL; 2040 namelen = 0; 2041 } 2042 2043 /* 2044 * Check that we are not already connected. 2045 * A connection-oriented socket cannot be reconnected. 2046 * A connected connection-less socket can be 2047 * - connected to a different address by a subsequent connect 2048 * - "unconnected" by a connect to the NULL address 2049 */ 2050 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2051 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2052 if (so->so_mode & SM_CONNREQUIRED) { 2053 /* Connection-oriented socket */ 2054 error = so->so_state & SS_ISCONNECTED ? 2055 EISCONN : EALREADY; 2056 goto done; 2057 } 2058 /* Connection-less socket */ 2059 if (name == NULL) { 2060 /* 2061 * Remove the connected state and clear SO_DGRAM_ERRIND 2062 * since it was set when the socket was connected. 2063 * If this is UDP also send down a T_DISCON_REQ. 2064 */ 2065 int val; 2066 2067 if ((so->so_family == AF_INET || 2068 so->so_family == AF_INET6) && 2069 (so->so_type == SOCK_DGRAM || 2070 so->so_type == SOCK_RAW) && 2071 /*CONSTCOND*/ 2072 !soconnect_tpi_udp) { 2073 /* XXX What about implicitly unbinding here? */ 2074 error = sodisconnect(so, -1, 2075 _SODISCONNECT_LOCK_HELD); 2076 } else { 2077 so->so_state &= 2078 ~(SS_ISCONNECTED | SS_ISCONNECTING | 2079 SS_FADDR_VALID); 2080 so->so_faddr_len = 0; 2081 } 2082 2083 so_unlock_single(so, SOLOCKED); 2084 mutex_exit(&so->so_lock); 2085 2086 val = 0; 2087 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2088 &val, (t_uscalar_t)sizeof (val)); 2089 2090 mutex_enter(&so->so_lock); 2091 so_lock_single(so); /* Set SOLOCKED */ 2092 goto done; 2093 } 2094 } 2095 ASSERT(so->so_state & SS_ISBOUND); 2096 2097 if (name == NULL || namelen == 0) { 2098 error = EINVAL; 2099 goto done; 2100 } 2101 /* 2102 * Mark the socket if so_faddr_sa represents the transport level 2103 * address. 2104 */ 2105 if (flags & _SOCONNECT_NOXLATE) { 2106 struct sockaddr_ux *soaddr_ux; 2107 2108 ASSERT(so->so_family == AF_UNIX); 2109 if (namelen != sizeof (struct sockaddr_ux)) { 2110 error = EINVAL; 2111 goto done; 2112 } 2113 soaddr_ux = (struct sockaddr_ux *)name; 2114 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2115 namelen = sizeof (soaddr_ux->sou_addr); 2116 so->so_state |= SS_FADDR_NOXLATE; 2117 } 2118 2119 /* 2120 * Length and family checks. 2121 */ 2122 error = so_addr_verify(so, name, namelen); 2123 if (error) 2124 goto bad; 2125 2126 /* 2127 * Save foreign address. Needed for AF_UNIX as well as 2128 * transport providers that do not support TI_GETPEERNAME. 2129 * Also used for cached foreign address for TCP and UDP. 2130 */ 2131 if (namelen > (t_uscalar_t)so->so_faddr_maxlen) { 2132 error = EINVAL; 2133 goto done; 2134 } 2135 so->so_faddr_len = (socklen_t)namelen; 2136 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2137 bcopy(name, so->so_faddr_sa, namelen); 2138 so->so_state |= SS_FADDR_VALID; 2139 2140 if (so->so_family == AF_UNIX) { 2141 if (so->so_state & SS_FADDR_NOXLATE) { 2142 /* 2143 * Already have a transport internal address. Do not 2144 * pass any (transport internal) source address. 2145 */ 2146 addr = so->so_faddr_sa; 2147 addrlen = (t_uscalar_t)so->so_faddr_len; 2148 src = NULL; 2149 srclen = 0; 2150 } else { 2151 /* 2152 * Pass the sockaddr_un source address as an option 2153 * and translate the remote address. 2154 * Holding so_lock thus so_laddr_sa can not change. 2155 */ 2156 src = so->so_laddr_sa; 2157 srclen = (t_uscalar_t)so->so_laddr_len; 2158 dprintso(so, 1, 2159 ("sotpi_connect UNIX: srclen %d, src %p\n", 2160 srclen, src)); 2161 error = so_ux_addr_xlate(so, 2162 so->so_faddr_sa, (socklen_t)so->so_faddr_len, 2163 (flags & _SOCONNECT_XPG4_2), 2164 &addr, &addrlen); 2165 if (error) 2166 goto bad; 2167 } 2168 } else { 2169 addr = so->so_faddr_sa; 2170 addrlen = (t_uscalar_t)so->so_faddr_len; 2171 src = NULL; 2172 srclen = 0; 2173 } 2174 /* 2175 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2176 * option which asks the transport provider to send T_UDERR_IND 2177 * messages. These T_UDERR_IND messages are used to return connected 2178 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2179 * 2180 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2181 * we send down a T_CONN_REQ. This is needed to let the 2182 * transport assign a local address that is consistent with 2183 * the remote address. Applications depend on a getsockname() 2184 * after a connect() to retrieve the "source" IP address for 2185 * the connected socket. Invalidate the cached local address 2186 * to force getsockname() to enquire of the transport. 2187 */ 2188 if (!(so->so_mode & SM_CONNREQUIRED)) { 2189 /* 2190 * Datagram socket. 2191 */ 2192 int32_t val; 2193 2194 so_unlock_single(so, SOLOCKED); 2195 mutex_exit(&so->so_lock); 2196 2197 val = 1; 2198 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2199 &val, (t_uscalar_t)sizeof (val)); 2200 2201 mutex_enter(&so->so_lock); 2202 so_lock_single(so); /* Set SOLOCKED */ 2203 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2204 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2205 soconnect_tpi_udp) { 2206 soisconnected(so); 2207 goto done; 2208 } 2209 /* 2210 * Send down T_CONN_REQ etc. 2211 * Clear fflag to avoid returning EWOULDBLOCK. 2212 */ 2213 fflag = 0; 2214 ASSERT(so->so_family != AF_UNIX); 2215 so->so_state &= ~SS_LADDR_VALID; 2216 } else if (so->so_laddr_len != 0) { 2217 /* 2218 * If the local address or port was "any" then it may be 2219 * changed by the transport as a result of the 2220 * connect. Invalidate the cached version if we have one. 2221 */ 2222 switch (so->so_family) { 2223 case AF_INET: 2224 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t)); 2225 if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr == 2226 INADDR_ANY || 2227 ((sin_t *)so->so_laddr_sa)->sin_port == 0) 2228 so->so_state &= ~SS_LADDR_VALID; 2229 break; 2230 2231 case AF_INET6: 2232 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t)); 2233 if (IN6_IS_ADDR_UNSPECIFIED( 2234 &((sin6_t *)so->so_laddr_sa) ->sin6_addr) || 2235 IN6_IS_ADDR_V4MAPPED_ANY( 2236 &((sin6_t *)so->so_laddr_sa)->sin6_addr) || 2237 ((sin6_t *)so->so_laddr_sa)->sin6_port == 0) 2238 so->so_state &= ~SS_LADDR_VALID; 2239 break; 2240 2241 default: 2242 break; 2243 } 2244 } 2245 2246 /* 2247 * Check for failure of an earlier call 2248 */ 2249 if (so->so_error != 0) 2250 goto so_bad; 2251 2252 /* 2253 * Send down T_CONN_REQ. Message was allocated above. 2254 */ 2255 conn_req.PRIM_type = T_CONN_REQ; 2256 conn_req.DEST_length = addrlen; 2257 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2258 if (srclen == 0) { 2259 conn_req.OPT_length = 0; 2260 conn_req.OPT_offset = 0; 2261 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2262 soappendmsg(mp, addr, addrlen); 2263 } else { 2264 /* 2265 * There is a AF_UNIX sockaddr_un to include as a source 2266 * address option. 2267 */ 2268 struct T_opthdr toh; 2269 2270 toh.level = SOL_SOCKET; 2271 toh.name = SO_SRCADDR; 2272 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2273 toh.status = 0; 2274 conn_req.OPT_length = 2275 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2276 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2277 _TPI_ALIGN_TOPT(addrlen)); 2278 2279 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2280 soappendmsg(mp, addr, addrlen); 2281 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2282 soappendmsg(mp, &toh, sizeof (toh)); 2283 soappendmsg(mp, src, srclen); 2284 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2285 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2286 } 2287 /* 2288 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2289 * in order to have the right state when the T_CONN_CON shows up. 2290 */ 2291 soisconnecting(so); 2292 mutex_exit(&so->so_lock); 2293 2294 if (audit_active) 2295 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2296 2297 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2298 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2299 mp = NULL; 2300 mutex_enter(&so->so_lock); 2301 if (error != 0) 2302 goto bad; 2303 2304 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2305 goto bad; 2306 2307 /* Allow other threads to access the socket */ 2308 so_unlock_single(so, SOLOCKED); 2309 need_unlock = B_FALSE; 2310 2311 /* 2312 * Wait until we get a T_CONN_CON or an error 2313 */ 2314 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2315 so_lock_single(so); /* Set SOLOCKED */ 2316 need_unlock = B_TRUE; 2317 } 2318 2319 done: 2320 freemsg(mp); 2321 switch (error) { 2322 case EINPROGRESS: 2323 case EALREADY: 2324 case EISCONN: 2325 case EINTR: 2326 /* Non-fatal errors */ 2327 so->so_state &= ~SS_LADDR_VALID; 2328 /* FALLTHRU */ 2329 case 0: 2330 break; 2331 2332 case EHOSTUNREACH: 2333 if (flags & _SOCONNECT_XPG4_2) { 2334 /* 2335 * X/Open specification contains a requirement that 2336 * ENETUNREACH be returned but does not require 2337 * EHOSTUNREACH. In order to keep the test suite 2338 * happy we mess with the errno here. 2339 */ 2340 error = ENETUNREACH; 2341 } 2342 /* FALLTHRU */ 2343 2344 default: 2345 ASSERT(need_unlock); 2346 /* 2347 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2348 * and invalidate local-address cache 2349 */ 2350 so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID); 2351 /* A discon_ind might have already unbound us */ 2352 if ((flags & _SOCONNECT_DID_BIND) && 2353 (so->so_state & SS_ISBOUND)) { 2354 int err; 2355 2356 err = sotpi_unbind(so, 0); 2357 /* LINTED - statement has no conseq */ 2358 if (err) { 2359 eprintsoline(so, err); 2360 } 2361 } 2362 break; 2363 } 2364 if (need_unlock) 2365 so_unlock_single(so, SOLOCKED); 2366 mutex_exit(&so->so_lock); 2367 return (error); 2368 2369 so_bad: error = sogeterr(so); 2370 bad: eprintsoline(so, error); 2371 goto done; 2372 } 2373 2374 int 2375 sotpi_shutdown(struct sonode *so, int how) 2376 { 2377 struct T_ordrel_req ordrel_req; 2378 mblk_t *mp; 2379 uint_t old_state, state_change; 2380 int error = 0; 2381 2382 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2383 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2384 2385 mutex_enter(&so->so_lock); 2386 so_lock_single(so); /* Set SOLOCKED */ 2387 2388 /* 2389 * SunOS 4.X has no check for datagram sockets. 2390 * 5.X checks that it is connected (ENOTCONN) 2391 * X/Open requires that we check the connected state. 2392 */ 2393 if (!(so->so_state & SS_ISCONNECTED)) { 2394 if (!xnet_skip_checks) { 2395 error = ENOTCONN; 2396 if (xnet_check_print) { 2397 printf("sockfs: X/Open shutdown check " 2398 "caused ENOTCONN\n"); 2399 } 2400 } 2401 goto done; 2402 } 2403 /* 2404 * Record the current state and then perform any state changes. 2405 * Then use the difference between the old and new states to 2406 * determine which messages need to be sent. 2407 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2408 * duplicate calls to shutdown(). 2409 */ 2410 old_state = so->so_state; 2411 2412 switch (how) { 2413 case 0: 2414 socantrcvmore(so); 2415 break; 2416 case 1: 2417 socantsendmore(so); 2418 break; 2419 case 2: 2420 socantsendmore(so); 2421 socantrcvmore(so); 2422 break; 2423 default: 2424 error = EINVAL; 2425 goto done; 2426 } 2427 2428 /* 2429 * Assumes that the SS_CANT* flags are never cleared in the above code. 2430 */ 2431 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2432 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2433 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2434 2435 switch (state_change) { 2436 case 0: 2437 dprintso(so, 1, 2438 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2439 so->so_state)); 2440 goto done; 2441 2442 case SS_CANTRCVMORE: 2443 mutex_exit(&so->so_lock); 2444 strseteof(SOTOV(so), 1); 2445 /* 2446 * strseteof takes care of read side wakeups, 2447 * pollwakeups, and signals. 2448 */ 2449 /* 2450 * Get the read lock before flushing data to avoid problems 2451 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2452 */ 2453 mutex_enter(&so->so_lock); 2454 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2455 mutex_exit(&so->so_lock); 2456 2457 /* Flush read side queue */ 2458 strflushrq(SOTOV(so), FLUSHALL); 2459 2460 mutex_enter(&so->so_lock); 2461 so_unlock_read(so); /* Clear SOREADLOCKED */ 2462 break; 2463 2464 case SS_CANTSENDMORE: 2465 mutex_exit(&so->so_lock); 2466 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2467 mutex_enter(&so->so_lock); 2468 break; 2469 2470 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2471 mutex_exit(&so->so_lock); 2472 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2473 strseteof(SOTOV(so), 1); 2474 /* 2475 * strseteof takes care of read side wakeups, 2476 * pollwakeups, and signals. 2477 */ 2478 /* 2479 * Get the read lock before flushing data to avoid problems 2480 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2481 */ 2482 mutex_enter(&so->so_lock); 2483 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2484 mutex_exit(&so->so_lock); 2485 2486 /* Flush read side queue */ 2487 strflushrq(SOTOV(so), FLUSHALL); 2488 2489 mutex_enter(&so->so_lock); 2490 so_unlock_read(so); /* Clear SOREADLOCKED */ 2491 break; 2492 } 2493 2494 ASSERT(MUTEX_HELD(&so->so_lock)); 2495 2496 /* 2497 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2498 * was set due to this call and the new state has both of them set: 2499 * Send the AF_UNIX close indication 2500 * For T_COTS send a discon_ind 2501 * 2502 * If cantsend was set due to this call: 2503 * For T_COTSORD send an ordrel_ind 2504 * 2505 * Note that for T_CLTS there is no message sent here. 2506 */ 2507 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2508 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2509 /* 2510 * For SunOS 4.X compatibility we tell the other end 2511 * that we are unable to receive at this point. 2512 */ 2513 if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS) 2514 so_unix_close(so); 2515 2516 if (so->so_serv_type == T_COTS) 2517 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2518 } 2519 if ((state_change & SS_CANTSENDMORE) && 2520 (so->so_serv_type == T_COTS_ORD)) { 2521 /* Send an orderly release */ 2522 ordrel_req.PRIM_type = T_ORDREL_REQ; 2523 2524 mutex_exit(&so->so_lock); 2525 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2526 0, _ALLOC_SLEEP); 2527 /* 2528 * Send down the T_ORDREL_REQ even if there is flow control. 2529 * This prevents shutdown from blocking. 2530 * Note that there is no T_OK_ACK for ordrel_req. 2531 */ 2532 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2533 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2534 mutex_enter(&so->so_lock); 2535 if (error) { 2536 eprintsoline(so, error); 2537 goto done; 2538 } 2539 } 2540 2541 done: 2542 so_unlock_single(so, SOLOCKED); 2543 mutex_exit(&so->so_lock); 2544 return (error); 2545 } 2546 2547 /* 2548 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2549 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2550 * that we have closed. 2551 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2552 * T_UNITDATA_REQ containing the same option. 2553 * 2554 * For SOCK_DGRAM half-connections (somebody connected to this end 2555 * but this end is not connect) we don't know where to send any 2556 * SO_UNIX_CLOSE. 2557 * 2558 * We have to ignore stream head errors just in case there has been 2559 * a shutdown(output). 2560 * Ignore any flow control to try to get the message more quickly to the peer. 2561 * While locally ignoring flow control solves the problem when there 2562 * is only the loopback transport on the stream it would not provide 2563 * the correct AF_UNIX socket semantics when one or more modules have 2564 * been pushed. 2565 */ 2566 void 2567 so_unix_close(struct sonode *so) 2568 { 2569 int error; 2570 struct T_opthdr toh; 2571 mblk_t *mp; 2572 2573 ASSERT(MUTEX_HELD(&so->so_lock)); 2574 2575 ASSERT(so->so_family == AF_UNIX); 2576 2577 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2578 (SS_ISCONNECTED|SS_ISBOUND)) 2579 return; 2580 2581 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2582 (void *)so, pr_state(so->so_state, so->so_mode))); 2583 2584 toh.level = SOL_SOCKET; 2585 toh.name = SO_UNIX_CLOSE; 2586 2587 /* zero length + header */ 2588 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2589 toh.status = 0; 2590 2591 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2592 struct T_optdata_req tdr; 2593 2594 tdr.PRIM_type = T_OPTDATA_REQ; 2595 tdr.DATA_flag = 0; 2596 2597 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2598 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2599 2600 /* NOTE: holding so_lock while sleeping */ 2601 mp = soallocproto2(&tdr, sizeof (tdr), 2602 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2603 } else { 2604 struct T_unitdata_req tudr; 2605 void *addr; 2606 socklen_t addrlen; 2607 void *src; 2608 socklen_t srclen; 2609 struct T_opthdr toh2; 2610 t_scalar_t size; 2611 2612 /* Connecteded DGRAM socket */ 2613 2614 /* 2615 * For AF_UNIX the destination address is translated to 2616 * an internal name and the source address is passed as 2617 * an option. 2618 */ 2619 /* 2620 * Length and family checks. 2621 */ 2622 error = so_addr_verify(so, so->so_faddr_sa, 2623 (t_uscalar_t)so->so_faddr_len); 2624 if (error) { 2625 eprintsoline(so, error); 2626 return; 2627 } 2628 if (so->so_state & SS_FADDR_NOXLATE) { 2629 /* 2630 * Already have a transport internal address. Do not 2631 * pass any (transport internal) source address. 2632 */ 2633 addr = so->so_faddr_sa; 2634 addrlen = (t_uscalar_t)so->so_faddr_len; 2635 src = NULL; 2636 srclen = 0; 2637 } else { 2638 /* 2639 * Pass the sockaddr_un source address as an option 2640 * and translate the remote address. 2641 * Holding so_lock thus so_laddr_sa can not change. 2642 */ 2643 src = so->so_laddr_sa; 2644 srclen = (socklen_t)so->so_laddr_len; 2645 dprintso(so, 1, 2646 ("so_ux_close: srclen %d, src %p\n", 2647 srclen, src)); 2648 error = so_ux_addr_xlate(so, 2649 so->so_faddr_sa, 2650 (socklen_t)so->so_faddr_len, 0, 2651 &addr, &addrlen); 2652 if (error) { 2653 eprintsoline(so, error); 2654 return; 2655 } 2656 } 2657 tudr.PRIM_type = T_UNITDATA_REQ; 2658 tudr.DEST_length = addrlen; 2659 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2660 if (srclen == 0) { 2661 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2662 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2663 _TPI_ALIGN_TOPT(addrlen)); 2664 2665 size = tudr.OPT_offset + tudr.OPT_length; 2666 /* NOTE: holding so_lock while sleeping */ 2667 mp = soallocproto2(&tudr, sizeof (tudr), 2668 addr, addrlen, size, _ALLOC_SLEEP); 2669 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2670 soappendmsg(mp, &toh, sizeof (toh)); 2671 } else { 2672 /* 2673 * There is a AF_UNIX sockaddr_un to include as a 2674 * source address option. 2675 */ 2676 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2677 _TPI_ALIGN_TOPT(srclen)); 2678 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2679 _TPI_ALIGN_TOPT(addrlen)); 2680 2681 toh2.level = SOL_SOCKET; 2682 toh2.name = SO_SRCADDR; 2683 toh2.len = (t_uscalar_t)(srclen + 2684 sizeof (struct T_opthdr)); 2685 toh2.status = 0; 2686 2687 size = tudr.OPT_offset + tudr.OPT_length; 2688 2689 /* NOTE: holding so_lock while sleeping */ 2690 mp = soallocproto2(&tudr, sizeof (tudr), 2691 addr, addrlen, size, _ALLOC_SLEEP); 2692 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2693 soappendmsg(mp, &toh, sizeof (toh)); 2694 soappendmsg(mp, &toh2, sizeof (toh2)); 2695 soappendmsg(mp, src, srclen); 2696 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2697 } 2698 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2699 } 2700 mutex_exit(&so->so_lock); 2701 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2702 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2703 mutex_enter(&so->so_lock); 2704 } 2705 2706 /* 2707 * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 2708 */ 2709 int 2710 sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) 2711 { 2712 mblk_t *mp, *nmp; 2713 int error; 2714 2715 dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", 2716 (void *)so, (void *)msg, flags)); 2717 2718 /* 2719 * There is never any oob data with addresses or control since 2720 * the T_EXDATA_IND does not carry any options. 2721 */ 2722 msg->msg_controllen = 0; 2723 msg->msg_namelen = 0; 2724 2725 mutex_enter(&so->so_lock); 2726 ASSERT(so_verify_oobstate(so)); 2727 if ((so->so_options & SO_OOBINLINE) || 2728 (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 2729 dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 2730 mutex_exit(&so->so_lock); 2731 return (EINVAL); 2732 } 2733 if (!(so->so_state & SS_HAVEOOBDATA)) { 2734 dprintso(so, 1, ("sorecvoob: no data yet\n")); 2735 mutex_exit(&so->so_lock); 2736 return (EWOULDBLOCK); 2737 } 2738 ASSERT(so->so_oobmsg != NULL); 2739 mp = so->so_oobmsg; 2740 if (flags & MSG_PEEK) { 2741 /* 2742 * Since recv* can not return ENOBUFS we can not use dupmsg. 2743 * Instead we revert to the consolidation private 2744 * allocb_wait plus bcopy. 2745 */ 2746 mblk_t *mp1; 2747 2748 mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 2749 ASSERT(mp1); 2750 2751 while (mp != NULL) { 2752 ssize_t size; 2753 2754 size = MBLKL(mp); 2755 bcopy(mp->b_rptr, mp1->b_wptr, size); 2756 mp1->b_wptr += size; 2757 ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 2758 mp = mp->b_cont; 2759 } 2760 mp = mp1; 2761 } else { 2762 /* 2763 * Update the state indicating that the data has been consumed. 2764 * Keep SS_OOBPEND set until data is consumed past the mark. 2765 */ 2766 so->so_oobmsg = NULL; 2767 so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 2768 } 2769 dprintso(so, 1, 2770 ("after recvoob(%p): counts %d/%d state %s\n", 2771 (void *)so, so->so_oobsigcnt, 2772 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2773 ASSERT(so_verify_oobstate(so)); 2774 mutex_exit(&so->so_lock); 2775 2776 error = 0; 2777 nmp = mp; 2778 while (nmp != NULL && uiop->uio_resid > 0) { 2779 ssize_t n = MBLKL(nmp); 2780 2781 n = MIN(n, uiop->uio_resid); 2782 if (n > 0) 2783 error = uiomove(nmp->b_rptr, n, 2784 UIO_READ, uiop); 2785 if (error) 2786 break; 2787 nmp = nmp->b_cont; 2788 } 2789 freemsg(mp); 2790 return (error); 2791 } 2792 2793 /* 2794 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2795 * In addition, the caller typically verifies that there is some 2796 * potential state to clear by checking 2797 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2798 * before calling this routine. 2799 * Note that such a check can be made without holding so_lock since 2800 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2801 * decrements so_oobsigcnt. 2802 * 2803 * When data is read *after* the point that all pending 2804 * oob data has been consumed the oob indication is cleared. 2805 * 2806 * This logic keeps select/poll returning POLLRDBAND and 2807 * SIOCATMARK returning true until we have read past 2808 * the mark. 2809 */ 2810 static void 2811 sorecv_update_oobstate(struct sonode *so) 2812 { 2813 mutex_enter(&so->so_lock); 2814 ASSERT(so_verify_oobstate(so)); 2815 dprintso(so, 1, 2816 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2817 so->so_oobsigcnt, 2818 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2819 if (so->so_oobsigcnt == 0) { 2820 /* No more pending oob indications */ 2821 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2822 freemsg(so->so_oobmsg); 2823 so->so_oobmsg = NULL; 2824 } 2825 ASSERT(so_verify_oobstate(so)); 2826 mutex_exit(&so->so_lock); 2827 } 2828 2829 /* 2830 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2831 */ 2832 static int 2833 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2834 { 2835 int error = 0; 2836 mblk_t *tmp = NULL; 2837 mblk_t *pmp = NULL; 2838 mblk_t *nmp = so->so_nl7c_rcv_mp; 2839 2840 ASSERT(nmp != NULL); 2841 2842 while (nmp != NULL && uiop->uio_resid > 0) { 2843 ssize_t n; 2844 2845 if (DB_TYPE(nmp) == M_DATA) { 2846 /* 2847 * We have some data, uiomove up to resid bytes. 2848 */ 2849 n = MIN(MBLKL(nmp), uiop->uio_resid); 2850 if (n > 0) 2851 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 2852 nmp->b_rptr += n; 2853 if (nmp->b_rptr == nmp->b_wptr) { 2854 pmp = nmp; 2855 nmp = nmp->b_cont; 2856 } 2857 if (error) 2858 break; 2859 } else { 2860 /* 2861 * We only handle data, save for caller to handle. 2862 */ 2863 if (pmp != NULL) { 2864 pmp->b_cont = nmp->b_cont; 2865 } 2866 nmp->b_cont = NULL; 2867 if (*rmp == NULL) { 2868 *rmp = nmp; 2869 } else { 2870 tmp->b_cont = nmp; 2871 } 2872 nmp = nmp->b_cont; 2873 tmp = nmp; 2874 } 2875 } 2876 if (pmp != NULL) { 2877 /* Free any mblk_t(s) which we have consumed */ 2878 pmp->b_cont = NULL; 2879 freemsg(so->so_nl7c_rcv_mp); 2880 } 2881 if ((so->so_nl7c_rcv_mp = nmp) == NULL) { 2882 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 2883 if (error == 0) { 2884 rval_t *p = (rval_t *)&so->so_nl7c_rcv_rval; 2885 2886 error = p->r_v.r_v2; 2887 p->r_v.r_v2 = 0; 2888 } 2889 rp->r_vals = so->so_nl7c_rcv_rval; 2890 so->so_nl7c_rcv_rval = 0; 2891 } else { 2892 /* More mblk_t(s) to process so no rval to return */ 2893 rp->r_vals = 0; 2894 } 2895 return (error); 2896 } 2897 2898 /* 2899 * Receive the next message on the queue. 2900 * If msg_controllen is non-zero when called the caller is interested in 2901 * any received control info (options). 2902 * If msg_namelen is non-zero when called the caller is interested in 2903 * any received source address. 2904 * The routine returns with msg_control and msg_name pointing to 2905 * kmem_alloc'ed memory which the caller has to free. 2906 */ 2907 int 2908 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 2909 { 2910 union T_primitives *tpr; 2911 mblk_t *mp; 2912 uchar_t pri; 2913 int pflag, opflag; 2914 void *control; 2915 t_uscalar_t controllen; 2916 t_uscalar_t namelen; 2917 int so_state = so->so_state; /* Snapshot */ 2918 ssize_t saved_resid; 2919 rval_t rval; 2920 int flags; 2921 clock_t timout; 2922 int first; 2923 int error = 0; 2924 struct uio *suiop = NULL; 2925 sodirect_t *sodp = so->so_direct; 2926 2927 flags = msg->msg_flags; 2928 msg->msg_flags = 0; 2929 2930 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 2931 (void *)so, (void *)msg, flags, 2932 pr_state(so->so_state, so->so_mode), so->so_error)); 2933 2934 /* 2935 * If we are not connected because we have never been connected 2936 * we return ENOTCONN. If we have been connected (but are no longer 2937 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 2938 * the EOF. 2939 * 2940 * An alternative would be to post an ENOTCONN error in stream head 2941 * (read+write) and clear it when we're connected. However, that error 2942 * would cause incorrect poll/select behavior! 2943 */ 2944 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 2945 (so->so_mode & SM_CONNREQUIRED)) { 2946 return (ENOTCONN); 2947 } 2948 2949 /* 2950 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 2951 * after checking that the read queue is empty) and returns zero. 2952 * This implementation will sleep (in kstrgetmsg) even if uio_resid 2953 * is zero. 2954 */ 2955 2956 if (flags & MSG_OOB) { 2957 /* Check that the transport supports OOB */ 2958 if (!(so->so_mode & SM_EXDATA)) 2959 return (EOPNOTSUPP); 2960 return (sorecvoob(so, msg, uiop, flags)); 2961 } 2962 2963 /* 2964 * Set msg_controllen and msg_namelen to zero here to make it 2965 * simpler in the cases that no control or name is returned. 2966 */ 2967 controllen = msg->msg_controllen; 2968 namelen = msg->msg_namelen; 2969 msg->msg_controllen = 0; 2970 msg->msg_namelen = 0; 2971 2972 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 2973 namelen, controllen)); 2974 2975 mutex_enter(&so->so_lock); 2976 /* 2977 * If an NL7C enabled socket and not waiting for write data. 2978 */ 2979 if ((so->so_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 2980 NL7C_ENABLED) { 2981 if (so->so_nl7c_uri) { 2982 /* Close uri processing for a previous request */ 2983 nl7c_close(so); 2984 } 2985 if ((so_state & SS_CANTRCVMORE) && so->so_nl7c_rcv_mp == NULL) { 2986 /* Nothing to process, EOF */ 2987 mutex_exit(&so->so_lock); 2988 return (0); 2989 } else if (so->so_nl7c_flags & NL7C_SOPERSIST) { 2990 /* Persistent NL7C socket, try to process request */ 2991 boolean_t ret; 2992 2993 ret = nl7c_process(so, 2994 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 2995 rval.r_vals = so->so_nl7c_rcv_rval; 2996 error = rval.r_v.r_v2; 2997 if (error) { 2998 /* Error of some sort, return it */ 2999 mutex_exit(&so->so_lock); 3000 return (error); 3001 } 3002 if (so->so_nl7c_flags && 3003 ! (so->so_nl7c_flags & NL7C_WAITWRITE)) { 3004 /* 3005 * Still an NL7C socket and no data 3006 * to pass up to the caller. 3007 */ 3008 mutex_exit(&so->so_lock); 3009 if (ret) { 3010 /* EOF */ 3011 return (0); 3012 } else { 3013 /* Need more data */ 3014 return (EAGAIN); 3015 } 3016 } 3017 } else { 3018 /* 3019 * Not persistent so no further NL7C processing. 3020 */ 3021 so->so_nl7c_flags = 0; 3022 } 3023 } 3024 /* 3025 * Only one reader is allowed at any given time. This is needed 3026 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3027 * 3028 * This is slightly different that BSD behavior in that it fails with 3029 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3030 * is single-threaded using sblock(), which is dropped while waiting 3031 * for data to appear. The difference shows up e.g. if one 3032 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3033 * does use nonblocking io and different threads are reading each 3034 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3035 * in this case as long as the read queue doesn't get empty. 3036 * In this implementation the thread using nonblocking io can 3037 * get an EWOULDBLOCK error due to the blocking thread executing 3038 * e.g. in the uiomove in kstrgetmsg. 3039 * This difference is not believed to be significant. 3040 */ 3041 /* Set SOREADLOCKED */ 3042 error = so_lock_read_intr(so, 3043 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3044 mutex_exit(&so->so_lock); 3045 if (error) 3046 return (error); 3047 3048 /* 3049 * Tell kstrgetmsg to not inspect the stream head errors until all 3050 * queued data has been consumed. 3051 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3052 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3053 * 3054 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3055 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3056 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3057 */ 3058 pflag = MSG_ANY | MSG_DELAYERROR; 3059 if (flags & MSG_PEEK) { 3060 pflag |= MSG_IPEEK; 3061 flags &= ~MSG_WAITALL; 3062 } 3063 if (so->so_mode & SM_ATOMIC) 3064 pflag |= MSG_DISCARDTAIL; 3065 3066 if (flags & MSG_DONTWAIT) 3067 timout = 0; 3068 else 3069 timout = -1; 3070 opflag = pflag; 3071 first = 1; 3072 3073 if (uiop->uio_resid >= uioasync.mincnt && 3074 sodp != NULL && (sodp->sod_state & SOD_ENABLED) && 3075 uioasync.enabled && !(flags & MSG_PEEK) && 3076 !(so_state & SS_CANTRCVMORE)) { 3077 /* 3078 * Big enough I/O for uioa min setup and an sodirect socket 3079 * and sodirect enabled and uioa enabled and I/O will be done 3080 * and not EOF so initialize the sodirect_t uioa_t with "uiop". 3081 */ 3082 mutex_enter(sodp->sod_lock); 3083 if (!uioainit(uiop, &sodp->sod_uioa)) { 3084 /* 3085 * Successful uioainit() so the uio_t part of the 3086 * uioa_t will be used for all uio_t work to follow, 3087 * we save the original "uiop" in "suiop". 3088 */ 3089 suiop = uiop; 3090 uiop = (uio_t *)&sodp->sod_uioa; 3091 /* 3092 * Before returning to the caller the passed in uio_t 3093 * "uiop" will be updated via a call to uioafini() 3094 * below. 3095 * 3096 * Note, the uioa.uioa_state isn't set to UIOA_ENABLED 3097 * here as first we have to uioamove() any currently 3098 * queued M_DATA mblk_t(s) so it will be done in 3099 * kstrgetmsg(). 3100 */ 3101 } 3102 /* 3103 * In either uioainit() success or not case note the number 3104 * of uio bytes the caller wants for sod framework and/or 3105 * transport (e.g. TCP) strategy. 3106 */ 3107 sodp->sod_want = uiop->uio_resid; 3108 mutex_exit(sodp->sod_lock); 3109 } else if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) { 3110 /* 3111 * No uioa but still using sodirect so note the number of 3112 * uio bytes the caller wants for sodirect framework and/or 3113 * transport (e.g. TCP) strategy. 3114 * 3115 * Note, sod_lock not held, only writer is in this function 3116 * and only one thread at a time so not needed just to init. 3117 */ 3118 sodp->sod_want = uiop->uio_resid; 3119 } 3120 retry: 3121 saved_resid = uiop->uio_resid; 3122 pri = 0; 3123 mp = NULL; 3124 if (so->so_nl7c_rcv_mp != NULL) { 3125 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3126 error = nl7c_sorecv(so, &mp, uiop, &rval); 3127 } else { 3128 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3129 timout, &rval); 3130 } 3131 if (error) { 3132 switch (error) { 3133 case EINTR: 3134 case EWOULDBLOCK: 3135 if (!first) 3136 error = 0; 3137 break; 3138 case ETIME: 3139 /* Returned from kstrgetmsg when timeout expires */ 3140 if (!first) 3141 error = 0; 3142 else 3143 error = EWOULDBLOCK; 3144 break; 3145 default: 3146 eprintsoline(so, error); 3147 break; 3148 } 3149 goto out; 3150 } 3151 /* 3152 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3153 * For non-datagrams MOREDATA is used to set MSG_EOR. 3154 */ 3155 ASSERT(!(rval.r_val1 & MORECTL)); 3156 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3157 msg->msg_flags |= MSG_TRUNC; 3158 3159 if (mp == NULL) { 3160 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3161 /* 3162 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3163 * The draft Posix socket spec states that the mark should 3164 * not be cleared when peeking. We follow the latter. 3165 */ 3166 if ((so->so_state & 3167 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3168 (uiop->uio_resid != saved_resid) && 3169 !(flags & MSG_PEEK)) { 3170 sorecv_update_oobstate(so); 3171 } 3172 3173 mutex_enter(&so->so_lock); 3174 /* Set MSG_EOR based on MOREDATA */ 3175 if (!(rval.r_val1 & MOREDATA)) { 3176 if (so->so_state & SS_SAVEDEOR) { 3177 msg->msg_flags |= MSG_EOR; 3178 so->so_state &= ~SS_SAVEDEOR; 3179 } 3180 } 3181 /* 3182 * If some data was received (i.e. not EOF) and the 3183 * read/recv* has not been satisfied wait for some more. 3184 */ 3185 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3186 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3187 mutex_exit(&so->so_lock); 3188 first = 0; 3189 pflag = opflag | MSG_NOMARK; 3190 goto retry; 3191 } 3192 goto out_locked; 3193 } 3194 3195 /* strsock_proto has already verified length and alignment */ 3196 tpr = (union T_primitives *)mp->b_rptr; 3197 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3198 3199 switch (tpr->type) { 3200 case T_DATA_IND: { 3201 if ((so->so_state & 3202 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3203 (uiop->uio_resid != saved_resid) && 3204 !(flags & MSG_PEEK)) { 3205 sorecv_update_oobstate(so); 3206 } 3207 3208 /* 3209 * Set msg_flags to MSG_EOR based on 3210 * MORE_flag and MOREDATA. 3211 */ 3212 mutex_enter(&so->so_lock); 3213 so->so_state &= ~SS_SAVEDEOR; 3214 if (!(tpr->data_ind.MORE_flag & 1)) { 3215 if (!(rval.r_val1 & MOREDATA)) 3216 msg->msg_flags |= MSG_EOR; 3217 else 3218 so->so_state |= SS_SAVEDEOR; 3219 } 3220 freemsg(mp); 3221 /* 3222 * If some data was received (i.e. not EOF) and the 3223 * read/recv* has not been satisfied wait for some more. 3224 */ 3225 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3226 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3227 mutex_exit(&so->so_lock); 3228 first = 0; 3229 pflag = opflag | MSG_NOMARK; 3230 goto retry; 3231 } 3232 goto out_locked; 3233 } 3234 case T_UNITDATA_IND: { 3235 void *addr; 3236 t_uscalar_t addrlen; 3237 void *abuf; 3238 t_uscalar_t optlen; 3239 void *opt; 3240 3241 if ((so->so_state & 3242 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3243 (uiop->uio_resid != saved_resid) && 3244 !(flags & MSG_PEEK)) { 3245 sorecv_update_oobstate(so); 3246 } 3247 3248 if (namelen != 0) { 3249 /* Caller wants source address */ 3250 addrlen = tpr->unitdata_ind.SRC_length; 3251 addr = sogetoff(mp, 3252 tpr->unitdata_ind.SRC_offset, 3253 addrlen, 1); 3254 if (addr == NULL) { 3255 freemsg(mp); 3256 error = EPROTO; 3257 eprintsoline(so, error); 3258 goto out; 3259 } 3260 if (so->so_family == AF_UNIX) { 3261 /* 3262 * Can not use the transport level address. 3263 * If there is a SO_SRCADDR option carrying 3264 * the socket level address it will be 3265 * extracted below. 3266 */ 3267 addr = NULL; 3268 addrlen = 0; 3269 } 3270 } 3271 optlen = tpr->unitdata_ind.OPT_length; 3272 if (optlen != 0) { 3273 t_uscalar_t ncontrollen; 3274 3275 /* 3276 * Extract any source address option. 3277 * Determine how large cmsg buffer is needed. 3278 */ 3279 opt = sogetoff(mp, 3280 tpr->unitdata_ind.OPT_offset, 3281 optlen, __TPI_ALIGN_SIZE); 3282 3283 if (opt == NULL) { 3284 freemsg(mp); 3285 error = EPROTO; 3286 eprintsoline(so, error); 3287 goto out; 3288 } 3289 if (so->so_family == AF_UNIX) 3290 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3291 ncontrollen = so_cmsglen(mp, opt, optlen, 3292 !(flags & MSG_XPG4_2)); 3293 if (controllen != 0) 3294 controllen = ncontrollen; 3295 else if (ncontrollen != 0) 3296 msg->msg_flags |= MSG_CTRUNC; 3297 } else { 3298 controllen = 0; 3299 } 3300 3301 if (namelen != 0) { 3302 /* 3303 * Return address to caller. 3304 * Caller handles truncation if length 3305 * exceeds msg_namelen. 3306 * NOTE: AF_UNIX NUL termination is ensured by 3307 * the sender's copyin_name(). 3308 */ 3309 abuf = kmem_alloc(addrlen, KM_SLEEP); 3310 3311 bcopy(addr, abuf, addrlen); 3312 msg->msg_name = abuf; 3313 msg->msg_namelen = addrlen; 3314 } 3315 3316 if (controllen != 0) { 3317 /* 3318 * Return control msg to caller. 3319 * Caller handles truncation if length 3320 * exceeds msg_controllen. 3321 */ 3322 control = kmem_zalloc(controllen, KM_SLEEP); 3323 3324 error = so_opt2cmsg(mp, opt, optlen, 3325 !(flags & MSG_XPG4_2), 3326 control, controllen); 3327 if (error) { 3328 freemsg(mp); 3329 if (msg->msg_namelen != 0) 3330 kmem_free(msg->msg_name, 3331 msg->msg_namelen); 3332 kmem_free(control, controllen); 3333 eprintsoline(so, error); 3334 goto out; 3335 } 3336 msg->msg_control = control; 3337 msg->msg_controllen = controllen; 3338 } 3339 3340 freemsg(mp); 3341 goto out; 3342 } 3343 case T_OPTDATA_IND: { 3344 struct T_optdata_req *tdr; 3345 void *opt; 3346 t_uscalar_t optlen; 3347 3348 if ((so->so_state & 3349 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3350 (uiop->uio_resid != saved_resid) && 3351 !(flags & MSG_PEEK)) { 3352 sorecv_update_oobstate(so); 3353 } 3354 3355 tdr = (struct T_optdata_req *)mp->b_rptr; 3356 optlen = tdr->OPT_length; 3357 if (optlen != 0) { 3358 t_uscalar_t ncontrollen; 3359 /* 3360 * Determine how large cmsg buffer is needed. 3361 */ 3362 opt = sogetoff(mp, 3363 tpr->optdata_ind.OPT_offset, 3364 optlen, __TPI_ALIGN_SIZE); 3365 3366 if (opt == NULL) { 3367 freemsg(mp); 3368 error = EPROTO; 3369 eprintsoline(so, error); 3370 goto out; 3371 } 3372 3373 ncontrollen = so_cmsglen(mp, opt, optlen, 3374 !(flags & MSG_XPG4_2)); 3375 if (controllen != 0) 3376 controllen = ncontrollen; 3377 else if (ncontrollen != 0) 3378 msg->msg_flags |= MSG_CTRUNC; 3379 } else { 3380 controllen = 0; 3381 } 3382 3383 if (controllen != 0) { 3384 /* 3385 * Return control msg to caller. 3386 * Caller handles truncation if length 3387 * exceeds msg_controllen. 3388 */ 3389 control = kmem_zalloc(controllen, KM_SLEEP); 3390 3391 error = so_opt2cmsg(mp, opt, optlen, 3392 !(flags & MSG_XPG4_2), 3393 control, controllen); 3394 if (error) { 3395 freemsg(mp); 3396 kmem_free(control, controllen); 3397 eprintsoline(so, error); 3398 goto out; 3399 } 3400 msg->msg_control = control; 3401 msg->msg_controllen = controllen; 3402 } 3403 3404 /* 3405 * Set msg_flags to MSG_EOR based on 3406 * DATA_flag and MOREDATA. 3407 */ 3408 mutex_enter(&so->so_lock); 3409 so->so_state &= ~SS_SAVEDEOR; 3410 if (!(tpr->data_ind.MORE_flag & 1)) { 3411 if (!(rval.r_val1 & MOREDATA)) 3412 msg->msg_flags |= MSG_EOR; 3413 else 3414 so->so_state |= SS_SAVEDEOR; 3415 } 3416 freemsg(mp); 3417 /* 3418 * If some data was received (i.e. not EOF) and the 3419 * read/recv* has not been satisfied wait for some more. 3420 * Not possible to wait if control info was received. 3421 */ 3422 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3423 controllen == 0 && 3424 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3425 mutex_exit(&so->so_lock); 3426 first = 0; 3427 pflag = opflag | MSG_NOMARK; 3428 goto retry; 3429 } 3430 goto out_locked; 3431 } 3432 case T_EXDATA_IND: { 3433 dprintso(so, 1, 3434 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3435 "state %s\n", 3436 so->so_oobsigcnt, so->so_oobcnt, 3437 saved_resid - uiop->uio_resid, 3438 pr_state(so->so_state, so->so_mode))); 3439 /* 3440 * kstrgetmsg handles MSGMARK so there is nothing to 3441 * inspect in the T_EXDATA_IND. 3442 * strsock_proto makes the stream head queue the T_EXDATA_IND 3443 * as a separate message with no M_DATA component. Furthermore, 3444 * the stream head does not consolidate M_DATA messages onto 3445 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3446 * remains a message by itself. This is needed since MSGMARK 3447 * marks both the whole message as well as the last byte 3448 * of the message. 3449 */ 3450 freemsg(mp); 3451 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3452 if (flags & MSG_PEEK) { 3453 /* 3454 * Even though we are peeking we consume the 3455 * T_EXDATA_IND thereby moving the mark information 3456 * to SS_RCVATMARK. Then the oob code below will 3457 * retry the peeking kstrgetmsg. 3458 * Note that the stream head read queue is 3459 * never flushed without holding SOREADLOCKED 3460 * thus the T_EXDATA_IND can not disappear 3461 * underneath us. 3462 */ 3463 dprintso(so, 1, 3464 ("sotpi_recvmsg: consume EXDATA_IND " 3465 "counts %d/%d state %s\n", 3466 so->so_oobsigcnt, 3467 so->so_oobcnt, 3468 pr_state(so->so_state, so->so_mode))); 3469 3470 pflag = MSG_ANY | MSG_DELAYERROR; 3471 if (so->so_mode & SM_ATOMIC) 3472 pflag |= MSG_DISCARDTAIL; 3473 3474 pri = 0; 3475 mp = NULL; 3476 3477 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3478 &pri, &pflag, (clock_t)-1, &rval); 3479 ASSERT(uiop->uio_resid == saved_resid); 3480 3481 if (error) { 3482 #ifdef SOCK_DEBUG 3483 if (error != EWOULDBLOCK && error != EINTR) { 3484 eprintsoline(so, error); 3485 } 3486 #endif /* SOCK_DEBUG */ 3487 goto out; 3488 } 3489 ASSERT(mp); 3490 tpr = (union T_primitives *)mp->b_rptr; 3491 ASSERT(tpr->type == T_EXDATA_IND); 3492 freemsg(mp); 3493 } /* end "if (flags & MSG_PEEK)" */ 3494 3495 /* 3496 * Decrement the number of queued and pending oob. 3497 * 3498 * SS_RCVATMARK is cleared when we read past a mark. 3499 * SS_HAVEOOBDATA is cleared when we've read past the 3500 * last mark. 3501 * SS_OOBPEND is cleared if we've read past the last 3502 * mark and no (new) SIGURG has been posted. 3503 */ 3504 mutex_enter(&so->so_lock); 3505 ASSERT(so_verify_oobstate(so)); 3506 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 3507 ASSERT(so->so_oobsigcnt > 0); 3508 so->so_oobsigcnt--; 3509 ASSERT(so->so_oobcnt > 0); 3510 so->so_oobcnt--; 3511 /* 3512 * Since the T_EXDATA_IND has been removed from the stream 3513 * head, but we have not read data past the mark, 3514 * sockfs needs to track that the socket is still at the mark. 3515 * 3516 * Since no data was received call kstrgetmsg again to wait 3517 * for data. 3518 */ 3519 so->so_state |= SS_RCVATMARK; 3520 mutex_exit(&so->so_lock); 3521 dprintso(so, 1, 3522 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3523 so->so_oobsigcnt, so->so_oobcnt, 3524 pr_state(so->so_state, so->so_mode))); 3525 pflag = opflag; 3526 goto retry; 3527 } 3528 default: 3529 ASSERT(0); 3530 freemsg(mp); 3531 error = EPROTO; 3532 eprintsoline(so, error); 3533 goto out; 3534 } 3535 /* NOTREACHED */ 3536 out: 3537 mutex_enter(&so->so_lock); 3538 out_locked: 3539 if (sodp != NULL) { 3540 /* Finish any sodirect and uioa processing */ 3541 mutex_enter(sodp->sod_lock); 3542 if (suiop != NULL) { 3543 /* Finish any uioa_t processing */ 3544 int ret; 3545 3546 ASSERT(uiop == (uio_t *)&sodp->sod_uioa); 3547 ret = uioafini(suiop, (uioa_t *)uiop); 3548 if (error == 0 && ret != 0) { 3549 /* If no error yet, set it */ 3550 error = ret; 3551 } 3552 if ((mp = sodp->sod_uioafh) != NULL) { 3553 sodp->sod_uioafh = NULL; 3554 sodp->sod_uioaft = NULL; 3555 freemsg(mp); 3556 } 3557 } 3558 if (!(sodp->sod_state & SOD_WAKE_NOT)) { 3559 /* Awoke */ 3560 sodp->sod_state &= SOD_WAKE_CLR; 3561 sodp->sod_state |= SOD_WAKE_NOT; 3562 } 3563 /* Last, clear sod_want value */ 3564 sodp->sod_want = 0; 3565 mutex_exit(sodp->sod_lock); 3566 } 3567 so_unlock_read(so); /* Clear SOREADLOCKED */ 3568 mutex_exit(&so->so_lock); 3569 return (error); 3570 } 3571 3572 /* 3573 * Sending data with options on a datagram socket. 3574 * Assumes caller has verified that SS_ISBOUND etc. are set. 3575 */ 3576 static int 3577 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3578 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3579 { 3580 struct T_unitdata_req tudr; 3581 mblk_t *mp; 3582 int error; 3583 void *addr; 3584 socklen_t addrlen; 3585 void *src; 3586 socklen_t srclen; 3587 ssize_t len; 3588 int size; 3589 struct T_opthdr toh; 3590 struct fdbuf *fdbuf; 3591 t_uscalar_t optlen; 3592 void *fds; 3593 int fdlen; 3594 3595 ASSERT(name && namelen); 3596 ASSERT(control && controllen); 3597 3598 len = uiop->uio_resid; 3599 if (len > (ssize_t)so->so_tidu_size) { 3600 return (EMSGSIZE); 3601 } 3602 3603 /* 3604 * For AF_UNIX the destination address is translated to an internal 3605 * name and the source address is passed as an option. 3606 * Also, file descriptors are passed as file pointers in an 3607 * option. 3608 */ 3609 3610 /* 3611 * Length and family checks. 3612 */ 3613 error = so_addr_verify(so, name, namelen); 3614 if (error) { 3615 eprintsoline(so, error); 3616 return (error); 3617 } 3618 if (so->so_family == AF_UNIX) { 3619 if (so->so_state & SS_FADDR_NOXLATE) { 3620 /* 3621 * Already have a transport internal address. Do not 3622 * pass any (transport internal) source address. 3623 */ 3624 addr = name; 3625 addrlen = namelen; 3626 src = NULL; 3627 srclen = 0; 3628 } else { 3629 /* 3630 * Pass the sockaddr_un source address as an option 3631 * and translate the remote address. 3632 * 3633 * Note that this code does not prevent so_laddr_sa 3634 * from changing while it is being used. Thus 3635 * if an unbind+bind occurs concurrently with this 3636 * send the peer might see a partially new and a 3637 * partially old "from" address. 3638 */ 3639 src = so->so_laddr_sa; 3640 srclen = (t_uscalar_t)so->so_laddr_len; 3641 dprintso(so, 1, 3642 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3643 srclen, src)); 3644 error = so_ux_addr_xlate(so, name, namelen, 3645 (flags & MSG_XPG4_2), 3646 &addr, &addrlen); 3647 if (error) { 3648 eprintsoline(so, error); 3649 return (error); 3650 } 3651 } 3652 } else { 3653 addr = name; 3654 addrlen = namelen; 3655 src = NULL; 3656 srclen = 0; 3657 } 3658 optlen = so_optlen(control, controllen, 3659 !(flags & MSG_XPG4_2)); 3660 tudr.PRIM_type = T_UNITDATA_REQ; 3661 tudr.DEST_length = addrlen; 3662 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3663 if (srclen != 0) 3664 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3665 _TPI_ALIGN_TOPT(srclen)); 3666 else 3667 tudr.OPT_length = optlen; 3668 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3669 _TPI_ALIGN_TOPT(addrlen)); 3670 3671 size = tudr.OPT_offset + tudr.OPT_length; 3672 3673 /* 3674 * File descriptors only when SM_FDPASSING set. 3675 */ 3676 error = so_getfdopt(control, controllen, 3677 !(flags & MSG_XPG4_2), &fds, &fdlen); 3678 if (error) 3679 return (error); 3680 if (fdlen != -1) { 3681 if (!(so->so_mode & SM_FDPASSING)) 3682 return (EOPNOTSUPP); 3683 3684 error = fdbuf_create(fds, fdlen, &fdbuf); 3685 if (error) 3686 return (error); 3687 mp = fdbuf_allocmsg(size, fdbuf); 3688 } else { 3689 mp = soallocproto(size, _ALLOC_INTR); 3690 if (mp == NULL) { 3691 /* 3692 * Caught a signal waiting for memory. 3693 * Let send* return EINTR. 3694 */ 3695 return (EINTR); 3696 } 3697 } 3698 soappendmsg(mp, &tudr, sizeof (tudr)); 3699 soappendmsg(mp, addr, addrlen); 3700 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3701 3702 if (fdlen != -1) { 3703 ASSERT(fdbuf != NULL); 3704 toh.level = SOL_SOCKET; 3705 toh.name = SO_FILEP; 3706 toh.len = fdbuf->fd_size + 3707 (t_uscalar_t)sizeof (struct T_opthdr); 3708 toh.status = 0; 3709 soappendmsg(mp, &toh, sizeof (toh)); 3710 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3711 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3712 } 3713 if (srclen != 0) { 3714 /* 3715 * There is a AF_UNIX sockaddr_un to include as a source 3716 * address option. 3717 */ 3718 toh.level = SOL_SOCKET; 3719 toh.name = SO_SRCADDR; 3720 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3721 toh.status = 0; 3722 soappendmsg(mp, &toh, sizeof (toh)); 3723 soappendmsg(mp, src, srclen); 3724 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3725 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3726 } 3727 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3728 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3729 /* At most 3 bytes left in the message */ 3730 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3731 ASSERT(MBLKL(mp) <= (ssize_t)size); 3732 3733 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3734 if (audit_active) 3735 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3736 3737 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3738 #ifdef SOCK_DEBUG 3739 if (error) { 3740 eprintsoline(so, error); 3741 } 3742 #endif /* SOCK_DEBUG */ 3743 return (error); 3744 } 3745 3746 /* 3747 * Sending data with options on a connected stream socket. 3748 * Assumes caller has verified that SS_ISCONNECTED is set. 3749 */ 3750 static int 3751 sosend_svccmsg(struct sonode *so, 3752 struct uio *uiop, 3753 int more, 3754 void *control, 3755 t_uscalar_t controllen, 3756 int flags) 3757 { 3758 struct T_optdata_req tdr; 3759 mblk_t *mp; 3760 int error; 3761 ssize_t iosize; 3762 int first = 1; 3763 int size; 3764 struct fdbuf *fdbuf; 3765 t_uscalar_t optlen; 3766 void *fds; 3767 int fdlen; 3768 struct T_opthdr toh; 3769 3770 dprintso(so, 1, 3771 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3772 3773 /* 3774 * Has to be bound and connected. However, since no locks are 3775 * held the state could have changed after sotpi_sendmsg checked it 3776 * thus it is not possible to ASSERT on the state. 3777 */ 3778 3779 /* Options on connection-oriented only when SM_OPTDATA set. */ 3780 if (!(so->so_mode & SM_OPTDATA)) 3781 return (EOPNOTSUPP); 3782 3783 do { 3784 /* 3785 * Set the MORE flag if uio_resid does not fit in this 3786 * message or if the caller passed in "more". 3787 * Error for transports with zero tidu_size. 3788 */ 3789 tdr.PRIM_type = T_OPTDATA_REQ; 3790 iosize = so->so_tidu_size; 3791 if (iosize <= 0) 3792 return (EMSGSIZE); 3793 if (uiop->uio_resid > iosize) { 3794 tdr.DATA_flag = 1; 3795 } else { 3796 if (more) 3797 tdr.DATA_flag = 1; 3798 else 3799 tdr.DATA_flag = 0; 3800 iosize = uiop->uio_resid; 3801 } 3802 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3803 tdr.DATA_flag, iosize)); 3804 3805 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3806 tdr.OPT_length = optlen; 3807 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3808 3809 size = (int)sizeof (tdr) + optlen; 3810 /* 3811 * File descriptors only when SM_FDPASSING set. 3812 */ 3813 error = so_getfdopt(control, controllen, 3814 !(flags & MSG_XPG4_2), &fds, &fdlen); 3815 if (error) 3816 return (error); 3817 if (fdlen != -1) { 3818 if (!(so->so_mode & SM_FDPASSING)) 3819 return (EOPNOTSUPP); 3820 3821 error = fdbuf_create(fds, fdlen, &fdbuf); 3822 if (error) 3823 return (error); 3824 mp = fdbuf_allocmsg(size, fdbuf); 3825 } else { 3826 mp = soallocproto(size, _ALLOC_INTR); 3827 if (mp == NULL) { 3828 /* 3829 * Caught a signal waiting for memory. 3830 * Let send* return EINTR. 3831 */ 3832 return (first ? EINTR : 0); 3833 } 3834 } 3835 soappendmsg(mp, &tdr, sizeof (tdr)); 3836 3837 if (fdlen != -1) { 3838 ASSERT(fdbuf != NULL); 3839 toh.level = SOL_SOCKET; 3840 toh.name = SO_FILEP; 3841 toh.len = fdbuf->fd_size + 3842 (t_uscalar_t)sizeof (struct T_opthdr); 3843 toh.status = 0; 3844 soappendmsg(mp, &toh, sizeof (toh)); 3845 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3846 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3847 } 3848 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3849 /* At most 3 bytes left in the message */ 3850 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3851 ASSERT(MBLKL(mp) <= (ssize_t)size); 3852 3853 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3854 3855 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3856 0, MSG_BAND, 0); 3857 if (error) { 3858 if (!first && error == EWOULDBLOCK) 3859 return (0); 3860 eprintsoline(so, error); 3861 return (error); 3862 } 3863 control = NULL; 3864 first = 0; 3865 if (uiop->uio_resid > 0) { 3866 /* 3867 * Recheck for fatal errors. Fail write even though 3868 * some data have been written. This is consistent 3869 * with strwrite semantics and BSD sockets semantics. 3870 */ 3871 if (so->so_state & SS_CANTSENDMORE) { 3872 tsignal(curthread, SIGPIPE); 3873 eprintsoline(so, error); 3874 return (EPIPE); 3875 } 3876 if (so->so_error != 0) { 3877 mutex_enter(&so->so_lock); 3878 error = sogeterr(so); 3879 mutex_exit(&so->so_lock); 3880 if (error != 0) { 3881 eprintsoline(so, error); 3882 return (error); 3883 } 3884 } 3885 } 3886 } while (uiop->uio_resid > 0); 3887 return (0); 3888 } 3889 3890 /* 3891 * Sending data on a datagram socket. 3892 * Assumes caller has verified that SS_ISBOUND etc. are set. 3893 * 3894 * For AF_UNIX the destination address is translated to an internal 3895 * name and the source address is passed as an option. 3896 */ 3897 int 3898 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3899 struct uio *uiop, int flags) 3900 { 3901 struct T_unitdata_req tudr; 3902 mblk_t *mp; 3903 int error; 3904 void *addr; 3905 socklen_t addrlen; 3906 void *src; 3907 socklen_t srclen; 3908 ssize_t len; 3909 3910 ASSERT(name != NULL && namelen != 0); 3911 3912 len = uiop->uio_resid; 3913 if (len > so->so_tidu_size) { 3914 error = EMSGSIZE; 3915 goto done; 3916 } 3917 3918 /* Length and family checks */ 3919 error = so_addr_verify(so, name, namelen); 3920 if (error != 0) 3921 goto done; 3922 3923 if (so->so_state & SS_DIRECT) 3924 return (sodgram_direct(so, name, namelen, uiop, flags)); 3925 3926 if (so->so_family == AF_UNIX) { 3927 if (so->so_state & SS_FADDR_NOXLATE) { 3928 /* 3929 * Already have a transport internal address. Do not 3930 * pass any (transport internal) source address. 3931 */ 3932 addr = name; 3933 addrlen = namelen; 3934 src = NULL; 3935 srclen = 0; 3936 } else { 3937 /* 3938 * Pass the sockaddr_un source address as an option 3939 * and translate the remote address. 3940 * 3941 * Note that this code does not prevent so_laddr_sa 3942 * from changing while it is being used. Thus 3943 * if an unbind+bind occurs concurrently with this 3944 * send the peer might see a partially new and a 3945 * partially old "from" address. 3946 */ 3947 src = so->so_laddr_sa; 3948 srclen = (socklen_t)so->so_laddr_len; 3949 dprintso(so, 1, 3950 ("sosend_dgram UNIX: srclen %d, src %p\n", 3951 srclen, src)); 3952 error = so_ux_addr_xlate(so, name, namelen, 3953 (flags & MSG_XPG4_2), 3954 &addr, &addrlen); 3955 if (error) { 3956 eprintsoline(so, error); 3957 goto done; 3958 } 3959 } 3960 } else { 3961 addr = name; 3962 addrlen = namelen; 3963 src = NULL; 3964 srclen = 0; 3965 } 3966 tudr.PRIM_type = T_UNITDATA_REQ; 3967 tudr.DEST_length = addrlen; 3968 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3969 if (srclen == 0) { 3970 tudr.OPT_length = 0; 3971 tudr.OPT_offset = 0; 3972 3973 mp = soallocproto2(&tudr, sizeof (tudr), 3974 addr, addrlen, 0, _ALLOC_INTR); 3975 if (mp == NULL) { 3976 /* 3977 * Caught a signal waiting for memory. 3978 * Let send* return EINTR. 3979 */ 3980 error = EINTR; 3981 goto done; 3982 } 3983 } else { 3984 /* 3985 * There is a AF_UNIX sockaddr_un to include as a source 3986 * address option. 3987 */ 3988 struct T_opthdr toh; 3989 ssize_t size; 3990 3991 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3992 _TPI_ALIGN_TOPT(srclen)); 3993 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3994 _TPI_ALIGN_TOPT(addrlen)); 3995 3996 toh.level = SOL_SOCKET; 3997 toh.name = SO_SRCADDR; 3998 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3999 toh.status = 0; 4000 4001 size = tudr.OPT_offset + tudr.OPT_length; 4002 mp = soallocproto2(&tudr, sizeof (tudr), 4003 addr, addrlen, size, _ALLOC_INTR); 4004 if (mp == NULL) { 4005 /* 4006 * Caught a signal waiting for memory. 4007 * Let send* return EINTR. 4008 */ 4009 error = EINTR; 4010 goto done; 4011 } 4012 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4013 soappendmsg(mp, &toh, sizeof (toh)); 4014 soappendmsg(mp, src, srclen); 4015 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4016 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4017 } 4018 4019 if (audit_active) 4020 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4021 4022 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4023 done: 4024 #ifdef SOCK_DEBUG 4025 if (error) { 4026 eprintsoline(so, error); 4027 } 4028 #endif /* SOCK_DEBUG */ 4029 return (error); 4030 } 4031 4032 /* 4033 * Sending data on a connected stream socket. 4034 * Assumes caller has verified that SS_ISCONNECTED is set. 4035 */ 4036 int 4037 sosend_svc(struct sonode *so, 4038 struct uio *uiop, 4039 t_scalar_t prim, 4040 int more, 4041 int sflag) 4042 { 4043 struct T_data_req tdr; 4044 mblk_t *mp; 4045 int error; 4046 ssize_t iosize; 4047 int first = 1; 4048 4049 dprintso(so, 1, 4050 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4051 (void *)so, uiop->uio_resid, prim, sflag)); 4052 4053 /* 4054 * Has to be bound and connected. However, since no locks are 4055 * held the state could have changed after sotpi_sendmsg checked it 4056 * thus it is not possible to ASSERT on the state. 4057 */ 4058 4059 do { 4060 /* 4061 * Set the MORE flag if uio_resid does not fit in this 4062 * message or if the caller passed in "more". 4063 * Error for transports with zero tidu_size. 4064 */ 4065 tdr.PRIM_type = prim; 4066 iosize = so->so_tidu_size; 4067 if (iosize <= 0) 4068 return (EMSGSIZE); 4069 if (uiop->uio_resid > iosize) { 4070 tdr.MORE_flag = 1; 4071 } else { 4072 if (more) 4073 tdr.MORE_flag = 1; 4074 else 4075 tdr.MORE_flag = 0; 4076 iosize = uiop->uio_resid; 4077 } 4078 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4079 prim, tdr.MORE_flag, iosize)); 4080 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 4081 if (mp == NULL) { 4082 /* 4083 * Caught a signal waiting for memory. 4084 * Let send* return EINTR. 4085 */ 4086 if (first) 4087 return (EINTR); 4088 else 4089 return (0); 4090 } 4091 4092 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4093 0, sflag | MSG_BAND, 0); 4094 if (error) { 4095 if (!first && error == EWOULDBLOCK) 4096 return (0); 4097 eprintsoline(so, error); 4098 return (error); 4099 } 4100 first = 0; 4101 if (uiop->uio_resid > 0) { 4102 /* 4103 * Recheck for fatal errors. Fail write even though 4104 * some data have been written. This is consistent 4105 * with strwrite semantics and BSD sockets semantics. 4106 */ 4107 if (so->so_state & SS_CANTSENDMORE) { 4108 tsignal(curthread, SIGPIPE); 4109 eprintsoline(so, error); 4110 return (EPIPE); 4111 } 4112 if (so->so_error != 0) { 4113 mutex_enter(&so->so_lock); 4114 error = sogeterr(so); 4115 mutex_exit(&so->so_lock); 4116 if (error != 0) { 4117 eprintsoline(so, error); 4118 return (error); 4119 } 4120 } 4121 } 4122 } while (uiop->uio_resid > 0); 4123 return (0); 4124 } 4125 4126 /* 4127 * Check the state for errors and call the appropriate send function. 4128 * 4129 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4130 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4131 * after sending the message. 4132 */ 4133 static int 4134 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 4135 { 4136 int so_state; 4137 int so_mode; 4138 int error; 4139 struct sockaddr *name; 4140 t_uscalar_t namelen; 4141 int dontroute; 4142 int flags; 4143 4144 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4145 (void *)so, (void *)msg, msg->msg_flags, 4146 pr_state(so->so_state, so->so_mode), so->so_error)); 4147 4148 mutex_enter(&so->so_lock); 4149 so_state = so->so_state; 4150 4151 if (so_state & SS_CANTSENDMORE) { 4152 mutex_exit(&so->so_lock); 4153 tsignal(curthread, SIGPIPE); 4154 return (EPIPE); 4155 } 4156 4157 if (so->so_error != 0) { 4158 error = sogeterr(so); 4159 if (error != 0) { 4160 mutex_exit(&so->so_lock); 4161 return (error); 4162 } 4163 } 4164 4165 name = (struct sockaddr *)msg->msg_name; 4166 namelen = msg->msg_namelen; 4167 4168 so_mode = so->so_mode; 4169 4170 if (name == NULL) { 4171 if (!(so_state & SS_ISCONNECTED)) { 4172 mutex_exit(&so->so_lock); 4173 if (so_mode & SM_CONNREQUIRED) 4174 return (ENOTCONN); 4175 else 4176 return (EDESTADDRREQ); 4177 } 4178 if (so_mode & SM_CONNREQUIRED) { 4179 name = NULL; 4180 namelen = 0; 4181 } else { 4182 /* 4183 * Note that this code does not prevent so_faddr_sa 4184 * from changing while it is being used. Thus 4185 * if an "unconnect"+connect occurs concurrently with 4186 * this send the datagram might be delivered to a 4187 * garbaled address. 4188 */ 4189 ASSERT(so->so_faddr_sa); 4190 name = so->so_faddr_sa; 4191 namelen = (t_uscalar_t)so->so_faddr_len; 4192 } 4193 } else { 4194 if (!(so_state & SS_ISCONNECTED) && 4195 (so_mode & SM_CONNREQUIRED)) { 4196 /* Required but not connected */ 4197 mutex_exit(&so->so_lock); 4198 return (ENOTCONN); 4199 } 4200 /* 4201 * Ignore the address on connection-oriented sockets. 4202 * Just like BSD this code does not generate an error for 4203 * TCP (a CONNREQUIRED socket) when sending to an address 4204 * passed in with sendto/sendmsg. Instead the data is 4205 * delivered on the connection as if no address had been 4206 * supplied. 4207 */ 4208 if ((so_state & SS_ISCONNECTED) && 4209 !(so_mode & SM_CONNREQUIRED)) { 4210 mutex_exit(&so->so_lock); 4211 return (EISCONN); 4212 } 4213 if (!(so_state & SS_ISBOUND)) { 4214 so_lock_single(so); /* Set SOLOCKED */ 4215 error = sotpi_bind(so, NULL, 0, 4216 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 4217 so_unlock_single(so, SOLOCKED); 4218 if (error) { 4219 mutex_exit(&so->so_lock); 4220 eprintsoline(so, error); 4221 return (error); 4222 } 4223 } 4224 /* 4225 * Handle delayed datagram errors. These are only queued 4226 * when the application sets SO_DGRAM_ERRIND. 4227 * Return the error if we are sending to the address 4228 * that was returned in the last T_UDERROR_IND. 4229 * If sending to some other address discard the delayed 4230 * error indication. 4231 */ 4232 if (so->so_delayed_error) { 4233 struct T_uderror_ind *tudi; 4234 void *addr; 4235 t_uscalar_t addrlen; 4236 boolean_t match = B_FALSE; 4237 4238 ASSERT(so->so_eaddr_mp); 4239 error = so->so_delayed_error; 4240 so->so_delayed_error = 0; 4241 tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr; 4242 addrlen = tudi->DEST_length; 4243 addr = sogetoff(so->so_eaddr_mp, 4244 tudi->DEST_offset, 4245 addrlen, 1); 4246 ASSERT(addr); /* Checked by strsock_proto */ 4247 switch (so->so_family) { 4248 case AF_INET: { 4249 /* Compare just IP address and port */ 4250 sin_t *sin1 = (sin_t *)name; 4251 sin_t *sin2 = (sin_t *)addr; 4252 4253 if (addrlen == sizeof (sin_t) && 4254 namelen == addrlen && 4255 sin1->sin_port == sin2->sin_port && 4256 sin1->sin_addr.s_addr == 4257 sin2->sin_addr.s_addr) 4258 match = B_TRUE; 4259 break; 4260 } 4261 case AF_INET6: { 4262 /* Compare just IP address and port. Not flow */ 4263 sin6_t *sin1 = (sin6_t *)name; 4264 sin6_t *sin2 = (sin6_t *)addr; 4265 4266 if (addrlen == sizeof (sin6_t) && 4267 namelen == addrlen && 4268 sin1->sin6_port == sin2->sin6_port && 4269 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4270 &sin2->sin6_addr)) 4271 match = B_TRUE; 4272 break; 4273 } 4274 case AF_UNIX: 4275 default: 4276 if (namelen == addrlen && 4277 bcmp(name, addr, namelen) == 0) 4278 match = B_TRUE; 4279 } 4280 if (match) { 4281 freemsg(so->so_eaddr_mp); 4282 so->so_eaddr_mp = NULL; 4283 mutex_exit(&so->so_lock); 4284 #ifdef DEBUG 4285 dprintso(so, 0, 4286 ("sockfs delayed error %d for %s\n", 4287 error, 4288 pr_addr(so->so_family, name, namelen))); 4289 #endif /* DEBUG */ 4290 return (error); 4291 } 4292 freemsg(so->so_eaddr_mp); 4293 so->so_eaddr_mp = NULL; 4294 } 4295 } 4296 mutex_exit(&so->so_lock); 4297 4298 flags = msg->msg_flags; 4299 dontroute = 0; 4300 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4301 uint32_t val; 4302 4303 val = 1; 4304 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4305 &val, (t_uscalar_t)sizeof (val)); 4306 if (error) 4307 return (error); 4308 dontroute = 1; 4309 } 4310 4311 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4312 error = EOPNOTSUPP; 4313 goto done; 4314 } 4315 if (msg->msg_controllen != 0) { 4316 if (!(so_mode & SM_CONNREQUIRED)) { 4317 error = sosend_dgramcmsg(so, name, namelen, uiop, 4318 msg->msg_control, msg->msg_controllen, flags); 4319 } else { 4320 if (flags & MSG_OOB) { 4321 /* Can't generate T_EXDATA_REQ with options */ 4322 error = EOPNOTSUPP; 4323 goto done; 4324 } 4325 error = sosend_svccmsg(so, uiop, 4326 !(flags & MSG_EOR), 4327 msg->msg_control, msg->msg_controllen, 4328 flags); 4329 } 4330 goto done; 4331 } 4332 4333 if (!(so_mode & SM_CONNREQUIRED)) { 4334 /* 4335 * If there is no SO_DONTROUTE to turn off return immediately 4336 * from send_dgram. This can allow tail-call optimizations. 4337 */ 4338 if (!dontroute) { 4339 return (sosend_dgram(so, name, namelen, uiop, flags)); 4340 } 4341 error = sosend_dgram(so, name, namelen, uiop, flags); 4342 } else { 4343 t_scalar_t prim; 4344 int sflag; 4345 4346 /* Ignore msg_name in the connected state */ 4347 if (flags & MSG_OOB) { 4348 prim = T_EXDATA_REQ; 4349 /* 4350 * Send down T_EXDATA_REQ even if there is flow 4351 * control for data. 4352 */ 4353 sflag = MSG_IGNFLOW; 4354 } else { 4355 if (so_mode & SM_BYTESTREAM) { 4356 /* Byte stream transport - use write */ 4357 4358 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4359 /* 4360 * If there is no SO_DONTROUTE to turn off, 4361 * SS_DIRECT is on, and there is no flow 4362 * control, we can take the fast path. 4363 */ 4364 if (!dontroute && 4365 (so_state & SS_DIRECT) && 4366 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4367 return (sostream_direct(so, uiop, 4368 NULL, CRED())); 4369 } 4370 error = strwrite(SOTOV(so), uiop, CRED()); 4371 goto done; 4372 } 4373 prim = T_DATA_REQ; 4374 sflag = 0; 4375 } 4376 /* 4377 * If there is no SO_DONTROUTE to turn off return immediately 4378 * from sosend_svc. This can allow tail-call optimizations. 4379 */ 4380 if (!dontroute) 4381 return (sosend_svc(so, uiop, prim, 4382 !(flags & MSG_EOR), sflag)); 4383 error = sosend_svc(so, uiop, prim, 4384 !(flags & MSG_EOR), sflag); 4385 } 4386 ASSERT(dontroute); 4387 done: 4388 if (dontroute) { 4389 uint32_t val; 4390 4391 val = 0; 4392 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4393 &val, (t_uscalar_t)sizeof (val)); 4394 } 4395 return (error); 4396 } 4397 4398 /* 4399 * Sending data on a datagram socket. 4400 * Assumes caller has verified that SS_ISBOUND etc. are set. 4401 */ 4402 /* ARGSUSED */ 4403 static int 4404 sodgram_direct(struct sonode *so, struct sockaddr *name, 4405 socklen_t namelen, struct uio *uiop, int flags) 4406 { 4407 struct T_unitdata_req tudr; 4408 mblk_t *mp = NULL; 4409 int error = 0; 4410 void *addr; 4411 socklen_t addrlen; 4412 ssize_t len; 4413 struct stdata *stp = SOTOV(so)->v_stream; 4414 int so_state; 4415 queue_t *udp_wq; 4416 boolean_t connected; 4417 mblk_t *mpdata = NULL; 4418 4419 ASSERT(name != NULL && namelen != 0); 4420 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4421 ASSERT(!(so->so_mode & SM_EXDATA)); 4422 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4423 ASSERT(SOTOV(so)->v_type == VSOCK); 4424 4425 /* Caller checked for proper length */ 4426 len = uiop->uio_resid; 4427 ASSERT(len <= so->so_tidu_size); 4428 4429 /* Length and family checks have been done by caller */ 4430 ASSERT(name->sa_family == so->so_family); 4431 ASSERT(so->so_family == AF_INET || 4432 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4433 ASSERT(so->so_family == AF_INET6 || 4434 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4435 4436 addr = name; 4437 addrlen = namelen; 4438 4439 if (stp->sd_sidp != NULL && 4440 (error = straccess(stp, JCWRITE)) != 0) 4441 goto done; 4442 4443 so_state = so->so_state; 4444 4445 connected = so_state & SS_ISCONNECTED; 4446 if (!connected) { 4447 tudr.PRIM_type = T_UNITDATA_REQ; 4448 tudr.DEST_length = addrlen; 4449 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4450 tudr.OPT_length = 0; 4451 tudr.OPT_offset = 0; 4452 4453 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4454 _ALLOC_INTR); 4455 if (mp == NULL) { 4456 /* 4457 * Caught a signal waiting for memory. 4458 * Let send* return EINTR. 4459 */ 4460 error = EINTR; 4461 goto done; 4462 } 4463 } 4464 4465 /* 4466 * For UDP we don't break up the copyin into smaller pieces 4467 * as in the TCP case. That means if ENOMEM is returned by 4468 * mcopyinuio() then the uio vector has not been modified at 4469 * all and we fallback to either strwrite() or kstrputmsg() 4470 * below. Note also that we never generate priority messages 4471 * from here. 4472 */ 4473 udp_wq = stp->sd_wrq->q_next; 4474 if (canput(udp_wq) && 4475 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4476 ASSERT(DB_TYPE(mpdata) == M_DATA); 4477 ASSERT(uiop->uio_resid == 0); 4478 if (!connected) 4479 linkb(mp, mpdata); 4480 else 4481 mp = mpdata; 4482 if (audit_active) 4483 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4484 4485 udp_wput(udp_wq, mp); 4486 return (0); 4487 } 4488 4489 ASSERT(mpdata == NULL); 4490 if (error != 0 && error != ENOMEM) { 4491 freemsg(mp); 4492 return (error); 4493 } 4494 4495 /* 4496 * For connected, let strwrite() handle the blocking case. 4497 * Otherwise we fall thru and use kstrputmsg(). 4498 */ 4499 if (connected) 4500 return (strwrite(SOTOV(so), uiop, CRED())); 4501 4502 if (audit_active) 4503 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4504 4505 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4506 done: 4507 #ifdef SOCK_DEBUG 4508 if (error != 0) { 4509 eprintsoline(so, error); 4510 } 4511 #endif /* SOCK_DEBUG */ 4512 return (error); 4513 } 4514 4515 int 4516 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4517 { 4518 struct stdata *stp = SOTOV(so)->v_stream; 4519 ssize_t iosize, rmax, maxblk; 4520 queue_t *tcp_wq = stp->sd_wrq->q_next; 4521 mblk_t *newmp; 4522 int error = 0, wflag = 0; 4523 4524 ASSERT(so->so_mode & SM_BYTESTREAM); 4525 ASSERT(SOTOV(so)->v_type == VSOCK); 4526 4527 if (stp->sd_sidp != NULL && 4528 (error = straccess(stp, JCWRITE)) != 0) 4529 return (error); 4530 4531 if (uiop == NULL) { 4532 /* 4533 * kstrwritemp() should have checked sd_flag and 4534 * flow-control before coming here. If we end up 4535 * here it means that we can simply pass down the 4536 * data to tcp. 4537 */ 4538 ASSERT(mp != NULL); 4539 if (stp->sd_wputdatafunc != NULL) { 4540 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4541 NULL, NULL, NULL); 4542 if (newmp == NULL) { 4543 /* The caller will free mp */ 4544 return (ECOMM); 4545 } 4546 mp = newmp; 4547 } 4548 tcp_wput(tcp_wq, mp); 4549 return (0); 4550 } 4551 4552 /* Fallback to strwrite() to do proper error handling */ 4553 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4554 return (strwrite(SOTOV(so), uiop, cr)); 4555 4556 rmax = stp->sd_qn_maxpsz; 4557 ASSERT(rmax >= 0 || rmax == INFPSZ); 4558 if (rmax == 0 || uiop->uio_resid <= 0) 4559 return (0); 4560 4561 if (rmax == INFPSZ) 4562 rmax = uiop->uio_resid; 4563 4564 maxblk = stp->sd_maxblk; 4565 4566 for (;;) { 4567 iosize = MIN(uiop->uio_resid, rmax); 4568 4569 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4570 if (mp == NULL) { 4571 /* 4572 * Fallback to strwrite() for ENOMEM; if this 4573 * is our first time in this routine and the uio 4574 * vector has not been modified, we will end up 4575 * calling strwrite() without any flag set. 4576 */ 4577 if (error == ENOMEM) 4578 goto slow_send; 4579 else 4580 return (error); 4581 } 4582 ASSERT(uiop->uio_resid >= 0); 4583 /* 4584 * If mp is non-NULL and ENOMEM is set, it means that 4585 * mcopyinuio() was able to break down some of the user 4586 * data into one or more mblks. Send the partial data 4587 * to tcp and let the rest be handled in strwrite(). 4588 */ 4589 ASSERT(error == 0 || error == ENOMEM); 4590 if (stp->sd_wputdatafunc != NULL) { 4591 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4592 NULL, NULL, NULL); 4593 if (newmp == NULL) { 4594 /* The caller will free mp */ 4595 return (ECOMM); 4596 } 4597 mp = newmp; 4598 } 4599 tcp_wput(tcp_wq, mp); 4600 4601 wflag |= NOINTR; 4602 4603 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4604 ASSERT(error == 0); 4605 break; 4606 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4607 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4608 slow_send: 4609 /* 4610 * We were able to send down partial data using 4611 * the direct call interface, but are now relying 4612 * on strwrite() to handle the non-fastpath cases. 4613 * If the socket is blocking we will sleep in 4614 * strwaitq() until write is permitted, otherwise, 4615 * we will need to return the amount of bytes 4616 * written so far back to the app. This is the 4617 * reason why we pass NOINTR flag to strwrite() 4618 * for non-blocking socket, because we don't want 4619 * to return EAGAIN when portion of the user data 4620 * has actually been sent down. 4621 */ 4622 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4623 } 4624 } 4625 return (0); 4626 } 4627 4628 /* 4629 * Update so_faddr by asking the transport (unless AF_UNIX). 4630 */ 4631 int 4632 sotpi_getpeername(struct sonode *so) 4633 { 4634 struct strbuf strbuf; 4635 int error = 0, res; 4636 void *addr; 4637 t_uscalar_t addrlen; 4638 k_sigset_t smask; 4639 4640 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4641 (void *)so, pr_state(so->so_state, so->so_mode))); 4642 4643 mutex_enter(&so->so_lock); 4644 so_lock_single(so); /* Set SOLOCKED */ 4645 if (!(so->so_state & SS_ISCONNECTED)) { 4646 error = ENOTCONN; 4647 goto done; 4648 } 4649 /* Added this check for X/Open */ 4650 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4651 error = EINVAL; 4652 if (xnet_check_print) { 4653 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4654 } 4655 goto done; 4656 } 4657 #ifdef DEBUG 4658 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4659 pr_addr(so->so_family, so->so_faddr_sa, 4660 (t_uscalar_t)so->so_faddr_len))); 4661 #endif /* DEBUG */ 4662 4663 if (so->so_family == AF_UNIX) { 4664 /* Transport has different name space - return local info */ 4665 error = 0; 4666 goto done; 4667 } 4668 4669 ASSERT(so->so_faddr_sa); 4670 /* Allocate local buffer to use with ioctl */ 4671 addrlen = (t_uscalar_t)so->so_faddr_maxlen; 4672 mutex_exit(&so->so_lock); 4673 addr = kmem_alloc(addrlen, KM_SLEEP); 4674 4675 /* 4676 * Issue TI_GETPEERNAME with signals masked. 4677 * Put the result in so_faddr_sa so that getpeername works after 4678 * a shutdown(output). 4679 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4680 * back to the socket. 4681 */ 4682 strbuf.buf = addr; 4683 strbuf.maxlen = addrlen; 4684 strbuf.len = 0; 4685 4686 sigintr(&smask, 0); 4687 res = 0; 4688 ASSERT(CRED()); 4689 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4690 0, K_TO_K, CRED(), &res); 4691 sigunintr(&smask); 4692 4693 mutex_enter(&so->so_lock); 4694 /* 4695 * If there is an error record the error in so_error put don't fail 4696 * the getpeername. Instead fallback on the recorded 4697 * so->so_faddr_sa. 4698 */ 4699 if (error) { 4700 /* 4701 * Various stream head errors can be returned to the ioctl. 4702 * However, it is impossible to determine which ones of 4703 * these are really socket level errors that were incorrectly 4704 * consumed by the ioctl. Thus this code silently ignores the 4705 * error - to code explicitly does not reinstate the error 4706 * using soseterror(). 4707 * Experiments have shows that at least this set of 4708 * errors are reported and should not be reinstated on the 4709 * socket: 4710 * EINVAL E.g. if an I_LINK was in effect when 4711 * getpeername was called. 4712 * EPIPE The ioctl error semantics prefer the write 4713 * side error over the read side error. 4714 * ENOTCONN The transport just got disconnected but 4715 * sockfs had not yet seen the T_DISCON_IND 4716 * when issuing the ioctl. 4717 */ 4718 error = 0; 4719 } else if (res == 0 && strbuf.len > 0 && 4720 (so->so_state & SS_ISCONNECTED)) { 4721 ASSERT(strbuf.len <= (int)so->so_faddr_maxlen); 4722 so->so_faddr_len = (socklen_t)strbuf.len; 4723 bcopy(addr, so->so_faddr_sa, so->so_faddr_len); 4724 so->so_state |= SS_FADDR_VALID; 4725 } 4726 kmem_free(addr, addrlen); 4727 #ifdef DEBUG 4728 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4729 pr_addr(so->so_family, so->so_faddr_sa, 4730 (t_uscalar_t)so->so_faddr_len))); 4731 #endif /* DEBUG */ 4732 done: 4733 so_unlock_single(so, SOLOCKED); 4734 mutex_exit(&so->so_lock); 4735 return (error); 4736 } 4737 4738 /* 4739 * Update so_laddr by asking the transport (unless AF_UNIX). 4740 */ 4741 int 4742 sotpi_getsockname(struct sonode *so) 4743 { 4744 struct strbuf strbuf; 4745 int error = 0, res; 4746 void *addr; 4747 t_uscalar_t addrlen; 4748 k_sigset_t smask; 4749 4750 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4751 (void *)so, pr_state(so->so_state, so->so_mode))); 4752 4753 mutex_enter(&so->so_lock); 4754 so_lock_single(so); /* Set SOLOCKED */ 4755 if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) { 4756 /* Return an all zero address except for the family */ 4757 if (so->so_family == AF_INET) 4758 so->so_laddr_len = (socklen_t)sizeof (sin_t); 4759 else if (so->so_family == AF_INET6) 4760 so->so_laddr_len = (socklen_t)sizeof (sin6_t); 4761 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 4762 bzero(so->so_laddr_sa, so->so_laddr_len); 4763 /* 4764 * Can not assume there is a sa_family for all 4765 * protocol families. 4766 */ 4767 if (so->so_family == AF_INET || so->so_family == AF_INET6) 4768 so->so_laddr_sa->sa_family = so->so_family; 4769 } 4770 #ifdef DEBUG 4771 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4772 pr_addr(so->so_family, so->so_laddr_sa, 4773 (t_uscalar_t)so->so_laddr_len))); 4774 #endif /* DEBUG */ 4775 if (so->so_family == AF_UNIX) { 4776 /* Transport has different name space - return local info */ 4777 error = 0; 4778 goto done; 4779 } 4780 if (!(so->so_state & SS_ISBOUND)) { 4781 /* If not bound, then nothing to return. */ 4782 error = 0; 4783 goto done; 4784 } 4785 /* Allocate local buffer to use with ioctl */ 4786 addrlen = (t_uscalar_t)so->so_laddr_maxlen; 4787 mutex_exit(&so->so_lock); 4788 addr = kmem_alloc(addrlen, KM_SLEEP); 4789 4790 /* 4791 * Issue TI_GETMYNAME with signals masked. 4792 * Put the result in so_laddr_sa so that getsockname works after 4793 * a shutdown(output). 4794 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4795 * back to the socket. 4796 */ 4797 strbuf.buf = addr; 4798 strbuf.maxlen = addrlen; 4799 strbuf.len = 0; 4800 4801 sigintr(&smask, 0); 4802 res = 0; 4803 ASSERT(CRED()); 4804 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4805 0, K_TO_K, CRED(), &res); 4806 sigunintr(&smask); 4807 4808 mutex_enter(&so->so_lock); 4809 /* 4810 * If there is an error record the error in so_error put don't fail 4811 * the getsockname. Instead fallback on the recorded 4812 * so->so_laddr_sa. 4813 */ 4814 if (error) { 4815 /* 4816 * Various stream head errors can be returned to the ioctl. 4817 * However, it is impossible to determine which ones of 4818 * these are really socket level errors that were incorrectly 4819 * consumed by the ioctl. Thus this code silently ignores the 4820 * error - to code explicitly does not reinstate the error 4821 * using soseterror(). 4822 * Experiments have shows that at least this set of 4823 * errors are reported and should not be reinstated on the 4824 * socket: 4825 * EINVAL E.g. if an I_LINK was in effect when 4826 * getsockname was called. 4827 * EPIPE The ioctl error semantics prefer the write 4828 * side error over the read side error. 4829 */ 4830 error = 0; 4831 } else if (res == 0 && strbuf.len > 0 && 4832 (so->so_state & SS_ISBOUND)) { 4833 ASSERT(strbuf.len <= (int)so->so_laddr_maxlen); 4834 so->so_laddr_len = (socklen_t)strbuf.len; 4835 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 4836 so->so_state |= SS_LADDR_VALID; 4837 } 4838 kmem_free(addr, addrlen); 4839 #ifdef DEBUG 4840 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 4841 pr_addr(so->so_family, so->so_laddr_sa, 4842 (t_uscalar_t)so->so_laddr_len))); 4843 #endif /* DEBUG */ 4844 done: 4845 so_unlock_single(so, SOLOCKED); 4846 mutex_exit(&so->so_lock); 4847 return (error); 4848 } 4849 4850 /* 4851 * Get socket options. For SOL_SOCKET options some options are handled 4852 * by the sockfs while others use the value recorded in the sonode as a 4853 * fallback should the T_SVR4_OPTMGMT_REQ fail. 4854 * 4855 * On the return most *optlenp bytes are copied to optval. 4856 */ 4857 int 4858 sotpi_getsockopt(struct sonode *so, int level, int option_name, 4859 void *optval, socklen_t *optlenp, int flags) 4860 { 4861 struct T_optmgmt_req optmgmt_req; 4862 struct T_optmgmt_ack *optmgmt_ack; 4863 struct opthdr oh; 4864 struct opthdr *opt_res; 4865 mblk_t *mp = NULL; 4866 int error = 0; 4867 void *option = NULL; /* Set if fallback value */ 4868 t_uscalar_t maxlen = *optlenp; 4869 t_uscalar_t len; 4870 uint32_t value; 4871 4872 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 4873 (void *)so, level, option_name, optval, (void *)optlenp, 4874 pr_state(so->so_state, so->so_mode))); 4875 4876 mutex_enter(&so->so_lock); 4877 so_lock_single(so); /* Set SOLOCKED */ 4878 4879 /* 4880 * Check for SOL_SOCKET options. 4881 * Certain SOL_SOCKET options are returned directly whereas 4882 * others only provide a default (fallback) value should 4883 * the T_SVR4_OPTMGMT_REQ fail. 4884 */ 4885 if (level == SOL_SOCKET) { 4886 /* Check parameters */ 4887 switch (option_name) { 4888 case SO_TYPE: 4889 case SO_ERROR: 4890 case SO_DEBUG: 4891 case SO_ACCEPTCONN: 4892 case SO_REUSEADDR: 4893 case SO_KEEPALIVE: 4894 case SO_DONTROUTE: 4895 case SO_BROADCAST: 4896 case SO_USELOOPBACK: 4897 case SO_OOBINLINE: 4898 case SO_SNDBUF: 4899 case SO_RCVBUF: 4900 #ifdef notyet 4901 case SO_SNDLOWAT: 4902 case SO_RCVLOWAT: 4903 case SO_SNDTIMEO: 4904 case SO_RCVTIMEO: 4905 #endif /* notyet */ 4906 case SO_DOMAIN: 4907 case SO_DGRAM_ERRIND: 4908 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 4909 error = EINVAL; 4910 eprintsoline(so, error); 4911 goto done2; 4912 } 4913 break; 4914 case SO_LINGER: 4915 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 4916 error = EINVAL; 4917 eprintsoline(so, error); 4918 goto done2; 4919 } 4920 break; 4921 } 4922 4923 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 4924 4925 switch (option_name) { 4926 case SO_TYPE: 4927 value = so->so_type; 4928 option = &value; 4929 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4930 4931 case SO_ERROR: 4932 value = sogeterr(so); 4933 option = &value; 4934 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4935 4936 case SO_ACCEPTCONN: 4937 if (so->so_state & SS_ACCEPTCONN) 4938 value = SO_ACCEPTCONN; 4939 else 4940 value = 0; 4941 #ifdef DEBUG 4942 if (value) { 4943 dprintso(so, 1, 4944 ("sotpi_getsockopt: 0x%x is set\n", 4945 option_name)); 4946 } else { 4947 dprintso(so, 1, 4948 ("sotpi_getsockopt: 0x%x not set\n", 4949 option_name)); 4950 } 4951 #endif /* DEBUG */ 4952 option = &value; 4953 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4954 4955 case SO_DEBUG: 4956 case SO_REUSEADDR: 4957 case SO_KEEPALIVE: 4958 case SO_DONTROUTE: 4959 case SO_BROADCAST: 4960 case SO_USELOOPBACK: 4961 case SO_OOBINLINE: 4962 case SO_DGRAM_ERRIND: 4963 value = (so->so_options & option_name); 4964 #ifdef DEBUG 4965 if (value) { 4966 dprintso(so, 1, 4967 ("sotpi_getsockopt: 0x%x is set\n", 4968 option_name)); 4969 } else { 4970 dprintso(so, 1, 4971 ("sotpi_getsockopt: 0x%x not set\n", 4972 option_name)); 4973 } 4974 #endif /* DEBUG */ 4975 option = &value; 4976 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4977 4978 /* 4979 * The following options are only returned by sockfs when the 4980 * T_SVR4_OPTMGMT_REQ fails. 4981 */ 4982 case SO_LINGER: 4983 option = &so->so_linger; 4984 len = (t_uscalar_t)sizeof (struct linger); 4985 break; 4986 case SO_SNDBUF: { 4987 ssize_t lvalue; 4988 4989 /* 4990 * If the option has not been set then get a default 4991 * value from the read queue. This value is 4992 * returned if the transport fails 4993 * the T_SVR4_OPTMGMT_REQ. 4994 */ 4995 lvalue = so->so_sndbuf; 4996 if (lvalue == 0) { 4997 mutex_exit(&so->so_lock); 4998 (void) strqget(strvp2wq(SOTOV(so))->q_next, 4999 QHIWAT, 0, &lvalue); 5000 mutex_enter(&so->so_lock); 5001 dprintso(so, 1, 5002 ("got SO_SNDBUF %ld from q\n", lvalue)); 5003 } 5004 value = (int)lvalue; 5005 option = &value; 5006 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5007 break; 5008 } 5009 case SO_RCVBUF: { 5010 ssize_t lvalue; 5011 5012 /* 5013 * If the option has not been set then get a default 5014 * value from the read queue. This value is 5015 * returned if the transport fails 5016 * the T_SVR4_OPTMGMT_REQ. 5017 * 5018 * XXX If SO_RCVBUF has been set and this is an 5019 * XPG 4.2 application then do not ask the transport 5020 * since the transport might adjust the value and not 5021 * return exactly what was set by the application. 5022 * For non-XPG 4.2 application we return the value 5023 * that the transport is actually using. 5024 */ 5025 lvalue = so->so_rcvbuf; 5026 if (lvalue == 0) { 5027 mutex_exit(&so->so_lock); 5028 (void) strqget(RD(strvp2wq(SOTOV(so))), 5029 QHIWAT, 0, &lvalue); 5030 mutex_enter(&so->so_lock); 5031 dprintso(so, 1, 5032 ("got SO_RCVBUF %ld from q\n", lvalue)); 5033 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5034 value = (int)lvalue; 5035 option = &value; 5036 goto copyout; /* skip asking transport */ 5037 } 5038 value = (int)lvalue; 5039 option = &value; 5040 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5041 break; 5042 } 5043 case SO_DOMAIN: 5044 value = so->so_family; 5045 option = &value; 5046 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5047 5048 #ifdef notyet 5049 /* 5050 * We do not implement the semantics of these options 5051 * thus we shouldn't implement the options either. 5052 */ 5053 case SO_SNDLOWAT: 5054 value = so->so_sndlowat; 5055 option = &value; 5056 break; 5057 case SO_RCVLOWAT: 5058 value = so->so_rcvlowat; 5059 option = &value; 5060 break; 5061 case SO_SNDTIMEO: 5062 value = so->so_sndtimeo; 5063 option = &value; 5064 break; 5065 case SO_RCVTIMEO: 5066 value = so->so_rcvtimeo; 5067 option = &value; 5068 break; 5069 #endif /* notyet */ 5070 } 5071 } 5072 5073 mutex_exit(&so->so_lock); 5074 5075 /* Send request */ 5076 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5077 optmgmt_req.MGMT_flags = T_CHECK; 5078 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5079 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5080 5081 oh.level = level; 5082 oh.name = option_name; 5083 oh.len = maxlen; 5084 5085 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5086 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 5087 /* Let option management work in the presence of data flow control */ 5088 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5089 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5090 mp = NULL; 5091 mutex_enter(&so->so_lock); 5092 if (error) { 5093 eprintsoline(so, error); 5094 goto done2; 5095 } 5096 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5097 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5098 if (error) { 5099 if (option != NULL) { 5100 /* We have a fallback value */ 5101 error = 0; 5102 goto copyout; 5103 } 5104 eprintsoline(so, error); 5105 goto done2; 5106 } 5107 ASSERT(mp); 5108 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5109 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5110 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5111 if (opt_res == NULL) { 5112 if (option != NULL) { 5113 /* We have a fallback value */ 5114 error = 0; 5115 goto copyout; 5116 } 5117 error = EPROTO; 5118 eprintsoline(so, error); 5119 goto done; 5120 } 5121 option = &opt_res[1]; 5122 5123 /* check to ensure that the option is within bounds */ 5124 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5125 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5126 if (option != NULL) { 5127 /* We have a fallback value */ 5128 error = 0; 5129 goto copyout; 5130 } 5131 error = EPROTO; 5132 eprintsoline(so, error); 5133 goto done; 5134 } 5135 5136 len = opt_res->len; 5137 5138 copyout: { 5139 t_uscalar_t size = MIN(len, maxlen); 5140 bcopy(option, optval, size); 5141 bcopy(&size, optlenp, sizeof (size)); 5142 } 5143 done: 5144 freemsg(mp); 5145 done2: 5146 so_unlock_single(so, SOLOCKED); 5147 mutex_exit(&so->so_lock); 5148 return (error); 5149 } 5150 5151 /* 5152 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5153 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5154 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5155 * setsockopt has to work even if the transport does not support the option. 5156 */ 5157 int 5158 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5159 const void *optval, t_uscalar_t optlen) 5160 { 5161 struct T_optmgmt_req optmgmt_req; 5162 struct opthdr oh; 5163 mblk_t *mp; 5164 int error = 0; 5165 boolean_t handled = B_FALSE; 5166 5167 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5168 (void *)so, level, option_name, optval, optlen, 5169 pr_state(so->so_state, so->so_mode))); 5170 5171 5172 /* X/Open requires this check */ 5173 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5174 if (xnet_check_print) 5175 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5176 return (EINVAL); 5177 } 5178 5179 /* Caller allocates aligned optval, or passes null */ 5180 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 5181 /* If optval is null optlen is 0, and vice-versa */ 5182 ASSERT(optval != NULL || optlen == 0); 5183 ASSERT(optlen != 0 || optval == NULL); 5184 5185 mutex_enter(&so->so_lock); 5186 so_lock_single(so); /* Set SOLOCKED */ 5187 mutex_exit(&so->so_lock); 5188 5189 /* 5190 * For SOCKET or TCP level options, try to set it here itself 5191 * provided socket has not been popped and we know the tcp 5192 * structure (stored in so_priv). 5193 */ 5194 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5195 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5196 (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) { 5197 tcp_t *tcp = so->so_priv; 5198 boolean_t onoff; 5199 5200 #define intvalue (*(int32_t *)optval) 5201 5202 switch (level) { 5203 case SOL_SOCKET: 5204 switch (option_name) { /* Check length param */ 5205 case SO_DEBUG: 5206 case SO_REUSEADDR: 5207 case SO_DONTROUTE: 5208 case SO_BROADCAST: 5209 case SO_USELOOPBACK: 5210 case SO_OOBINLINE: 5211 case SO_DGRAM_ERRIND: 5212 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5213 error = EINVAL; 5214 eprintsoline(so, error); 5215 mutex_enter(&so->so_lock); 5216 goto done2; 5217 } 5218 ASSERT(optval); 5219 onoff = intvalue != 0; 5220 handled = B_TRUE; 5221 break; 5222 case SO_LINGER: 5223 if (optlen != 5224 (t_uscalar_t)sizeof (struct linger)) { 5225 error = EINVAL; 5226 eprintsoline(so, error); 5227 mutex_enter(&so->so_lock); 5228 goto done2; 5229 } 5230 ASSERT(optval); 5231 handled = B_TRUE; 5232 break; 5233 } 5234 5235 switch (option_name) { /* Do actions */ 5236 case SO_LINGER: { 5237 struct linger *lgr = (struct linger *)optval; 5238 5239 if (lgr->l_onoff) { 5240 tcp->tcp_linger = 1; 5241 tcp->tcp_lingertime = lgr->l_linger; 5242 so->so_linger.l_onoff = SO_LINGER; 5243 so->so_options |= SO_LINGER; 5244 } else { 5245 tcp->tcp_linger = 0; 5246 tcp->tcp_lingertime = 0; 5247 so->so_linger.l_onoff = 0; 5248 so->so_options &= ~SO_LINGER; 5249 } 5250 so->so_linger.l_linger = lgr->l_linger; 5251 handled = B_TRUE; 5252 break; 5253 } 5254 case SO_DEBUG: 5255 tcp->tcp_debug = onoff; 5256 #ifdef SOCK_TEST 5257 if (intvalue & 2) 5258 sock_test_timelimit = 10 * hz; 5259 else 5260 sock_test_timelimit = 0; 5261 5262 if (intvalue & 4) 5263 do_useracc = 0; 5264 else 5265 do_useracc = 1; 5266 #endif /* SOCK_TEST */ 5267 break; 5268 case SO_DONTROUTE: 5269 /* 5270 * SO_DONTROUTE, SO_USELOOPBACK and 5271 * SO_BROADCAST are only of interest to IP. 5272 * We track them here only so 5273 * that we can report their current value. 5274 */ 5275 tcp->tcp_dontroute = onoff; 5276 if (onoff) 5277 so->so_options |= option_name; 5278 else 5279 so->so_options &= ~option_name; 5280 break; 5281 case SO_USELOOPBACK: 5282 tcp->tcp_useloopback = onoff; 5283 if (onoff) 5284 so->so_options |= option_name; 5285 else 5286 so->so_options &= ~option_name; 5287 break; 5288 case SO_BROADCAST: 5289 tcp->tcp_broadcast = onoff; 5290 if (onoff) 5291 so->so_options |= option_name; 5292 else 5293 so->so_options &= ~option_name; 5294 break; 5295 case SO_REUSEADDR: 5296 tcp->tcp_reuseaddr = onoff; 5297 if (onoff) 5298 so->so_options |= option_name; 5299 else 5300 so->so_options &= ~option_name; 5301 break; 5302 case SO_OOBINLINE: 5303 tcp->tcp_oobinline = onoff; 5304 if (onoff) 5305 so->so_options |= option_name; 5306 else 5307 so->so_options &= ~option_name; 5308 break; 5309 case SO_DGRAM_ERRIND: 5310 tcp->tcp_dgram_errind = onoff; 5311 if (onoff) 5312 so->so_options |= option_name; 5313 else 5314 so->so_options &= ~option_name; 5315 break; 5316 } 5317 break; 5318 case IPPROTO_TCP: 5319 switch (option_name) { 5320 case TCP_NODELAY: 5321 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5322 error = EINVAL; 5323 eprintsoline(so, error); 5324 mutex_enter(&so->so_lock); 5325 goto done2; 5326 } 5327 ASSERT(optval); 5328 tcp->tcp_naglim = intvalue ? 1 : tcp->tcp_mss; 5329 handled = B_TRUE; 5330 break; 5331 } 5332 break; 5333 default: 5334 handled = B_FALSE; 5335 break; 5336 } 5337 } 5338 5339 if (handled) { 5340 mutex_enter(&so->so_lock); 5341 goto done2; 5342 } 5343 5344 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5345 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5346 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5347 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5348 5349 oh.level = level; 5350 oh.name = option_name; 5351 oh.len = optlen; 5352 5353 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5354 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 5355 /* Let option management work in the presence of data flow control */ 5356 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5357 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5358 mp = NULL; 5359 mutex_enter(&so->so_lock); 5360 if (error) { 5361 eprintsoline(so, error); 5362 goto done; 5363 } 5364 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5365 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5366 if (error) { 5367 eprintsoline(so, error); 5368 goto done; 5369 } 5370 ASSERT(mp); 5371 /* No need to verify T_optmgmt_ack */ 5372 freemsg(mp); 5373 done: 5374 /* 5375 * Check for SOL_SOCKET options and record their values. 5376 * If we know about a SOL_SOCKET parameter and the transport 5377 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5378 * EPROTO) we let the setsockopt succeed. 5379 */ 5380 if (level == SOL_SOCKET) { 5381 /* Check parameters */ 5382 switch (option_name) { 5383 case SO_DEBUG: 5384 case SO_REUSEADDR: 5385 case SO_KEEPALIVE: 5386 case SO_DONTROUTE: 5387 case SO_BROADCAST: 5388 case SO_USELOOPBACK: 5389 case SO_OOBINLINE: 5390 case SO_SNDBUF: 5391 case SO_RCVBUF: 5392 #ifdef notyet 5393 case SO_SNDLOWAT: 5394 case SO_RCVLOWAT: 5395 case SO_SNDTIMEO: 5396 case SO_RCVTIMEO: 5397 #endif /* notyet */ 5398 case SO_DGRAM_ERRIND: 5399 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5400 error = EINVAL; 5401 eprintsoline(so, error); 5402 goto done2; 5403 } 5404 ASSERT(optval); 5405 handled = B_TRUE; 5406 break; 5407 case SO_LINGER: 5408 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5409 error = EINVAL; 5410 eprintsoline(so, error); 5411 goto done2; 5412 } 5413 ASSERT(optval); 5414 handled = B_TRUE; 5415 break; 5416 } 5417 5418 #define intvalue (*(int32_t *)optval) 5419 5420 switch (option_name) { 5421 case SO_TYPE: 5422 case SO_ERROR: 5423 case SO_ACCEPTCONN: 5424 /* Can't be set */ 5425 error = ENOPROTOOPT; 5426 goto done2; 5427 case SO_LINGER: { 5428 struct linger *l = (struct linger *)optval; 5429 5430 so->so_linger.l_linger = l->l_linger; 5431 if (l->l_onoff) { 5432 so->so_linger.l_onoff = SO_LINGER; 5433 so->so_options |= SO_LINGER; 5434 } else { 5435 so->so_linger.l_onoff = 0; 5436 so->so_options &= ~SO_LINGER; 5437 } 5438 break; 5439 } 5440 5441 case SO_DEBUG: 5442 #ifdef SOCK_TEST 5443 if (intvalue & 2) 5444 sock_test_timelimit = 10 * hz; 5445 else 5446 sock_test_timelimit = 0; 5447 5448 if (intvalue & 4) 5449 do_useracc = 0; 5450 else 5451 do_useracc = 1; 5452 #endif /* SOCK_TEST */ 5453 /* FALLTHRU */ 5454 case SO_REUSEADDR: 5455 case SO_KEEPALIVE: 5456 case SO_DONTROUTE: 5457 case SO_BROADCAST: 5458 case SO_USELOOPBACK: 5459 case SO_OOBINLINE: 5460 case SO_DGRAM_ERRIND: 5461 if (intvalue != 0) { 5462 dprintso(so, 1, 5463 ("sotpi_setsockopt: setting 0x%x\n", 5464 option_name)); 5465 so->so_options |= option_name; 5466 } else { 5467 dprintso(so, 1, 5468 ("sotpi_setsockopt: clearing 0x%x\n", 5469 option_name)); 5470 so->so_options &= ~option_name; 5471 } 5472 break; 5473 /* 5474 * The following options are only returned by us when the 5475 * T_SVR4_OPTMGMT_REQ fails. 5476 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5477 * since the transport might adjust the value and not 5478 * return exactly what was set by the application. 5479 */ 5480 case SO_SNDBUF: 5481 so->so_sndbuf = intvalue; 5482 break; 5483 case SO_RCVBUF: 5484 so->so_rcvbuf = intvalue; 5485 break; 5486 #ifdef notyet 5487 /* 5488 * We do not implement the semantics of these options 5489 * thus we shouldn't implement the options either. 5490 */ 5491 case SO_SNDLOWAT: 5492 so->so_sndlowat = intvalue; 5493 break; 5494 case SO_RCVLOWAT: 5495 so->so_rcvlowat = intvalue; 5496 break; 5497 case SO_SNDTIMEO: 5498 so->so_sndtimeo = intvalue; 5499 break; 5500 case SO_RCVTIMEO: 5501 so->so_rcvtimeo = intvalue; 5502 break; 5503 #endif /* notyet */ 5504 } 5505 #undef intvalue 5506 5507 if (error) { 5508 if ((error == ENOPROTOOPT || error == EPROTO || 5509 error == EINVAL) && handled) { 5510 dprintso(so, 1, 5511 ("setsockopt: ignoring error %d for 0x%x\n", 5512 error, option_name)); 5513 error = 0; 5514 } 5515 } 5516 } 5517 done2: 5518 ret: 5519 so_unlock_single(so, SOLOCKED); 5520 mutex_exit(&so->so_lock); 5521 return (error); 5522 } 5523