1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/ddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/vtrace.h> 55 #include <sys/cmn_err.h> 56 #include <sys/pathname.h> 57 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/sockio.h> 61 #include <sys/sodirect.h> 62 #include <netinet/in.h> 63 #include <sys/un.h> 64 #include <sys/strsun.h> 65 66 #include <sys/tiuser.h> 67 #define _SUN_TPI_VERSION 
2 68 #include <sys/tihdr.h> 69 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 70 71 #include <c2/audit.h> 72 73 #include <inet/common.h> 74 #include <inet/ip.h> 75 #include <inet/ip6.h> 76 #include <inet/tcp.h> 77 #include <inet/udp_impl.h> 78 79 #include <sys/zone.h> 80 81 #include <fs/sockfs/nl7c.h> 82 #include <fs/sockfs/nl7curi.h> 83 84 #include <inet/kssl/ksslapi.h> 85 86 /* 87 * Possible failures when memory can't be allocated. The documented behavior: 88 * 89 * 5.5: 4.X: XNET: 90 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 91 * EINTR 92 * (4.X does not document EINTR but returns it) 93 * bind: ENOSR - ENOBUFS/ENOSR 94 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 95 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 96 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 97 * (4.X getpeername and getsockname do not fail in practice) 98 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 99 * listen: - - ENOBUFS 100 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 101 * EINTR 102 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 103 * EINTR 104 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 105 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 106 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 107 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 108 * 109 * Resolution. When allocation fails: 110 * recv: return EINTR 111 * send: return EINTR 112 * connect, accept: EINTR 113 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 114 * socket, socketpair: ENOBUFS 115 * getpeername, getsockname: sleep 116 * getsockopt, setsockopt: sleep 117 */ 118 119 #ifdef SOCK_TEST 120 /* 121 * Variables that make sockfs do something other than the standard TPI 122 * for the AF_INET transports. 123 * 124 * solisten_tpi_tcp: 125 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 126 * the transport is already bound. 
This is needed to avoid losing the
 *	port number should listen() do a T_UNBIND_REQ followed by a
 *	O_T_BIND_REQ.
 *
 * soconnect_tpi_udp:
 *	UDP and ICMP can handle a T_CONN_REQ.
 *	This is needed to make the sequence of connect(), getsockname()
 *	return the local IP address used to send packets to the connected to
 *	destination.
 *
 * soconnect_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
 *	Set this to non-zero to send TPI conformant messages to TCP in this
 *	respect. This is a performance optimization.
 *
 * soaccept_tpi_tcp:
 *	TCP can handle a T_CONN_REQ without the acceptor being bound.
 *	This is a performance optimization that has been picked up in XTI.
 *
 * soaccept_tpi_multioptions:
 *	When inheriting SOL_SOCKET options from the listener to the accepting
 *	socket send them as a single message for AF_INET{,6}.
 *
 * With SOCK_TEST defined these are real variables that can be changed;
 * without it they are compile-time constants with the same default values.
 */
int solisten_tpi_tcp = 0;
int soconnect_tpi_udp = 0;
int soconnect_tpi_tcp = 0;
int soaccept_tpi_tcp = 0;
int soaccept_tpi_multioptions = 1;
#else /* SOCK_TEST */
#define	soconnect_tpi_tcp	0
#define	soconnect_tpi_udp	0
#define	solisten_tpi_tcp	0
#define	soaccept_tpi_tcp	0
#define	soaccept_tpi_multioptions	1
#endif /* SOCK_TEST */

#ifdef SOCK_TEST
/* Test-only knobs defined elsewhere; not referenced in this part of the file */
extern int do_useracc;
extern clock_t sock_test_timelimit;
#endif /* SOCK_TEST */

/*
 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
 * applications working. Turn on this flag (xnet_skip_checks) to disable
 * these checks.
 *
 * xnet_check_print: when non-zero, a message is printed when one of these
 * checks fails a request (see the X/Open bind state check in
 * sotpi_bindlisten()).
 *
 * xnet_truncate_print: not referenced in this part of the file; presumably
 * controls a similar diagnostic for truncation - TODO confirm against its
 * users.
 */
int xnet_skip_checks = 0;
int xnet_check_print = 0;
int xnet_truncate_print = 0;

extern void sigintr(k_sigset_t *, int);
extern void sigunintr(k_sigset_t *);

/* NL7C address list hooks; used by sotpi_bindlisten() when nl7c_enabled */
extern	void *nl7c_lookup_addr(void *, t_uscalar_t);
extern	void *nl7c_add_addr(void *, t_uscalar_t);
extern	void nl7c_listener_addr(void *, struct sonode *);

/* Sockets acting as an in-kernel SSL proxy */
extern mblk_t	*strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);
extern mblk_t	*strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *,
		    strsigset_t *, strsigset_t *, strpollset_t *);

/* Forward declaration: sotpi_bindlisten() calls this before its definition */
static int	sotpi_unbind(struct sonode *, int);

extern int	sodput(sodirect_t *, mblk_t *);
extern void	sodwakeup(sodirect_t *);

/* TPI sockfs sonode operations */
static int	sotpi_accept(struct sonode *, int, struct sonode **);
static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
		    int);
static int	sotpi_connect(struct sonode *, const struct sockaddr *,
		    socklen_t, int, int);
static int	sotpi_listen(struct sonode *, int);
static int	sotpi_sendmsg(struct sonode *, struct nmsghdr *,
		    struct uio *);
static int	sotpi_shutdown(struct sonode *, int);
static int	sotpi_getsockname(struct sonode *);
static int	sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t,
		    struct uio *, void *, t_uscalar_t, int);
static int	sodgram_direct(struct sonode *, struct sockaddr *,
		    socklen_t, struct uio *, int);

/*
 * Operations vector for TPI-based sockets.  The initializers are
 * positional; the trailing comment on each line names the sonodeops_t
 * slot that entry is meant to fill, so the order here must track the
 * member order of sonodeops_t.
 */
sonodeops_t sotpi_sonodeops = {
	sotpi_accept,		/* sop_accept */
	sotpi_bind,		/* sop_bind */
	sotpi_listen,		/* sop_listen */
	sotpi_connect,		/* sop_connect */
	sotpi_recvmsg,		/* sop_recvmsg */
	sotpi_sendmsg,		/* sop_sendmsg */
	sotpi_getpeername,	/* sop_getpeername */
	sotpi_getsockname,	/* sop_getsockname */
	sotpi_shutdown,		/* sop_shutdown */
	sotpi_getsockopt,	/* sop_getsockopt */
sotpi_setsockopt /* sop_setsockopt */ 221 }; 222 223 /* 224 * Common create code for socket and accept. If tso is set the values 225 * from that node is used instead of issuing a T_INFO_REQ. 226 * 227 * Assumes that the caller has a VN_HOLD on accessvp. 228 * The VN_RELE will occur either when sotpi_create() fails or when 229 * the returned sonode is freed. 230 */ 231 struct sonode * 232 sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version, 233 struct sonode *tso, int *errorp) 234 { 235 struct sonode *so; 236 vnode_t *vp; 237 int flags, error; 238 239 ASSERT(accessvp != NULL); 240 vp = makesockvp(accessvp, domain, type, protocol); 241 ASSERT(vp != NULL); 242 so = VTOSO(vp); 243 244 flags = FREAD|FWRITE; 245 246 if ((type == SOCK_STREAM || type == SOCK_DGRAM) && 247 (domain == AF_INET || domain == AF_INET6) && 248 (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 249 protocol == IPPROTO_IP)) { 250 /* Tell tcp or udp that it's talking to sockets */ 251 flags |= SO_SOCKSTR; 252 253 /* 254 * Here we indicate to socktpi_open() our attempt to 255 * make direct calls between sockfs and transport. 256 * The final decision is left to socktpi_open(). 257 */ 258 so->so_state |= SS_DIRECT; 259 260 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 261 if (so->so_type == SOCK_STREAM && tso != NULL) { 262 if (tso->so_state & SS_DIRECT) { 263 /* 264 * Inherit SS_DIRECT from listener and pass 265 * SO_ACCEPTOR open flag to tcp, indicating 266 * that this is an accept fast-path instance. 267 */ 268 flags |= SO_ACCEPTOR; 269 } else { 270 /* 271 * SS_DIRECT is not set on listener, meaning 272 * that the listener has been converted from 273 * a socket to a stream. Ensure that the 274 * acceptor inherits these settings. 275 */ 276 so->so_state &= ~SS_DIRECT; 277 flags &= ~SO_SOCKSTR; 278 } 279 } 280 } 281 282 /* 283 * Tell local transport that it is talking to sockets. 
284 */ 285 if (so->so_family == AF_UNIX) { 286 flags |= SO_SOCKSTR; 287 } 288 289 /* Initialize the kernel SSL proxy fields */ 290 so->so_kssl_type = KSSL_NO_PROXY; 291 so->so_kssl_ent = NULL; 292 so->so_kssl_ctx = NULL; 293 294 if (error = socktpi_open(&vp, flags, CRED(), NULL)) { 295 VN_RELE(vp); 296 *errorp = error; 297 return (NULL); 298 } 299 300 if (error = so_strinit(so, tso)) { 301 (void) VOP_CLOSE(vp, 0, 1, 0, CRED(), NULL); 302 VN_RELE(vp); 303 *errorp = error; 304 return (NULL); 305 } 306 307 if (version == SOV_DEFAULT) 308 version = so_default_version; 309 310 so->so_version = (short)version; 311 312 return (so); 313 } 314 315 /* 316 * Bind the socket to an unspecified address in sockfs only. 317 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 318 * required in all cases. 319 */ 320 static void 321 so_automatic_bind(struct sonode *so) 322 { 323 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 324 325 ASSERT(MUTEX_HELD(&so->so_lock)); 326 ASSERT(!(so->so_state & SS_ISBOUND)); 327 ASSERT(so->so_unbind_mp); 328 329 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 330 bzero(so->so_laddr_sa, so->so_laddr_len); 331 so->so_laddr_sa->sa_family = so->so_family; 332 so->so_state |= SS_ISBOUND; 333 } 334 335 336 /* 337 * bind the socket. 338 * 339 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 340 * are passed in we allow rebinding. Note that for backwards compatibility 341 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 342 * Thus the rebinding code is currently not executed. 343 * 344 * The constraints for rebinding are: 345 * - it is a SOCK_DGRAM, or 346 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 347 * and no listen() has been done. 348 * This rebinding code was added based on some language in the XNET book 349 * about not returning EINVAL it the protocol allows rebinding. However, 350 * this language is not present in the Posix socket draft. 
Thus maybe the 351 * rebinding logic should be deleted from the source. 352 * 353 * A null "name" can be used to unbind the socket if: 354 * - it is a SOCK_DGRAM, or 355 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 356 * and no listen() has been done. 357 */ 358 static int 359 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 360 socklen_t namelen, int backlog, int flags) 361 { 362 struct T_bind_req bind_req; 363 struct T_bind_ack *bind_ack; 364 int error = 0; 365 mblk_t *mp; 366 void *addr; 367 t_uscalar_t addrlen; 368 int unbind_on_err = 1; 369 boolean_t clear_acceptconn_on_err = B_FALSE; 370 boolean_t restore_backlog_on_err = B_FALSE; 371 int save_so_backlog; 372 t_scalar_t PRIM_type = O_T_BIND_REQ; 373 boolean_t tcp_udp_xport; 374 void *nl7c = NULL; 375 376 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 377 so, name, namelen, backlog, flags, 378 pr_state(so->so_state, so->so_mode))); 379 380 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 381 382 if (!(flags & _SOBIND_LOCK_HELD)) { 383 mutex_enter(&so->so_lock); 384 so_lock_single(so); /* Set SOLOCKED */ 385 } else { 386 ASSERT(MUTEX_HELD(&so->so_lock)); 387 ASSERT(so->so_flag & SOLOCKED); 388 } 389 390 /* 391 * Make sure that there is a preallocated unbind_req message 392 * before binding. This message allocated when the socket is 393 * created but it might be have been consumed. 394 */ 395 if (so->so_unbind_mp == NULL) { 396 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 397 /* NOTE: holding so_lock while sleeping */ 398 so->so_unbind_mp = 399 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 400 } 401 402 if (flags & _SOBIND_REBIND) { 403 /* 404 * Called from solisten after doing an sotpi_unbind() or 405 * potentially without the unbind (latter for AF_INET{,6}). 
406 */ 407 ASSERT(name == NULL && namelen == 0); 408 409 if (so->so_family == AF_UNIX) { 410 ASSERT(so->so_ux_bound_vp); 411 addr = &so->so_ux_laddr; 412 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 413 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 414 "addr 0x%p, vp %p\n", 415 addrlen, 416 ((struct so_ux_addr *)addr)->soua_vp, 417 so->so_ux_bound_vp)); 418 } else { 419 addr = so->so_laddr_sa; 420 addrlen = (t_uscalar_t)so->so_laddr_len; 421 } 422 } else if (flags & _SOBIND_UNSPEC) { 423 ASSERT(name == NULL && namelen == 0); 424 425 /* 426 * The caller checked SS_ISBOUND but not necessarily 427 * under so_lock 428 */ 429 if (so->so_state & SS_ISBOUND) { 430 /* No error */ 431 goto done; 432 } 433 434 /* Set an initial local address */ 435 switch (so->so_family) { 436 case AF_UNIX: 437 /* 438 * Use an address with same size as struct sockaddr 439 * just like BSD. 440 */ 441 so->so_laddr_len = 442 (socklen_t)sizeof (struct sockaddr); 443 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 444 bzero(so->so_laddr_sa, so->so_laddr_len); 445 so->so_laddr_sa->sa_family = so->so_family; 446 447 /* 448 * Pass down an address with the implicit bind 449 * magic number and the rest all zeros. 450 * The transport will return a unique address. 451 */ 452 so->so_ux_laddr.soua_vp = NULL; 453 so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 454 addr = &so->so_ux_laddr; 455 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 456 break; 457 458 case AF_INET: 459 case AF_INET6: 460 /* 461 * An unspecified bind in TPI has a NULL address. 462 * Set the address in sockfs to have the sa_family. 463 */ 464 so->so_laddr_len = (so->so_family == AF_INET) ? 465 (socklen_t)sizeof (sin_t) : 466 (socklen_t)sizeof (sin6_t); 467 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 468 bzero(so->so_laddr_sa, so->so_laddr_len); 469 so->so_laddr_sa->sa_family = so->so_family; 470 addr = NULL; 471 addrlen = 0; 472 break; 473 474 default: 475 /* 476 * An unspecified bind in TPI has a NULL address. 
477 * Set the address in sockfs to be zero length. 478 * 479 * Can not assume there is a sa_family for all 480 * protocol families. For example, AF_X25 does not 481 * have a family field. 482 */ 483 bzero(so->so_laddr_sa, so->so_laddr_len); 484 so->so_laddr_len = 0; /* XXX correct? */ 485 addr = NULL; 486 addrlen = 0; 487 break; 488 } 489 490 } else { 491 if (so->so_state & SS_ISBOUND) { 492 /* 493 * If it is ok to rebind the socket, first unbind 494 * with the transport. A rebind to the NULL address 495 * is interpreted as an unbind. 496 * Note that a bind to NULL in BSD does unbind the 497 * socket but it fails with EINVAL. 498 * Note that regular sockets set SOV_SOCKBSD i.e. 499 * _SOBIND_SOCKBSD gets set here hence no type of 500 * socket does currently allow rebinding. 501 * 502 * If the name is NULL just do an unbind. 503 */ 504 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 505 name != NULL) { 506 error = EINVAL; 507 unbind_on_err = 0; 508 eprintsoline(so, error); 509 goto done; 510 } 511 if ((so->so_mode & SM_CONNREQUIRED) && 512 (so->so_state & SS_CANTREBIND)) { 513 error = EINVAL; 514 unbind_on_err = 0; 515 eprintsoline(so, error); 516 goto done; 517 } 518 error = sotpi_unbind(so, 0); 519 if (error) { 520 eprintsoline(so, error); 521 goto done; 522 } 523 ASSERT(!(so->so_state & SS_ISBOUND)); 524 if (name == NULL) { 525 so->so_state &= 526 ~(SS_ISCONNECTED|SS_ISCONNECTING); 527 goto done; 528 } 529 } 530 /* X/Open requires this check */ 531 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 532 if (xnet_check_print) { 533 printf("sockfs: X/Open bind state check " 534 "caused EINVAL\n"); 535 } 536 error = EINVAL; 537 goto done; 538 } 539 540 switch (so->so_family) { 541 case AF_UNIX: 542 /* 543 * All AF_UNIX addresses are nul terminated 544 * when copied (copyin_name) in so the minimum 545 * length is 3 bytes. 
546 */ 547 if (name == NULL || 548 (ssize_t)namelen <= sizeof (short) + 1) { 549 error = EISDIR; 550 eprintsoline(so, error); 551 goto done; 552 } 553 /* 554 * Verify so_family matches the bound family. 555 * BSD does not check this for AF_UNIX resulting 556 * in funny mknods. 557 */ 558 if (name->sa_family != so->so_family) { 559 error = EAFNOSUPPORT; 560 goto done; 561 } 562 break; 563 case AF_INET: 564 if (name == NULL) { 565 error = EINVAL; 566 eprintsoline(so, error); 567 goto done; 568 } 569 if ((size_t)namelen != sizeof (sin_t)) { 570 error = name->sa_family != so->so_family ? 571 EAFNOSUPPORT : EINVAL; 572 eprintsoline(so, error); 573 goto done; 574 } 575 if ((flags & _SOBIND_XPG4_2) && 576 (name->sa_family != so->so_family)) { 577 /* 578 * This check has to be made for X/Open 579 * sockets however application failures have 580 * been observed when it is applied to 581 * all sockets. 582 */ 583 error = EAFNOSUPPORT; 584 eprintsoline(so, error); 585 goto done; 586 } 587 /* 588 * Force a zero sa_family to match so_family. 589 * 590 * Some programs like inetd(1M) don't set the 591 * family field. Other programs leave 592 * sin_family set to garbage - SunOS 4.X does 593 * not check the family field on a bind. 594 * We use the family field that 595 * was passed in to the socket() call. 596 */ 597 name->sa_family = so->so_family; 598 break; 599 600 case AF_INET6: { 601 #ifdef DEBUG 602 sin6_t *sin6 = (sin6_t *)name; 603 #endif /* DEBUG */ 604 605 if (name == NULL) { 606 error = EINVAL; 607 eprintsoline(so, error); 608 goto done; 609 } 610 if ((size_t)namelen != sizeof (sin6_t)) { 611 error = name->sa_family != so->so_family ? 612 EAFNOSUPPORT : EINVAL; 613 eprintsoline(so, error); 614 goto done; 615 } 616 if (name->sa_family != so->so_family) { 617 /* 618 * With IPv6 we require the family to match 619 * unlike in IPv4. 
620 */ 621 error = EAFNOSUPPORT; 622 eprintsoline(so, error); 623 goto done; 624 } 625 #ifdef DEBUG 626 /* 627 * Verify that apps don't forget to clear 628 * sin6_scope_id etc 629 */ 630 if (sin6->sin6_scope_id != 0 && 631 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 632 zcmn_err(getzoneid(), CE_WARN, 633 "bind with uninitialized sin6_scope_id " 634 "(%d) on socket. Pid = %d\n", 635 (int)sin6->sin6_scope_id, 636 (int)curproc->p_pid); 637 } 638 if (sin6->__sin6_src_id != 0) { 639 zcmn_err(getzoneid(), CE_WARN, 640 "bind with uninitialized __sin6_src_id " 641 "(%d) on socket. Pid = %d\n", 642 (int)sin6->__sin6_src_id, 643 (int)curproc->p_pid); 644 } 645 #endif /* DEBUG */ 646 break; 647 } 648 default: 649 /* 650 * Don't do any length or sa_family check to allow 651 * non-sockaddr style addresses. 652 */ 653 if (name == NULL) { 654 error = EINVAL; 655 eprintsoline(so, error); 656 goto done; 657 } 658 break; 659 } 660 661 if (namelen > (t_uscalar_t)so->so_laddr_maxlen) { 662 error = ENAMETOOLONG; 663 eprintsoline(so, error); 664 goto done; 665 } 666 /* 667 * Save local address. 668 */ 669 so->so_laddr_len = (socklen_t)namelen; 670 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 671 bcopy(name, so->so_laddr_sa, namelen); 672 673 addr = so->so_laddr_sa; 674 addrlen = (t_uscalar_t)so->so_laddr_len; 675 switch (so->so_family) { 676 case AF_INET6: 677 case AF_INET: 678 break; 679 case AF_UNIX: { 680 struct sockaddr_un *soun = 681 (struct sockaddr_un *)so->so_laddr_sa; 682 struct vnode *vp; 683 struct vattr vattr; 684 685 ASSERT(so->so_ux_bound_vp == NULL); 686 /* 687 * Create vnode for the specified path name. 688 * Keep vnode held with a reference in so_ux_bound_vp. 689 * Use the vnode pointer as the address used in the 690 * bind with the transport. 691 * 692 * Use the same mode as in BSD. In particular this does 693 * not observe the umask. 
694 */ 695 /* MAXPATHLEN + soun_family + nul termination */ 696 if (so->so_laddr_len > 697 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 698 error = ENAMETOOLONG; 699 eprintsoline(so, error); 700 goto done; 701 } 702 vattr.va_type = VSOCK; 703 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 704 vattr.va_mask = AT_TYPE|AT_MODE; 705 /* NOTE: holding so_lock */ 706 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 707 EXCL, 0, &vp, CRMKNOD, 0, 0); 708 if (error) { 709 if (error == EEXIST) 710 error = EADDRINUSE; 711 eprintsoline(so, error); 712 goto done; 713 } 714 /* 715 * Establish pointer from the underlying filesystem 716 * vnode to the socket node. 717 * so_ux_bound_vp and v_stream->sd_vnode form the 718 * cross-linkage between the underlying filesystem 719 * node and the socket node. 720 */ 721 ASSERT(SOTOV(so)->v_stream); 722 mutex_enter(&vp->v_lock); 723 vp->v_stream = SOTOV(so)->v_stream; 724 so->so_ux_bound_vp = vp; 725 mutex_exit(&vp->v_lock); 726 727 /* 728 * Use the vnode pointer value as a unique address 729 * (together with the magic number to avoid conflicts 730 * with implicit binds) in the transport provider. 731 */ 732 so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp; 733 so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 734 addr = &so->so_ux_laddr; 735 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 736 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 737 addrlen, 738 ((struct so_ux_addr *)addr)->soua_vp)); 739 break; 740 } 741 } /* end switch (so->so_family) */ 742 } 743 744 /* 745 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 746 * the transport can start passing up T_CONN_IND messages 747 * as soon as it receives the bind req and strsock_proto() 748 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 
749 */ 750 if (flags & _SOBIND_LISTEN) { 751 if ((so->so_state & SS_ACCEPTCONN) == 0) 752 clear_acceptconn_on_err = B_TRUE; 753 save_so_backlog = so->so_backlog; 754 restore_backlog_on_err = B_TRUE; 755 so->so_state |= SS_ACCEPTCONN; 756 so->so_backlog = backlog; 757 } 758 759 /* 760 * If NL7C addr(s) have been configured check for addr/port match, 761 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 762 * 763 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 764 * family sockets only. If match mark as such. 765 */ 766 if (nl7c_enabled && ((addr != NULL && 767 (so->so_family == AF_INET || so->so_family == AF_INET6) && 768 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 769 so->so_nl7c_flags == NL7C_AF_NCA)) { 770 /* 771 * NL7C is not supported in non-global zones, 772 * we enforce this restriction here. 773 */ 774 if (so->so_zoneid == GLOBAL_ZONEID) { 775 /* An NL7C socket, mark it */ 776 so->so_nl7c_flags |= NL7C_ENABLED; 777 if (nl7c == NULL) { 778 /* 779 * Was an AF_NCA bind() so add it to the 780 * addr list for reporting purposes. 781 */ 782 nl7c = nl7c_add_addr(addr, addrlen); 783 } 784 } else 785 nl7c = NULL; 786 } 787 /* 788 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 789 * for other transports we will send in a O_T_BIND_REQ. 
790 */ 791 if (tcp_udp_xport && 792 (so->so_family == AF_INET || so->so_family == AF_INET6)) 793 PRIM_type = T_BIND_REQ; 794 795 bind_req.PRIM_type = PRIM_type; 796 bind_req.ADDR_length = addrlen; 797 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 798 bind_req.CONIND_number = backlog; 799 /* NOTE: holding so_lock while sleeping */ 800 mp = soallocproto2(&bind_req, sizeof (bind_req), 801 addr, addrlen, 0, _ALLOC_SLEEP); 802 so->so_state &= ~SS_LADDR_VALID; 803 804 /* Done using so_laddr_sa - can drop the lock */ 805 mutex_exit(&so->so_lock); 806 807 /* 808 * Intercept the bind_req message here to check if this <address/port> 809 * was configured as an SSL proxy server, or if another endpoint was 810 * already configured to act as a proxy for us. 811 * 812 * Note, only if NL7C not enabled for this socket. 813 */ 814 if (nl7c == NULL && 815 (so->so_family == AF_INET || so->so_family == AF_INET6) && 816 so->so_type == SOCK_STREAM) { 817 818 if (so->so_kssl_ent != NULL) { 819 kssl_release_ent(so->so_kssl_ent, so, so->so_kssl_type); 820 so->so_kssl_ent = NULL; 821 } 822 823 so->so_kssl_type = kssl_check_proxy(mp, so, &so->so_kssl_ent); 824 switch (so->so_kssl_type) { 825 case KSSL_NO_PROXY: 826 break; 827 828 case KSSL_HAS_PROXY: 829 mutex_enter(&so->so_lock); 830 goto skip_transport; 831 832 case KSSL_IS_PROXY: 833 break; 834 } 835 } 836 837 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 838 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 839 if (error) { 840 eprintsoline(so, error); 841 mutex_enter(&so->so_lock); 842 goto done; 843 } 844 845 mutex_enter(&so->so_lock); 846 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 847 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 848 if (error) { 849 eprintsoline(so, error); 850 goto done; 851 } 852 skip_transport: 853 ASSERT(mp); 854 /* 855 * Even if some TPI message (e.g. T_DISCON_IND) was received in 856 * strsock_proto while the lock was dropped above, the bind 857 * is allowed to complete. 858 */ 859 860 /* Mark as bound. 
This will be undone if we detect errors below. */ 861 if (flags & _SOBIND_NOXLATE) { 862 ASSERT(so->so_family == AF_UNIX); 863 so->so_state |= SS_FADDR_NOXLATE; 864 } 865 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 866 so->so_state |= SS_ISBOUND; 867 ASSERT(so->so_unbind_mp); 868 869 /* note that we've already set SS_ACCEPTCONN above */ 870 871 /* 872 * Recompute addrlen - an unspecied bind sent down an 873 * address of length zero but we expect the appropriate length 874 * in return. 875 */ 876 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 877 sizeof (so->so_ux_laddr) : so->so_laddr_len); 878 879 bind_ack = (struct T_bind_ack *)mp->b_rptr; 880 /* 881 * The alignment restriction is really too strict but 882 * we want enough alignment to inspect the fields of 883 * a sockaddr_in. 884 */ 885 addr = sogetoff(mp, bind_ack->ADDR_offset, 886 bind_ack->ADDR_length, 887 __TPI_ALIGN_SIZE); 888 if (addr == NULL) { 889 freemsg(mp); 890 error = EPROTO; 891 eprintsoline(so, error); 892 goto done; 893 } 894 if (!(flags & _SOBIND_UNSPEC)) { 895 /* 896 * Verify that the transport didn't return something we 897 * did not want e.g. an address other than what we asked for. 898 * 899 * NOTE: These checks would go away if/when we switch to 900 * using the new TPI (in which the transport would fail 901 * the request instead of assigning a different address). 902 * 903 * NOTE2: For protocols that we don't know (i.e. any 904 * other than AF_INET6, AF_INET and AF_UNIX), we 905 * cannot know if the transport should be expected to 906 * return the same address as that requested. 907 * 908 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 909 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 
910 * 911 * For example, in the case of netatalk it may be 912 * inappropriate for the transport to return the 913 * requested address (as it may have allocated a local 914 * port number in behaviour similar to that of an 915 * AF_INET bind request with a port number of zero). 916 * 917 * Given the definition of O_T_BIND_REQ, where the 918 * transport may bind to an address other than the 919 * requested address, it's not possible to determine 920 * whether a returned address that differs from the 921 * requested address is a reason to fail (because the 922 * requested address was not available) or succeed 923 * (because the transport allocated an appropriate 924 * address and/or port). 925 * 926 * sockfs currently requires that the transport return 927 * the requested address in the T_BIND_ACK, unless 928 * there is code here to allow for any discrepancy. 929 * Such code exists for AF_INET and AF_INET6. 930 * 931 * Netatalk chooses to return the requested address 932 * rather than the (correct) allocated address. This 933 * means that netatalk violates the TPI specification 934 * (and would not function correctly if used from a 935 * TLI application), but it does mean that it works 936 * with sockfs. 937 * 938 * As noted above, using the newer XTI bind primitive 939 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 940 * allow sockfs to be more sure about whether or not 941 * the bind request had succeeded (as transports are 942 * not permitted to bind to a different address than 943 * that requested - they must return failure). 944 * Unfortunately, support for T_BIND_REQ may not be 945 * present in all transport implementations (netatalk, 946 * for example, doesn't have it), making the 947 * transition difficult. 
948 */ 949 if (bind_ack->ADDR_length != addrlen) { 950 /* Assumes that the requested address was in use */ 951 freemsg(mp); 952 error = EADDRINUSE; 953 eprintsoline(so, error); 954 goto done; 955 } 956 957 switch (so->so_family) { 958 case AF_INET6: 959 case AF_INET: { 960 sin_t *rname, *aname; 961 962 rname = (sin_t *)addr; 963 aname = (sin_t *)so->so_laddr_sa; 964 965 /* 966 * Take advantage of the alignment 967 * of sin_port and sin6_port which fall 968 * in the same place in their data structures. 969 * Just use sin_port for either address family. 970 * 971 * This may become a problem if (heaven forbid) 972 * there's a separate ipv6port_reserved... :-P 973 * 974 * Binding to port 0 has the semantics of letting 975 * the transport bind to any port. 976 * 977 * If the transport is TCP or UDP since we had sent 978 * a T_BIND_REQ we would not get a port other than 979 * what we asked for. 980 */ 981 if (tcp_udp_xport) { 982 /* 983 * Pick up the new port number if we bound to 984 * port 0. 985 */ 986 if (aname->sin_port == 0) 987 aname->sin_port = rname->sin_port; 988 so->so_state |= SS_LADDR_VALID; 989 break; 990 } 991 if (aname->sin_port != 0 && 992 aname->sin_port != rname->sin_port) { 993 freemsg(mp); 994 error = EADDRINUSE; 995 eprintsoline(so, error); 996 goto done; 997 } 998 /* 999 * Pick up the new port number if we bound to port 0. 1000 */ 1001 aname->sin_port = rname->sin_port; 1002 1003 /* 1004 * Unfortunately, addresses aren't _quite_ the same. 
1005 */ 1006 if (so->so_family == AF_INET) { 1007 if (aname->sin_addr.s_addr != 1008 rname->sin_addr.s_addr) { 1009 freemsg(mp); 1010 error = EADDRNOTAVAIL; 1011 eprintsoline(so, error); 1012 goto done; 1013 } 1014 } else { 1015 sin6_t *rname6 = (sin6_t *)rname; 1016 sin6_t *aname6 = (sin6_t *)aname; 1017 1018 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1019 &rname6->sin6_addr)) { 1020 freemsg(mp); 1021 error = EADDRNOTAVAIL; 1022 eprintsoline(so, error); 1023 goto done; 1024 } 1025 } 1026 break; 1027 } 1028 case AF_UNIX: 1029 if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) { 1030 freemsg(mp); 1031 error = EADDRINUSE; 1032 eprintsoline(so, error); 1033 eprintso(so, 1034 ("addrlen %d, addr 0x%x, vp %p\n", 1035 addrlen, *((int *)addr), 1036 so->so_ux_bound_vp)); 1037 goto done; 1038 } 1039 so->so_state |= SS_LADDR_VALID; 1040 break; 1041 default: 1042 /* 1043 * NOTE: This assumes that addresses can be 1044 * byte-compared for equivalence. 1045 */ 1046 if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) { 1047 freemsg(mp); 1048 error = EADDRINUSE; 1049 eprintsoline(so, error); 1050 goto done; 1051 } 1052 /* 1053 * Don't mark SS_LADDR_VALID, as we cannot be 1054 * sure that the returned address is the real 1055 * bound address when talking to an unknown 1056 * transport. 1057 */ 1058 break; 1059 } 1060 } else { 1061 /* 1062 * Save for returned address for getsockname. 1063 * Needed for unspecific bind unless transport supports 1064 * the TI_GETMYNAME ioctl. 1065 * Do this for AF_INET{,6} even though they do, as 1066 * caching info here is much better performance than 1067 * a TPI/STREAMS trip to the transport for getsockname. 1068 * Any which can't for some reason _must_ _not_ set 1069 * LADDR_VALID here for the caching version of getsockname 1070 * to not break; 1071 */ 1072 switch (so->so_family) { 1073 case AF_UNIX: 1074 /* 1075 * Record the address bound with the transport 1076 * for use by socketpair. 
1077 */ 1078 bcopy(addr, &so->so_ux_laddr, addrlen); 1079 so->so_state |= SS_LADDR_VALID; 1080 break; 1081 case AF_INET: 1082 case AF_INET6: 1083 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 1084 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 1085 so->so_state |= SS_LADDR_VALID; 1086 break; 1087 default: 1088 /* 1089 * Don't mark SS_LADDR_VALID, as we cannot be 1090 * sure that the returned address is the real 1091 * bound address when talking to an unknown 1092 * transport. 1093 */ 1094 break; 1095 } 1096 } 1097 1098 if (nl7c != NULL) { 1099 /* Register listen()er sonode pointer with NL7C */ 1100 nl7c_listener_addr(nl7c, so); 1101 } 1102 1103 freemsg(mp); 1104 1105 done: 1106 if (error) { 1107 /* reset state & backlog to values held on entry */ 1108 if (clear_acceptconn_on_err == B_TRUE) 1109 so->so_state &= ~SS_ACCEPTCONN; 1110 if (restore_backlog_on_err == B_TRUE) 1111 so->so_backlog = save_so_backlog; 1112 1113 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1114 int err; 1115 1116 err = sotpi_unbind(so, 0); 1117 /* LINTED - statement has no consequent: if */ 1118 if (err) { 1119 eprintsoline(so, error); 1120 } else { 1121 ASSERT(!(so->so_state & SS_ISBOUND)); 1122 } 1123 } 1124 } 1125 if (!(flags & _SOBIND_LOCK_HELD)) { 1126 so_unlock_single(so, SOLOCKED); 1127 mutex_exit(&so->so_lock); 1128 } else { 1129 /* If the caller held the lock don't release it here */ 1130 ASSERT(MUTEX_HELD(&so->so_lock)); 1131 ASSERT(so->so_flag & SOLOCKED); 1132 } 1133 return (error); 1134 } 1135 1136 /* bind the socket */ 1137 static int 1138 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1139 int flags) 1140 { 1141 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1142 return (sotpi_bindlisten(so, name, namelen, 0, flags)); 1143 1144 flags &= ~_SOBIND_SOCKETPAIR; 1145 return (sotpi_bindlisten(so, name, namelen, 1, flags)); 1146 } 1147 1148 /* 1149 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1150 * address, or when listen needs to 
unbind and bind. 1151 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1152 * so that a sobind can pick them up. 1153 */ 1154 static int 1155 sotpi_unbind(struct sonode *so, int flags) 1156 { 1157 struct T_unbind_req unbind_req; 1158 int error = 0; 1159 mblk_t *mp; 1160 1161 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1162 so, flags, pr_state(so->so_state, so->so_mode))); 1163 1164 ASSERT(MUTEX_HELD(&so->so_lock)); 1165 ASSERT(so->so_flag & SOLOCKED); 1166 1167 if (!(so->so_state & SS_ISBOUND)) { 1168 error = EINVAL; 1169 eprintsoline(so, error); 1170 goto done; 1171 } 1172 1173 mutex_exit(&so->so_lock); 1174 1175 /* 1176 * Flush the read and write side (except stream head read queue) 1177 * and send down T_UNBIND_REQ. 1178 */ 1179 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1180 1181 unbind_req.PRIM_type = T_UNBIND_REQ; 1182 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1183 0, _ALLOC_SLEEP); 1184 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1185 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1186 mutex_enter(&so->so_lock); 1187 if (error) { 1188 eprintsoline(so, error); 1189 goto done; 1190 } 1191 1192 error = sowaitokack(so, T_UNBIND_REQ); 1193 if (error) { 1194 eprintsoline(so, error); 1195 goto done; 1196 } 1197 1198 /* 1199 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1200 * strsock_proto while the lock was dropped above, the unbind 1201 * is allowed to complete. 1202 */ 1203 if (!(flags & _SOUNBIND_REBIND)) { 1204 /* 1205 * Clear out bound address. 
1206 */ 1207 vnode_t *vp; 1208 1209 if ((vp = so->so_ux_bound_vp) != NULL) { 1210 1211 /* Undo any SSL proxy setup */ 1212 if ((so->so_family == AF_INET || 1213 so->so_family == AF_INET6) && 1214 (so->so_type == SOCK_STREAM) && 1215 (so->so_kssl_ent != NULL)) { 1216 kssl_release_ent(so->so_kssl_ent, so, 1217 so->so_kssl_type); 1218 so->so_kssl_ent = NULL; 1219 so->so_kssl_type = KSSL_NO_PROXY; 1220 } 1221 1222 so->so_ux_bound_vp = NULL; 1223 vn_rele_stream(vp); 1224 } 1225 /* Clear out address */ 1226 so->so_laddr_len = 0; 1227 } 1228 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1229 1230 done: 1231 1232 /* If the caller held the lock don't release it here */ 1233 ASSERT(MUTEX_HELD(&so->so_lock)); 1234 ASSERT(so->so_flag & SOLOCKED); 1235 1236 return (error); 1237 } 1238 1239 /* 1240 * listen on the socket. 1241 * For TPI conforming transports this has to first unbind with the transport 1242 * and then bind again using the new backlog. 1243 */ 1244 int 1245 sotpi_listen(struct sonode *so, int backlog) 1246 { 1247 int error = 0; 1248 1249 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1250 so, backlog, pr_state(so->so_state, so->so_mode))); 1251 1252 if (so->so_serv_type == T_CLTS) 1253 return (EOPNOTSUPP); 1254 1255 /* 1256 * If the socket is ready to accept connections already, then 1257 * return without doing anything. This avoids a problem where 1258 * a second listen() call fails if a connection is pending and 1259 * leaves the socket unbound. Only when we are not unbinding 1260 * with the transport can we safely increase the backlog. 1261 */ 1262 if (so->so_state & SS_ACCEPTCONN && 1263 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1264 /*CONSTCOND*/ 1265 !solisten_tpi_tcp)) 1266 return (0); 1267 1268 if (so->so_state & SS_ISCONNECTED) 1269 return (EINVAL); 1270 1271 mutex_enter(&so->so_lock); 1272 so_lock_single(so); /* Set SOLOCKED */ 1273 1274 if (backlog < 0) 1275 backlog = 0; 1276 /* 1277 * Use the same qlimit as in BSD. 
BSD checks the qlimit 1278 * before queuing the next connection implying that a 1279 * listen(sock, 0) allows one connection to be queued. 1280 * BSD also uses 1.5 times the requested backlog. 1281 * 1282 * XNS Issue 4 required a strict interpretation of the backlog. 1283 * This has been waived subsequently for Issue 4 and the change 1284 * incorporated in XNS Issue 5. So we aren't required to do 1285 * anything special for XPG apps. 1286 */ 1287 if (backlog >= (INT_MAX - 1) / 3) 1288 backlog = INT_MAX; 1289 else 1290 backlog = backlog * 3 / 2 + 1; 1291 1292 /* 1293 * If the listen doesn't change the backlog we do nothing. 1294 * This avoids an EPROTO error from the transport. 1295 */ 1296 if ((so->so_state & SS_ACCEPTCONN) && 1297 so->so_backlog == backlog) 1298 goto done; 1299 1300 if (!(so->so_state & SS_ISBOUND)) { 1301 /* 1302 * Must have been explicitly bound in the UNIX domain. 1303 */ 1304 if (so->so_family == AF_UNIX) { 1305 error = EINVAL; 1306 goto done; 1307 } 1308 error = sotpi_bindlisten(so, NULL, 0, backlog, 1309 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1310 } else if (backlog > 0) { 1311 /* 1312 * AF_INET{,6} hack to avoid losing the port. 1313 * Assumes that all AF_INET{,6} transports can handle a 1314 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1315 * has already bound thus it is possible to avoid the unbind. 
1316 */ 1317 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1318 /*CONSTCOND*/ 1319 !solisten_tpi_tcp)) { 1320 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1321 if (error) 1322 goto done; 1323 } 1324 error = sotpi_bindlisten(so, NULL, 0, backlog, 1325 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1326 } else { 1327 so->so_state |= SS_ACCEPTCONN; 1328 so->so_backlog = backlog; 1329 } 1330 if (error) 1331 goto done; 1332 ASSERT(so->so_state & SS_ACCEPTCONN); 1333 done: 1334 so_unlock_single(so, SOLOCKED); 1335 mutex_exit(&so->so_lock); 1336 return (error); 1337 } 1338 1339 /* 1340 * Disconnect either a specified seqno or all (-1). 1341 * The former is used on listening sockets only. 1342 * 1343 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1344 * the current use of sodisconnect(seqno == -1) is only for shutdown 1345 * so there is no point (and potentially incorrect) to unbind. 1346 */ 1347 int 1348 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1349 { 1350 struct T_discon_req discon_req; 1351 int error = 0; 1352 mblk_t *mp; 1353 1354 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1355 so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1356 1357 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1358 mutex_enter(&so->so_lock); 1359 so_lock_single(so); /* Set SOLOCKED */ 1360 } else { 1361 ASSERT(MUTEX_HELD(&so->so_lock)); 1362 ASSERT(so->so_flag & SOLOCKED); 1363 } 1364 1365 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1366 error = EINVAL; 1367 eprintsoline(so, error); 1368 goto done; 1369 } 1370 1371 mutex_exit(&so->so_lock); 1372 /* 1373 * Flush the write side (unless this is a listener) 1374 * and then send down a T_DISCON_REQ. 1375 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1376 * and other messages.) 
1377 */ 1378 if (!(so->so_state & SS_ACCEPTCONN)) 1379 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1380 1381 discon_req.PRIM_type = T_DISCON_REQ; 1382 discon_req.SEQ_number = seqno; 1383 mp = soallocproto1(&discon_req, sizeof (discon_req), 1384 0, _ALLOC_SLEEP); 1385 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1386 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1387 mutex_enter(&so->so_lock); 1388 if (error) { 1389 eprintsoline(so, error); 1390 goto done; 1391 } 1392 1393 error = sowaitokack(so, T_DISCON_REQ); 1394 if (error) { 1395 eprintsoline(so, error); 1396 goto done; 1397 } 1398 /* 1399 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1400 * strsock_proto while the lock was dropped above, the disconnect 1401 * is allowed to complete. However, it is not possible to 1402 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1403 */ 1404 so->so_state &= 1405 ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID); 1406 done: 1407 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1408 so_unlock_single(so, SOLOCKED); 1409 mutex_exit(&so->so_lock); 1410 } else { 1411 /* If the caller held the lock don't release it here */ 1412 ASSERT(MUTEX_HELD(&so->so_lock)); 1413 ASSERT(so->so_flag & SOLOCKED); 1414 } 1415 return (error); 1416 } 1417 1418 int 1419 sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop) 1420 { 1421 struct T_conn_ind *conn_ind; 1422 struct T_conn_res *conn_res; 1423 int error = 0; 1424 mblk_t *mp, *ctxmp, *ack_mp; 1425 struct sonode *nso; 1426 vnode_t *nvp; 1427 void *src; 1428 t_uscalar_t srclen; 1429 void *opt; 1430 t_uscalar_t optlen; 1431 t_scalar_t PRIM_type; 1432 t_scalar_t SEQ_number; 1433 size_t sinlen; 1434 1435 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1436 so, fflag, nsop, pr_state(so->so_state, so->so_mode))); 1437 1438 /* 1439 * Defer single-threading the accepting socket until 1440 * the T_CONN_IND has been received and parsed and the 1441 * new sonode has been opened. 
1442 */ 1443 1444 /* Check that we are not already connected */ 1445 if ((so->so_state & SS_ACCEPTCONN) == 0) 1446 goto conn_bad; 1447 again: 1448 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1449 goto e_bad; 1450 1451 ASSERT(mp); 1452 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1453 ctxmp = mp->b_cont; 1454 1455 /* 1456 * Save SEQ_number for error paths. 1457 */ 1458 SEQ_number = conn_ind->SEQ_number; 1459 1460 srclen = conn_ind->SRC_length; 1461 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1462 if (src == NULL) { 1463 error = EPROTO; 1464 freemsg(mp); 1465 eprintsoline(so, error); 1466 goto disconnect_unlocked; 1467 } 1468 optlen = conn_ind->OPT_length; 1469 switch (so->so_family) { 1470 case AF_INET: 1471 case AF_INET6: 1472 if ((optlen == sizeof (intptr_t)) && 1473 ((so->so_state & SS_DIRECT) != 0)) { 1474 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1475 &opt, conn_ind->OPT_length); 1476 } else { 1477 /* 1478 * The transport (in this case TCP) hasn't sent up 1479 * a pointer to an instance for the accept fast-path. 1480 * Disable fast-path completely because the call to 1481 * sotpi_create() below would otherwise create an 1482 * incomplete TCP instance, which would lead to 1483 * problems when sockfs sends a normal T_CONN_RES 1484 * message down the new stream. 1485 */ 1486 if (so->so_state & SS_DIRECT) { 1487 int rval; 1488 /* 1489 * For consistency we inform tcp to disable 1490 * direct interface on the listener, though 1491 * we can certainly live without doing this 1492 * because no data will ever travel upstream 1493 * on the listening socket. 
1494 */ 1495 so->so_state &= ~SS_DIRECT; 1496 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1497 0, 0, K_TO_K, CRED(), &rval); 1498 } 1499 opt = NULL; 1500 optlen = 0; 1501 } 1502 break; 1503 case AF_UNIX: 1504 default: 1505 if (optlen != 0) { 1506 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1507 __TPI_ALIGN_SIZE); 1508 if (opt == NULL) { 1509 error = EPROTO; 1510 freemsg(mp); 1511 eprintsoline(so, error); 1512 goto disconnect_unlocked; 1513 } 1514 } 1515 if (so->so_family == AF_UNIX) { 1516 if (!(so->so_state & SS_FADDR_NOXLATE)) { 1517 src = NULL; 1518 srclen = 0; 1519 } 1520 /* Extract src address from options */ 1521 if (optlen != 0) 1522 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1523 } 1524 break; 1525 } 1526 1527 /* 1528 * Create the new socket. 1529 */ 1530 VN_HOLD(so->so_accessvp); 1531 nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type, 1532 so->so_protocol, so->so_version, so, &error); 1533 if (nso == NULL) { 1534 ASSERT(error != 0); 1535 /* 1536 * Accept can not fail with ENOBUFS. sotpi_create 1537 * sleeps waiting for memory until a signal is caught 1538 * so return EINTR. 1539 */ 1540 freemsg(mp); 1541 if (error == ENOBUFS) 1542 error = EINTR; 1543 goto e_disc_unl; 1544 } 1545 nvp = SOTOV(nso); 1546 1547 /* 1548 * If the transport sent up an SSL connection context, then attach 1549 * it the new socket, and set the (sd_wputdatafunc)() and 1550 * (sd_rputdatafunc)() stream head hooks to intercept and process 1551 * SSL records. 1552 */ 1553 if (ctxmp != NULL) { 1554 /* 1555 * This kssl_ctx_t is already held for us by the transport. 1556 * So, we don't need to do a kssl_hold_ctx() here. 
1557 */ 1558 nso->so_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1559 freemsg(ctxmp); 1560 mp->b_cont = NULL; 1561 strsetrwputdatahooks(nvp, strsock_kssl_input, 1562 strsock_kssl_output); 1563 } 1564 #ifdef DEBUG 1565 /* 1566 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1567 * it's inherited early to allow debugging of the accept code itself. 1568 */ 1569 nso->so_options |= so->so_options & SO_DEBUG; 1570 #endif /* DEBUG */ 1571 1572 /* 1573 * Save the SRC address from the T_CONN_IND 1574 * for getpeername to work on AF_UNIX and on transports that do not 1575 * support TI_GETPEERNAME. 1576 * 1577 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1578 * copyin_name(). 1579 */ 1580 if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) { 1581 error = EINVAL; 1582 freemsg(mp); 1583 eprintsoline(so, error); 1584 goto disconnect_vp_unlocked; 1585 } 1586 nso->so_faddr_len = (socklen_t)srclen; 1587 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 1588 bcopy(src, nso->so_faddr_sa, srclen); 1589 nso->so_state |= SS_FADDR_VALID; 1590 1591 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1592 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1593 cred_t *cr; 1594 1595 if ((cr = DB_CRED(mp)) != NULL) { 1596 crhold(cr); 1597 nso->so_peercred = cr; 1598 nso->so_cpid = DB_CPID(mp); 1599 } 1600 freemsg(mp); 1601 1602 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1603 sizeof (intptr_t), 0, _ALLOC_INTR); 1604 if (mp == NULL) { 1605 /* 1606 * Accept can not fail with ENOBUFS. 1607 * A signal was caught so return EINTR. 
1608 */ 1609 error = EINTR; 1610 eprintsoline(so, error); 1611 goto disconnect_vp_unlocked; 1612 } 1613 conn_res = (struct T_conn_res *)mp->b_rptr; 1614 } else { 1615 nso->so_peercred = DB_CRED(mp); 1616 nso->so_cpid = DB_CPID(mp); 1617 DB_CRED(mp) = NULL; 1618 1619 mp->b_rptr = DB_BASE(mp); 1620 conn_res = (struct T_conn_res *)mp->b_rptr; 1621 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1622 } 1623 1624 /* 1625 * New socket must be bound at least in sockfs and, except for AF_INET, 1626 * (or AF_INET6) it also has to be bound in the transport provider. 1627 * We set the local address in the sonode from the T_OK_ACK of the 1628 * T_CONN_RES. For this reason the address we bind to here isn't 1629 * important. 1630 */ 1631 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1632 /*CONSTCOND*/ 1633 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1634 /* 1635 * Optimization for AF_INET{,6} transports 1636 * that can handle a T_CONN_RES without being bound. 1637 */ 1638 mutex_enter(&nso->so_lock); 1639 so_automatic_bind(nso); 1640 mutex_exit(&nso->so_lock); 1641 } else { 1642 /* Perform NULL bind with the transport provider. */ 1643 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) { 1644 ASSERT(error != ENOBUFS); 1645 freemsg(mp); 1646 eprintsoline(nso, error); 1647 goto disconnect_vp_unlocked; 1648 } 1649 } 1650 1651 /* 1652 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1653 * so that any data arriving on the new socket will cause the 1654 * appropriate signals to be delivered for the new socket. 1655 * 1656 * No other thread (except strsock_proto and strsock_misc) 1657 * can access the new socket thus we relax the locking. 
1658 */ 1659 nso->so_pgrp = so->so_pgrp; 1660 nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE); 1661 1662 if (nso->so_pgrp != 0) { 1663 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1664 eprintsoline(nso, error); 1665 error = 0; 1666 nso->so_pgrp = 0; 1667 } 1668 } 1669 1670 /* 1671 * Make note of the socket level options. TCP and IP level options 1672 * are already inherited. We could do all this after accept is 1673 * successful but doing it here simplifies code and no harm done 1674 * for error case. 1675 */ 1676 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1677 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1678 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1679 nso->so_sndbuf = so->so_sndbuf; 1680 nso->so_rcvbuf = so->so_rcvbuf; 1681 if (nso->so_options & SO_LINGER) 1682 nso->so_linger = so->so_linger; 1683 1684 if ((so->so_state & SS_DIRECT) != 0) { 1685 1686 ASSERT(opt != NULL); 1687 1688 conn_res->OPT_length = optlen; 1689 conn_res->OPT_offset = MBLKL(mp); 1690 bcopy(&opt, mp->b_wptr, optlen); 1691 mp->b_wptr += optlen; 1692 conn_res->PRIM_type = T_CONN_RES; 1693 conn_res->ACCEPTOR_id = 0; 1694 PRIM_type = T_CONN_RES; 1695 1696 /* Send down the T_CONN_RES on acceptor STREAM */ 1697 error = kstrputmsg(SOTOV(nso), mp, NULL, 1698 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1699 if (error) { 1700 mutex_enter(&so->so_lock); 1701 so_lock_single(so); 1702 eprintsoline(so, error); 1703 goto disconnect_vp; 1704 } 1705 mutex_enter(&nso->so_lock); 1706 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1707 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1708 if (error) { 1709 mutex_exit(&nso->so_lock); 1710 mutex_enter(&so->so_lock); 1711 so_lock_single(so); 1712 eprintsoline(so, error); 1713 goto disconnect_vp; 1714 } 1715 if (nso->so_family == AF_INET) { 1716 sin_t *sin; 1717 1718 sin = (sin_t *)(ack_mp->b_rptr + 1719 sizeof (struct T_ok_ack)); 1720 bcopy(sin, nso->so_laddr_sa, sizeof (sin_t)); 1721 nso->so_laddr_len = sizeof 
(sin_t); 1722 } else { 1723 sin6_t *sin6; 1724 1725 sin6 = (sin6_t *)(ack_mp->b_rptr + 1726 sizeof (struct T_ok_ack)); 1727 bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t)); 1728 nso->so_laddr_len = sizeof (sin6_t); 1729 } 1730 freemsg(ack_mp); 1731 1732 nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID; 1733 nso->so_priv = opt; 1734 1735 if (so->so_nl7c_flags & NL7C_ENABLED) { 1736 /* 1737 * A NL7C marked listen()er so the new socket 1738 * inherits the listen()er's NL7C state, except 1739 * for NL7C_POLLIN. 1740 * 1741 * Only call NL7C to process the new socket if 1742 * the listen socket allows blocking i/o. 1743 */ 1744 nso->so_nl7c_flags = so->so_nl7c_flags & (~NL7C_POLLIN); 1745 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1746 /* 1747 * Nonblocking accept() just make it 1748 * persist to defer processing to the 1749 * read-side syscall (e.g. read). 1750 */ 1751 nso->so_nl7c_flags |= NL7C_SOPERSIST; 1752 } else if (nl7c_process(nso, B_FALSE)) { 1753 /* 1754 * NL7C has completed processing on the 1755 * socket, close the socket and back to 1756 * the top to await the next T_CONN_IND. 1757 */ 1758 mutex_exit(&nso->so_lock); 1759 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1760 CRED(), NULL); 1761 VN_RELE(nvp); 1762 goto again; 1763 } 1764 /* Pass the new socket out */ 1765 } 1766 1767 mutex_exit(&nso->so_lock); 1768 1769 /* 1770 * It's possible, through the use of autopush for example, 1771 * that the acceptor stream may not support SS_DIRECT 1772 * semantics. If the new socket does not support SS_DIRECT 1773 * we issue a _SIOCSOCKFALLBACK to inform the transport 1774 * as we would in the I_PUSH case. 1775 */ 1776 if (!(nso->so_state & SS_DIRECT)) { 1777 int rval; 1778 1779 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 1780 0, 0, K_TO_K, CRED(), &rval)) != 0) { 1781 mutex_enter(&so->so_lock); 1782 so_lock_single(so); 1783 eprintsoline(so, error); 1784 goto disconnect_vp; 1785 } 1786 } 1787 1788 /* 1789 * Pass out new socket. 
1790 */ 1791 if (nsop != NULL) 1792 *nsop = nso; 1793 1794 return (0); 1795 } 1796 1797 /* 1798 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1799 * which don't support the FireEngine accept fast-path. It is also 1800 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1801 * again. Neither sockfs nor TCP attempt to find out if some other 1802 * random module has been inserted in between (in which case we 1803 * should follow TLI accept behaviour). We blindly assume the worst 1804 * case and revert back to old behaviour i.e. TCP will not send us 1805 * any option (eager) and the accept should happen on the listener 1806 * queue. Any queued T_conn_ind have already got their options removed 1807 * by so_sock2_stream() when "sockmod" was I_POP'd. 1808 */ 1809 /* 1810 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1811 */ 1812 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1813 #ifdef _ILP32 1814 queue_t *q; 1815 1816 /* 1817 * Find read queue in driver 1818 * Can safely do this since we "own" nso/nvp. 
1819 */ 1820 q = strvp2wq(nvp)->q_next; 1821 while (SAMESTR(q)) 1822 q = q->q_next; 1823 q = RD(q); 1824 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1825 #else 1826 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1827 #endif /* _ILP32 */ 1828 conn_res->PRIM_type = O_T_CONN_RES; 1829 PRIM_type = O_T_CONN_RES; 1830 } else { 1831 conn_res->ACCEPTOR_id = nso->so_acceptor_id; 1832 conn_res->PRIM_type = T_CONN_RES; 1833 PRIM_type = T_CONN_RES; 1834 } 1835 conn_res->SEQ_number = SEQ_number; 1836 conn_res->OPT_length = 0; 1837 conn_res->OPT_offset = 0; 1838 1839 mutex_enter(&so->so_lock); 1840 so_lock_single(so); /* Set SOLOCKED */ 1841 mutex_exit(&so->so_lock); 1842 1843 error = kstrputmsg(SOTOV(so), mp, NULL, 1844 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1845 mutex_enter(&so->so_lock); 1846 if (error) { 1847 eprintsoline(so, error); 1848 goto disconnect_vp; 1849 } 1850 error = sowaitprim(so, PRIM_type, T_OK_ACK, 1851 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1852 if (error) { 1853 eprintsoline(so, error); 1854 goto disconnect_vp; 1855 } 1856 /* 1857 * If there is a sin/sin6 appended onto the T_OK_ACK use 1858 * that to set the local address. If this is not present 1859 * then we zero out the address and don't set the 1860 * SS_LADDR_VALID bit. For AF_UNIX endpoints we copy over 1861 * the pathname from the listening socket. 1862 */ 1863 sinlen = (nso->so_family == AF_INET) ? 
sizeof (sin_t) : sizeof (sin6_t); 1864 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 1865 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 1866 ack_mp->b_rptr += sizeof (struct T_ok_ack); 1867 bcopy(ack_mp->b_rptr, nso->so_laddr_sa, sinlen); 1868 nso->so_laddr_len = sinlen; 1869 nso->so_state |= SS_LADDR_VALID; 1870 } else if (nso->so_family == AF_UNIX) { 1871 ASSERT(so->so_family == AF_UNIX); 1872 nso->so_laddr_len = so->so_laddr_len; 1873 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1874 bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len); 1875 nso->so_state |= SS_LADDR_VALID; 1876 } else { 1877 nso->so_laddr_len = so->so_laddr_len; 1878 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1879 bzero(nso->so_laddr_sa, nso->so_addr_size); 1880 nso->so_laddr_sa->sa_family = nso->so_family; 1881 } 1882 freemsg(ack_mp); 1883 1884 so_unlock_single(so, SOLOCKED); 1885 mutex_exit(&so->so_lock); 1886 1887 nso->so_state |= SS_ISCONNECTED; 1888 1889 /* 1890 * Pass out new socket. 1891 */ 1892 if (nsop != NULL) 1893 *nsop = nso; 1894 1895 return (0); 1896 1897 1898 eproto_disc_unl: 1899 error = EPROTO; 1900 e_disc_unl: 1901 eprintsoline(so, error); 1902 goto disconnect_unlocked; 1903 1904 pr_disc_vp_unl: 1905 eprintsoline(so, error); 1906 disconnect_vp_unlocked: 1907 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 1908 VN_RELE(nvp); 1909 disconnect_unlocked: 1910 (void) sodisconnect(so, SEQ_number, 0); 1911 return (error); 1912 1913 pr_disc_vp: 1914 eprintsoline(so, error); 1915 disconnect_vp: 1916 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 1917 so_unlock_single(so, SOLOCKED); 1918 mutex_exit(&so->so_lock); 1919 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 1920 VN_RELE(nvp); 1921 return (error); 1922 1923 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 1924 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 1925 ? 
EOPNOTSUPP : EINVAL; 1926 e_bad: 1927 eprintsoline(so, error); 1928 return (error); 1929 } 1930 1931 /* 1932 * connect a socket. 1933 * 1934 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 1935 * unconnect (by specifying a null address). 1936 */ 1937 int 1938 sotpi_connect(struct sonode *so, 1939 const struct sockaddr *name, 1940 socklen_t namelen, 1941 int fflag, 1942 int flags) 1943 { 1944 struct T_conn_req conn_req; 1945 int error = 0; 1946 mblk_t *mp; 1947 void *src; 1948 socklen_t srclen; 1949 void *addr; 1950 socklen_t addrlen; 1951 boolean_t need_unlock; 1952 1953 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 1954 so, name, namelen, fflag, flags, 1955 pr_state(so->so_state, so->so_mode))); 1956 1957 /* 1958 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 1959 * avoid sleeping for memory with SOLOCKED held. 1960 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen 1961 * + sizeof (struct T_opthdr). 1962 * (the AF_UNIX so_ux_addr_xlate() does not make the address 1963 * exceed so_faddr_maxlen). 1964 */ 1965 mp = soallocproto(sizeof (struct T_conn_req) + 1966 2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR); 1967 if (mp == NULL) { 1968 /* 1969 * Connect can not fail with ENOBUFS. A signal was 1970 * caught so return EINTR. 1971 */ 1972 error = EINTR; 1973 eprintsoline(so, error); 1974 return (error); 1975 } 1976 1977 mutex_enter(&so->so_lock); 1978 /* 1979 * Make sure there is a preallocated T_unbind_req message 1980 * before any binding. This message is allocated when the 1981 * socket is created. Since another thread can consume 1982 * so_unbind_mp by the time we return from so_lock_single(), 1983 * we should check the availability of so_unbind_mp after 1984 * we return from so_lock_single(). 
1985 */ 1986 1987 so_lock_single(so); /* Set SOLOCKED */ 1988 need_unlock = B_TRUE; 1989 1990 if (so->so_unbind_mp == NULL) { 1991 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 1992 /* NOTE: holding so_lock while sleeping */ 1993 so->so_unbind_mp = 1994 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR); 1995 if (so->so_unbind_mp == NULL) { 1996 error = EINTR; 1997 goto done; 1998 } 1999 } 2000 2001 /* 2002 * Can't have done a listen before connecting. 2003 */ 2004 if (so->so_state & SS_ACCEPTCONN) { 2005 error = EOPNOTSUPP; 2006 goto done; 2007 } 2008 2009 /* 2010 * Must be bound with the transport 2011 */ 2012 if (!(so->so_state & SS_ISBOUND)) { 2013 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2014 /*CONSTCOND*/ 2015 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2016 /* 2017 * Optimization for AF_INET{,6} transports 2018 * that can handle a T_CONN_REQ without being bound. 2019 */ 2020 so_automatic_bind(so); 2021 } else { 2022 error = sotpi_bind(so, NULL, 0, 2023 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 2024 if (error) 2025 goto done; 2026 } 2027 ASSERT(so->so_state & SS_ISBOUND); 2028 flags |= _SOCONNECT_DID_BIND; 2029 } 2030 2031 /* 2032 * Handle a connect to a name parameter of type AF_UNSPEC like a 2033 * connect to a null address. This is the portable method to 2034 * unconnect a socket. 2035 */ 2036 if ((namelen >= sizeof (sa_family_t)) && 2037 (name->sa_family == AF_UNSPEC)) { 2038 name = NULL; 2039 namelen = 0; 2040 } 2041 2042 /* 2043 * Check that we are not already connected. 2044 * A connection-oriented socket cannot be reconnected. 
2045 * A connected connection-less socket can be 2046 * - connected to a different address by a subsequent connect 2047 * - "unconnected" by a connect to the NULL address 2048 */ 2049 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2050 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2051 if (so->so_mode & SM_CONNREQUIRED) { 2052 /* Connection-oriented socket */ 2053 error = so->so_state & SS_ISCONNECTED ? 2054 EISCONN : EALREADY; 2055 goto done; 2056 } 2057 /* Connection-less socket */ 2058 if (name == NULL) { 2059 /* 2060 * Remove the connected state and clear SO_DGRAM_ERRIND 2061 * since it was set when the socket was connected. 2062 * If this is UDP also send down a T_DISCON_REQ. 2063 */ 2064 int val; 2065 2066 if ((so->so_family == AF_INET || 2067 so->so_family == AF_INET6) && 2068 (so->so_type == SOCK_DGRAM || 2069 so->so_type == SOCK_RAW) && 2070 /*CONSTCOND*/ 2071 !soconnect_tpi_udp) { 2072 /* XXX What about implicitly unbinding here? */ 2073 error = sodisconnect(so, -1, 2074 _SODISCONNECT_LOCK_HELD); 2075 } else { 2076 so->so_state &= 2077 ~(SS_ISCONNECTED | SS_ISCONNECTING | 2078 SS_FADDR_VALID); 2079 so->so_faddr_len = 0; 2080 } 2081 2082 so_unlock_single(so, SOLOCKED); 2083 mutex_exit(&so->so_lock); 2084 2085 val = 0; 2086 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2087 &val, (t_uscalar_t)sizeof (val)); 2088 2089 mutex_enter(&so->so_lock); 2090 so_lock_single(so); /* Set SOLOCKED */ 2091 goto done; 2092 } 2093 } 2094 ASSERT(so->so_state & SS_ISBOUND); 2095 2096 if (name == NULL || namelen == 0) { 2097 error = EINVAL; 2098 goto done; 2099 } 2100 /* 2101 * Mark the socket if so_faddr_sa represents the transport level 2102 * address. 
2103 */ 2104 if (flags & _SOCONNECT_NOXLATE) { 2105 struct sockaddr_ux *soaddr_ux; 2106 2107 ASSERT(so->so_family == AF_UNIX); 2108 if (namelen != sizeof (struct sockaddr_ux)) { 2109 error = EINVAL; 2110 goto done; 2111 } 2112 soaddr_ux = (struct sockaddr_ux *)name; 2113 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2114 namelen = sizeof (soaddr_ux->sou_addr); 2115 so->so_state |= SS_FADDR_NOXLATE; 2116 } 2117 2118 /* 2119 * Length and family checks. 2120 */ 2121 error = so_addr_verify(so, name, namelen); 2122 if (error) 2123 goto bad; 2124 2125 /* 2126 * Save foreign address. Needed for AF_UNIX as well as 2127 * transport providers that do not support TI_GETPEERNAME. 2128 * Also used for cached foreign address for TCP and UDP. 2129 */ 2130 if (namelen > (t_uscalar_t)so->so_faddr_maxlen) { 2131 error = EINVAL; 2132 goto done; 2133 } 2134 so->so_faddr_len = (socklen_t)namelen; 2135 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2136 bcopy(name, so->so_faddr_sa, namelen); 2137 so->so_state |= SS_FADDR_VALID; 2138 2139 if (so->so_family == AF_UNIX) { 2140 if (so->so_state & SS_FADDR_NOXLATE) { 2141 /* 2142 * Already have a transport internal address. Do not 2143 * pass any (transport internal) source address. 2144 */ 2145 addr = so->so_faddr_sa; 2146 addrlen = (t_uscalar_t)so->so_faddr_len; 2147 src = NULL; 2148 srclen = 0; 2149 } else { 2150 /* 2151 * Pass the sockaddr_un source address as an option 2152 * and translate the remote address. 2153 * Holding so_lock thus so_laddr_sa can not change. 
2154 */ 2155 src = so->so_laddr_sa; 2156 srclen = (t_uscalar_t)so->so_laddr_len; 2157 dprintso(so, 1, 2158 ("sotpi_connect UNIX: srclen %d, src %p\n", 2159 srclen, src)); 2160 error = so_ux_addr_xlate(so, 2161 so->so_faddr_sa, (socklen_t)so->so_faddr_len, 2162 (flags & _SOCONNECT_XPG4_2), 2163 &addr, &addrlen); 2164 if (error) 2165 goto bad; 2166 } 2167 } else { 2168 addr = so->so_faddr_sa; 2169 addrlen = (t_uscalar_t)so->so_faddr_len; 2170 src = NULL; 2171 srclen = 0; 2172 } 2173 /* 2174 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2175 * option which asks the transport provider to send T_UDERR_IND 2176 * messages. These T_UDERR_IND messages are used to return connected 2177 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2178 * 2179 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2180 * we send down a T_CONN_REQ. This is needed to let the 2181 * transport assign a local address that is consistent with 2182 * the remote address. Applications depend on a getsockname() 2183 * after a connect() to retrieve the "source" IP address for 2184 * the connected socket. Invalidate the cached local address 2185 * to force getsockname() to enquire of the transport. 2186 */ 2187 if (!(so->so_mode & SM_CONNREQUIRED)) { 2188 /* 2189 * Datagram socket. 2190 */ 2191 int32_t val; 2192 2193 so_unlock_single(so, SOLOCKED); 2194 mutex_exit(&so->so_lock); 2195 2196 val = 1; 2197 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2198 &val, (t_uscalar_t)sizeof (val)); 2199 2200 mutex_enter(&so->so_lock); 2201 so_lock_single(so); /* Set SOLOCKED */ 2202 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2203 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2204 soconnect_tpi_udp) { 2205 soisconnected(so); 2206 goto done; 2207 } 2208 /* 2209 * Send down T_CONN_REQ etc. 2210 * Clear fflag to avoid returning EWOULDBLOCK. 
2211 */ 2212 fflag = 0; 2213 ASSERT(so->so_family != AF_UNIX); 2214 so->so_state &= ~SS_LADDR_VALID; 2215 } else if (so->so_laddr_len != 0) { 2216 /* 2217 * If the local address or port was "any" then it may be 2218 * changed by the transport as a result of the 2219 * connect. Invalidate the cached version if we have one. 2220 */ 2221 switch (so->so_family) { 2222 case AF_INET: 2223 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t)); 2224 if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr == 2225 INADDR_ANY || 2226 ((sin_t *)so->so_laddr_sa)->sin_port == 0) 2227 so->so_state &= ~SS_LADDR_VALID; 2228 break; 2229 2230 case AF_INET6: 2231 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t)); 2232 if (IN6_IS_ADDR_UNSPECIFIED( 2233 &((sin6_t *)so->so_laddr_sa) ->sin6_addr) || 2234 IN6_IS_ADDR_V4MAPPED_ANY( 2235 &((sin6_t *)so->so_laddr_sa)->sin6_addr) || 2236 ((sin6_t *)so->so_laddr_sa)->sin6_port == 0) 2237 so->so_state &= ~SS_LADDR_VALID; 2238 break; 2239 2240 default: 2241 break; 2242 } 2243 } 2244 2245 /* 2246 * Check for failure of an earlier call 2247 */ 2248 if (so->so_error != 0) 2249 goto so_bad; 2250 2251 /* 2252 * Send down T_CONN_REQ. Message was allocated above. 2253 */ 2254 conn_req.PRIM_type = T_CONN_REQ; 2255 conn_req.DEST_length = addrlen; 2256 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2257 if (srclen == 0) { 2258 conn_req.OPT_length = 0; 2259 conn_req.OPT_offset = 0; 2260 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2261 soappendmsg(mp, addr, addrlen); 2262 } else { 2263 /* 2264 * There is a AF_UNIX sockaddr_un to include as a source 2265 * address option. 
2266 */ 2267 struct T_opthdr toh; 2268 2269 toh.level = SOL_SOCKET; 2270 toh.name = SO_SRCADDR; 2271 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2272 toh.status = 0; 2273 conn_req.OPT_length = 2274 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2275 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2276 _TPI_ALIGN_TOPT(addrlen)); 2277 2278 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2279 soappendmsg(mp, addr, addrlen); 2280 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2281 soappendmsg(mp, &toh, sizeof (toh)); 2282 soappendmsg(mp, src, srclen); 2283 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2284 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2285 } 2286 /* 2287 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2288 * in order to have the right state when the T_CONN_CON shows up. 2289 */ 2290 soisconnecting(so); 2291 mutex_exit(&so->so_lock); 2292 2293 if (audit_active) 2294 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2295 2296 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2297 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2298 mp = NULL; 2299 mutex_enter(&so->so_lock); 2300 if (error != 0) 2301 goto bad; 2302 2303 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2304 goto bad; 2305 2306 /* Allow other threads to access the socket */ 2307 so_unlock_single(so, SOLOCKED); 2308 need_unlock = B_FALSE; 2309 2310 /* 2311 * Wait until we get a T_CONN_CON or an error 2312 */ 2313 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2314 so_lock_single(so); /* Set SOLOCKED */ 2315 need_unlock = B_TRUE; 2316 } 2317 2318 done: 2319 freemsg(mp); 2320 switch (error) { 2321 case EINPROGRESS: 2322 case EALREADY: 2323 case EISCONN: 2324 case EINTR: 2325 /* Non-fatal errors */ 2326 so->so_state &= ~SS_LADDR_VALID; 2327 /* FALLTHRU */ 2328 case 0: 2329 break; 2330 2331 case EHOSTUNREACH: 2332 if (flags & _SOCONNECT_XPG4_2) { 2333 /* 2334 * X/Open specification contains a requirement that 2335 * ENETUNREACH be returned but does not require 
2336 * EHOSTUNREACH. In order to keep the test suite 2337 * happy we mess with the errno here. 2338 */ 2339 error = ENETUNREACH; 2340 } 2341 /* FALLTHRU */ 2342 2343 default: 2344 ASSERT(need_unlock); 2345 /* 2346 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2347 * and invalidate local-address cache 2348 */ 2349 so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID); 2350 /* A discon_ind might have already unbound us */ 2351 if ((flags & _SOCONNECT_DID_BIND) && 2352 (so->so_state & SS_ISBOUND)) { 2353 int err; 2354 2355 err = sotpi_unbind(so, 0); 2356 /* LINTED - statement has no conseq */ 2357 if (err) { 2358 eprintsoline(so, err); 2359 } 2360 } 2361 break; 2362 } 2363 if (need_unlock) 2364 so_unlock_single(so, SOLOCKED); 2365 mutex_exit(&so->so_lock); 2366 return (error); 2367 2368 so_bad: error = sogeterr(so); 2369 bad: eprintsoline(so, error); 2370 goto done; 2371 } 2372 2373 int 2374 sotpi_shutdown(struct sonode *so, int how) 2375 { 2376 struct T_ordrel_req ordrel_req; 2377 mblk_t *mp; 2378 uint_t old_state, state_change; 2379 int error = 0; 2380 2381 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2382 so, how, pr_state(so->so_state, so->so_mode))); 2383 2384 mutex_enter(&so->so_lock); 2385 so_lock_single(so); /* Set SOLOCKED */ 2386 2387 /* 2388 * SunOS 4.X has no check for datagram sockets. 2389 * 5.X checks that it is connected (ENOTCONN) 2390 * X/Open requires that we check the connected state. 2391 */ 2392 if (!(so->so_state & SS_ISCONNECTED)) { 2393 if (!xnet_skip_checks) { 2394 error = ENOTCONN; 2395 if (xnet_check_print) { 2396 printf("sockfs: X/Open shutdown check " 2397 "caused ENOTCONN\n"); 2398 } 2399 } 2400 goto done; 2401 } 2402 /* 2403 * Record the current state and then perform any state changes. 2404 * Then use the difference between the old and new states to 2405 * determine which messages need to be sent. 2406 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2407 * duplicate calls to shutdown(). 
2408 */ 2409 old_state = so->so_state; 2410 2411 switch (how) { 2412 case 0: 2413 socantrcvmore(so); 2414 break; 2415 case 1: 2416 socantsendmore(so); 2417 break; 2418 case 2: 2419 socantsendmore(so); 2420 socantrcvmore(so); 2421 break; 2422 default: 2423 error = EINVAL; 2424 goto done; 2425 } 2426 2427 /* 2428 * Assumes that the SS_CANT* flags are never cleared in the above code. 2429 */ 2430 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2431 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2432 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2433 2434 switch (state_change) { 2435 case 0: 2436 dprintso(so, 1, 2437 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2438 so->so_state)); 2439 goto done; 2440 2441 case SS_CANTRCVMORE: 2442 mutex_exit(&so->so_lock); 2443 strseteof(SOTOV(so), 1); 2444 /* 2445 * strseteof takes care of read side wakeups, 2446 * pollwakeups, and signals. 2447 */ 2448 /* 2449 * Get the read lock before flushing data to avoid problems 2450 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2451 */ 2452 mutex_enter(&so->so_lock); 2453 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2454 mutex_exit(&so->so_lock); 2455 2456 /* Flush read side queue */ 2457 strflushrq(SOTOV(so), FLUSHALL); 2458 2459 mutex_enter(&so->so_lock); 2460 so_unlock_read(so); /* Clear SOREADLOCKED */ 2461 break; 2462 2463 case SS_CANTSENDMORE: 2464 mutex_exit(&so->so_lock); 2465 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2466 mutex_enter(&so->so_lock); 2467 break; 2468 2469 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2470 mutex_exit(&so->so_lock); 2471 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2472 strseteof(SOTOV(so), 1); 2473 /* 2474 * strseteof takes care of read side wakeups, 2475 * pollwakeups, and signals. 2476 */ 2477 /* 2478 * Get the read lock before flushing data to avoid problems 2479 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2480 */ 2481 mutex_enter(&so->so_lock); 2482 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2483 mutex_exit(&so->so_lock); 2484 2485 /* Flush read side queue */ 2486 strflushrq(SOTOV(so), FLUSHALL); 2487 2488 mutex_enter(&so->so_lock); 2489 so_unlock_read(so); /* Clear SOREADLOCKED */ 2490 break; 2491 } 2492 2493 ASSERT(MUTEX_HELD(&so->so_lock)); 2494 2495 /* 2496 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2497 * was set due to this call and the new state has both of them set: 2498 * Send the AF_UNIX close indication 2499 * For T_COTS send a discon_ind 2500 * 2501 * If cantsend was set due to this call: 2502 * For T_COTSORD send an ordrel_ind 2503 * 2504 * Note that for T_CLTS there is no message sent here. 2505 */ 2506 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2507 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2508 /* 2509 * For SunOS 4.X compatibility we tell the other end 2510 * that we are unable to receive at this point. 2511 */ 2512 if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS) 2513 so_unix_close(so); 2514 2515 if (so->so_serv_type == T_COTS) 2516 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2517 } 2518 if ((state_change & SS_CANTSENDMORE) && 2519 (so->so_serv_type == T_COTS_ORD)) { 2520 /* Send an orderly release */ 2521 ordrel_req.PRIM_type = T_ORDREL_REQ; 2522 2523 mutex_exit(&so->so_lock); 2524 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2525 0, _ALLOC_SLEEP); 2526 /* 2527 * Send down the T_ORDREL_REQ even if there is flow control. 2528 * This prevents shutdown from blocking. 2529 * Note that there is no T_OK_ACK for ordrel_req. 
2530 */ 2531 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2532 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2533 mutex_enter(&so->so_lock); 2534 if (error) { 2535 eprintsoline(so, error); 2536 goto done; 2537 } 2538 } 2539 2540 done: 2541 so_unlock_single(so, SOLOCKED); 2542 mutex_exit(&so->so_lock); 2543 return (error); 2544 } 2545 2546 /* 2547 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2548 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2549 * that we have closed. 2550 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2551 * T_UNITDATA_REQ containing the same option. 2552 * 2553 * For SOCK_DGRAM half-connections (somebody connected to this end 2554 * but this end is not connect) we don't know where to send any 2555 * SO_UNIX_CLOSE. 2556 * 2557 * We have to ignore stream head errors just in case there has been 2558 * a shutdown(output). 2559 * Ignore any flow control to try to get the message more quickly to the peer. 2560 * While locally ignoring flow control solves the problem when there 2561 * is only the loopback transport on the stream it would not provide 2562 * the correct AF_UNIX socket semantics when one or more modules have 2563 * been pushed. 
2564 */ 2565 void 2566 so_unix_close(struct sonode *so) 2567 { 2568 int error; 2569 struct T_opthdr toh; 2570 mblk_t *mp; 2571 2572 ASSERT(MUTEX_HELD(&so->so_lock)); 2573 2574 ASSERT(so->so_family == AF_UNIX); 2575 2576 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2577 (SS_ISCONNECTED|SS_ISBOUND)) 2578 return; 2579 2580 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2581 so, pr_state(so->so_state, so->so_mode))); 2582 2583 toh.level = SOL_SOCKET; 2584 toh.name = SO_UNIX_CLOSE; 2585 2586 /* zero length + header */ 2587 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2588 toh.status = 0; 2589 2590 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2591 struct T_optdata_req tdr; 2592 2593 tdr.PRIM_type = T_OPTDATA_REQ; 2594 tdr.DATA_flag = 0; 2595 2596 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2597 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2598 2599 /* NOTE: holding so_lock while sleeping */ 2600 mp = soallocproto2(&tdr, sizeof (tdr), 2601 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2602 } else { 2603 struct T_unitdata_req tudr; 2604 void *addr; 2605 socklen_t addrlen; 2606 void *src; 2607 socklen_t srclen; 2608 struct T_opthdr toh2; 2609 t_scalar_t size; 2610 2611 /* Connecteded DGRAM socket */ 2612 2613 /* 2614 * For AF_UNIX the destination address is translated to 2615 * an internal name and the source address is passed as 2616 * an option. 2617 */ 2618 /* 2619 * Length and family checks. 2620 */ 2621 error = so_addr_verify(so, so->so_faddr_sa, 2622 (t_uscalar_t)so->so_faddr_len); 2623 if (error) { 2624 eprintsoline(so, error); 2625 return; 2626 } 2627 if (so->so_state & SS_FADDR_NOXLATE) { 2628 /* 2629 * Already have a transport internal address. Do not 2630 * pass any (transport internal) source address. 2631 */ 2632 addr = so->so_faddr_sa; 2633 addrlen = (t_uscalar_t)so->so_faddr_len; 2634 src = NULL; 2635 srclen = 0; 2636 } else { 2637 /* 2638 * Pass the sockaddr_un source address as an option 2639 * and translate the remote address. 
2640 * Holding so_lock thus so_laddr_sa can not change. 2641 */ 2642 src = so->so_laddr_sa; 2643 srclen = (socklen_t)so->so_laddr_len; 2644 dprintso(so, 1, 2645 ("so_ux_close: srclen %d, src %p\n", 2646 srclen, src)); 2647 error = so_ux_addr_xlate(so, 2648 so->so_faddr_sa, 2649 (socklen_t)so->so_faddr_len, 0, 2650 &addr, &addrlen); 2651 if (error) { 2652 eprintsoline(so, error); 2653 return; 2654 } 2655 } 2656 tudr.PRIM_type = T_UNITDATA_REQ; 2657 tudr.DEST_length = addrlen; 2658 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2659 if (srclen == 0) { 2660 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2661 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2662 _TPI_ALIGN_TOPT(addrlen)); 2663 2664 size = tudr.OPT_offset + tudr.OPT_length; 2665 /* NOTE: holding so_lock while sleeping */ 2666 mp = soallocproto2(&tudr, sizeof (tudr), 2667 addr, addrlen, size, _ALLOC_SLEEP); 2668 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2669 soappendmsg(mp, &toh, sizeof (toh)); 2670 } else { 2671 /* 2672 * There is a AF_UNIX sockaddr_un to include as a 2673 * source address option. 
2674 */ 2675 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2676 _TPI_ALIGN_TOPT(srclen)); 2677 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2678 _TPI_ALIGN_TOPT(addrlen)); 2679 2680 toh2.level = SOL_SOCKET; 2681 toh2.name = SO_SRCADDR; 2682 toh2.len = (t_uscalar_t)(srclen + 2683 sizeof (struct T_opthdr)); 2684 toh2.status = 0; 2685 2686 size = tudr.OPT_offset + tudr.OPT_length; 2687 2688 /* NOTE: holding so_lock while sleeping */ 2689 mp = soallocproto2(&tudr, sizeof (tudr), 2690 addr, addrlen, size, _ALLOC_SLEEP); 2691 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2692 soappendmsg(mp, &toh, sizeof (toh)); 2693 soappendmsg(mp, &toh2, sizeof (toh2)); 2694 soappendmsg(mp, src, srclen); 2695 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2696 } 2697 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2698 } 2699 mutex_exit(&so->so_lock); 2700 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2701 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2702 mutex_enter(&so->so_lock); 2703 } 2704 2705 /* 2706 * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 2707 */ 2708 int 2709 sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) 2710 { 2711 mblk_t *mp, *nmp; 2712 int error; 2713 2714 dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", so, msg, flags)); 2715 2716 /* 2717 * There is never any oob data with addresses or control since 2718 * the T_EXDATA_IND does not carry any options. 
2719 */ 2720 msg->msg_controllen = 0; 2721 msg->msg_namelen = 0; 2722 2723 mutex_enter(&so->so_lock); 2724 ASSERT(so_verify_oobstate(so)); 2725 if ((so->so_options & SO_OOBINLINE) || 2726 (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 2727 dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 2728 mutex_exit(&so->so_lock); 2729 return (EINVAL); 2730 } 2731 if (!(so->so_state & SS_HAVEOOBDATA)) { 2732 dprintso(so, 1, ("sorecvoob: no data yet\n")); 2733 mutex_exit(&so->so_lock); 2734 return (EWOULDBLOCK); 2735 } 2736 ASSERT(so->so_oobmsg != NULL); 2737 mp = so->so_oobmsg; 2738 if (flags & MSG_PEEK) { 2739 /* 2740 * Since recv* can not return ENOBUFS we can not use dupmsg. 2741 * Instead we revert to the consolidation private 2742 * allocb_wait plus bcopy. 2743 */ 2744 mblk_t *mp1; 2745 2746 mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 2747 ASSERT(mp1); 2748 2749 while (mp != NULL) { 2750 ssize_t size; 2751 2752 size = MBLKL(mp); 2753 bcopy(mp->b_rptr, mp1->b_wptr, size); 2754 mp1->b_wptr += size; 2755 ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 2756 mp = mp->b_cont; 2757 } 2758 mp = mp1; 2759 } else { 2760 /* 2761 * Update the state indicating that the data has been consumed. 2762 * Keep SS_OOBPEND set until data is consumed past the mark. 
2763 */ 2764 so->so_oobmsg = NULL; 2765 so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 2766 } 2767 dprintso(so, 1, 2768 ("after recvoob(%p): counts %d/%d state %s\n", 2769 so, so->so_oobsigcnt, 2770 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2771 ASSERT(so_verify_oobstate(so)); 2772 mutex_exit(&so->so_lock); 2773 2774 error = 0; 2775 nmp = mp; 2776 while (nmp != NULL && uiop->uio_resid > 0) { 2777 ssize_t n = MBLKL(nmp); 2778 2779 n = MIN(n, uiop->uio_resid); 2780 if (n > 0) 2781 error = uiomove(nmp->b_rptr, n, 2782 UIO_READ, uiop); 2783 if (error) 2784 break; 2785 nmp = nmp->b_cont; 2786 } 2787 freemsg(mp); 2788 return (error); 2789 } 2790 2791 /* 2792 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2793 * In addition, the caller typically verifies that there is some 2794 * potential state to clear by checking 2795 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2796 * before calling this routine. 2797 * Note that such a check can be made without holding so_lock since 2798 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2799 * decrements so_oobsigcnt. 2800 * 2801 * When data is read *after* the point that all pending 2802 * oob data has been consumed the oob indication is cleared. 2803 * 2804 * This logic keeps select/poll returning POLLRDBAND and 2805 * SIOCATMARK returning true until we have read past 2806 * the mark. 
2807 */ 2808 static void 2809 sorecv_update_oobstate(struct sonode *so) 2810 { 2811 mutex_enter(&so->so_lock); 2812 ASSERT(so_verify_oobstate(so)); 2813 dprintso(so, 1, 2814 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2815 so->so_oobsigcnt, 2816 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2817 if (so->so_oobsigcnt == 0) { 2818 /* No more pending oob indications */ 2819 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2820 freemsg(so->so_oobmsg); 2821 so->so_oobmsg = NULL; 2822 } 2823 ASSERT(so_verify_oobstate(so)); 2824 mutex_exit(&so->so_lock); 2825 } 2826 2827 /* 2828 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2829 */ 2830 static int 2831 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2832 { 2833 int error = 0; 2834 mblk_t *tmp = NULL; 2835 mblk_t *pmp = NULL; 2836 mblk_t *nmp = so->so_nl7c_rcv_mp; 2837 2838 ASSERT(nmp != NULL); 2839 2840 while (nmp != NULL && uiop->uio_resid > 0) { 2841 ssize_t n; 2842 2843 if (DB_TYPE(nmp) == M_DATA) { 2844 /* 2845 * We have some data, uiomove up to resid bytes. 2846 */ 2847 n = MIN(MBLKL(nmp), uiop->uio_resid); 2848 if (n > 0) 2849 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 2850 nmp->b_rptr += n; 2851 if (nmp->b_rptr == nmp->b_wptr) { 2852 pmp = nmp; 2853 nmp = nmp->b_cont; 2854 } 2855 if (error) 2856 break; 2857 } else { 2858 /* 2859 * We only handle data, save for caller to handle. 
2860 */ 2861 if (pmp != NULL) { 2862 pmp->b_cont = nmp->b_cont; 2863 } 2864 nmp->b_cont = NULL; 2865 if (*rmp == NULL) { 2866 *rmp = nmp; 2867 } else { 2868 tmp->b_cont = nmp; 2869 } 2870 nmp = nmp->b_cont; 2871 tmp = nmp; 2872 } 2873 } 2874 if (pmp != NULL) { 2875 /* Free any mblk_t(s) which we have consumed */ 2876 pmp->b_cont = NULL; 2877 freemsg(so->so_nl7c_rcv_mp); 2878 } 2879 if ((so->so_nl7c_rcv_mp = nmp) == NULL) { 2880 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 2881 if (error == 0) { 2882 rval_t *p = (rval_t *)&so->so_nl7c_rcv_rval; 2883 2884 error = p->r_v.r_v2; 2885 p->r_v.r_v2 = 0; 2886 } 2887 rp->r_vals = so->so_nl7c_rcv_rval; 2888 so->so_nl7c_rcv_rval = 0; 2889 } else { 2890 /* More mblk_t(s) to process so no rval to return */ 2891 rp->r_vals = 0; 2892 } 2893 return (error); 2894 } 2895 2896 /* 2897 * Receive the next message on the queue. 2898 * If msg_controllen is non-zero when called the caller is interested in 2899 * any received control info (options). 2900 * If msg_namelen is non-zero when called the caller is interested in 2901 * any received source address. 2902 * The routine returns with msg_control and msg_name pointing to 2903 * kmem_alloc'ed memory which the caller has to free. 
2904 */ 2905 int 2906 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 2907 { 2908 union T_primitives *tpr; 2909 mblk_t *mp; 2910 uchar_t pri; 2911 int pflag, opflag; 2912 void *control; 2913 t_uscalar_t controllen; 2914 t_uscalar_t namelen; 2915 int so_state = so->so_state; /* Snapshot */ 2916 ssize_t saved_resid; 2917 rval_t rval; 2918 int flags; 2919 clock_t timout; 2920 int first; 2921 int error = 0; 2922 struct uio *suiop = NULL; 2923 sodirect_t *sodp = so->so_direct; 2924 2925 flags = msg->msg_flags; 2926 msg->msg_flags = 0; 2927 2928 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 2929 so, msg, flags, 2930 pr_state(so->so_state, so->so_mode), so->so_error)); 2931 2932 /* 2933 * If we are not connected because we have never been connected 2934 * we return ENOTCONN. If we have been connected (but are no longer 2935 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 2936 * the EOF. 2937 * 2938 * An alternative would be to post an ENOTCONN error in stream head 2939 * (read+write) and clear it when we're connected. However, that error 2940 * would cause incorrect poll/select behavior! 2941 */ 2942 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 2943 (so->so_mode & SM_CONNREQUIRED)) { 2944 return (ENOTCONN); 2945 } 2946 2947 /* 2948 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 2949 * after checking that the read queue is empty) and returns zero. 2950 * This implementation will sleep (in kstrgetmsg) even if uio_resid 2951 * is zero. 2952 */ 2953 2954 if (flags & MSG_OOB) { 2955 /* Check that the transport supports OOB */ 2956 if (!(so->so_mode & SM_EXDATA)) 2957 return (EOPNOTSUPP); 2958 return (sorecvoob(so, msg, uiop, flags)); 2959 } 2960 2961 /* 2962 * Set msg_controllen and msg_namelen to zero here to make it 2963 * simpler in the cases that no control or name is returned. 
2964 */ 2965 controllen = msg->msg_controllen; 2966 namelen = msg->msg_namelen; 2967 msg->msg_controllen = 0; 2968 msg->msg_namelen = 0; 2969 2970 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 2971 namelen, controllen)); 2972 2973 mutex_enter(&so->so_lock); 2974 /* 2975 * If an NL7C enabled socket and not waiting for write data. 2976 */ 2977 if ((so->so_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 2978 NL7C_ENABLED) { 2979 if (so->so_nl7c_uri) { 2980 /* Close uri processing for a previous request */ 2981 nl7c_close(so); 2982 } 2983 if ((so_state & SS_CANTRCVMORE) && so->so_nl7c_rcv_mp == NULL) { 2984 /* Nothing to process, EOF */ 2985 mutex_exit(&so->so_lock); 2986 return (0); 2987 } else if (so->so_nl7c_flags & NL7C_SOPERSIST) { 2988 /* Persistent NL7C socket, try to process request */ 2989 boolean_t ret; 2990 2991 ret = nl7c_process(so, 2992 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 2993 rval.r_vals = so->so_nl7c_rcv_rval; 2994 error = rval.r_v.r_v2; 2995 if (error) { 2996 /* Error of some sort, return it */ 2997 mutex_exit(&so->so_lock); 2998 return (error); 2999 } 3000 if (so->so_nl7c_flags && 3001 ! (so->so_nl7c_flags & NL7C_WAITWRITE)) { 3002 /* 3003 * Still an NL7C socket and no data 3004 * to pass up to the caller. 3005 */ 3006 mutex_exit(&so->so_lock); 3007 if (ret) { 3008 /* EOF */ 3009 return (0); 3010 } else { 3011 /* Need more data */ 3012 return (EAGAIN); 3013 } 3014 } 3015 } else { 3016 /* 3017 * Not persistent so no further NL7C processing. 3018 */ 3019 so->so_nl7c_flags = 0; 3020 } 3021 } 3022 /* 3023 * Only one reader is allowed at any given time. This is needed 3024 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3025 * 3026 * This is slightly different that BSD behavior in that it fails with 3027 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3028 * is single-threaded using sblock(), which is dropped while waiting 3029 * for data to appear. The difference shows up e.g. 
if one 3030 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3031 * does use nonblocking io and different threads are reading each 3032 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3033 * in this case as long as the read queue doesn't get empty. 3034 * In this implementation the thread using nonblocking io can 3035 * get an EWOULDBLOCK error due to the blocking thread executing 3036 * e.g. in the uiomove in kstrgetmsg. 3037 * This difference is not believed to be significant. 3038 */ 3039 /* Set SOREADLOCKED */ 3040 error = so_lock_read_intr(so, 3041 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3042 mutex_exit(&so->so_lock); 3043 if (error) 3044 return (error); 3045 3046 /* 3047 * Tell kstrgetmsg to not inspect the stream head errors until all 3048 * queued data has been consumed. 3049 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3050 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3051 * 3052 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3053 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3054 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3055 */ 3056 pflag = MSG_ANY | MSG_DELAYERROR; 3057 if (flags & MSG_PEEK) { 3058 pflag |= MSG_IPEEK; 3059 flags &= ~MSG_WAITALL; 3060 } 3061 if (so->so_mode & SM_ATOMIC) 3062 pflag |= MSG_DISCARDTAIL; 3063 3064 if (flags & MSG_DONTWAIT) 3065 timout = 0; 3066 else 3067 timout = -1; 3068 opflag = pflag; 3069 first = 1; 3070 3071 if (uiop->uio_resid >= uioasync.mincnt && 3072 sodp != NULL && (sodp->sod_state & SOD_ENABLED) && 3073 uioasync.enabled && !(flags & MSG_PEEK) && 3074 !(so_state & SS_CANTRCVMORE)) { 3075 /* 3076 * Big enough I/O for uioa min setup and an sodirect socket 3077 * and sodirect enabled and uioa enabled and I/O will be done 3078 * and not EOF so initialize the sodirect_t uioa_t with "uiop". 
3079 */ 3080 mutex_enter(sodp->sod_lock); 3081 if (!uioainit(uiop, &sodp->sod_uioa)) { 3082 /* 3083 * Successful uioainit() so the uio_t part of the 3084 * uioa_t will be used for all uio_t work to follow, 3085 * we save the original "uiop" in "suiop". 3086 */ 3087 suiop = uiop; 3088 uiop = (uio_t *)&sodp->sod_uioa; 3089 /* 3090 * Before returning to the caller the passed in uio_t 3091 * "uiop" will be updated via a call to uioafini() 3092 * below. 3093 * 3094 * Note, the uioa.uioa_state isn't set to UIOA_ENABLED 3095 * here as first we have to uioamove() any currently 3096 * queued M_DATA mblk_t(s) so it will be done in 3097 * kstrgetmsg(). 3098 */ 3099 } 3100 /* 3101 * In either uioainit() success or not case note the number 3102 * of uio bytes the caller wants for sod framework and/or 3103 * transport (e.g. TCP) strategy. 3104 */ 3105 sodp->sod_want = uiop->uio_resid; 3106 mutex_exit(sodp->sod_lock); 3107 } else if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) { 3108 /* 3109 * No uioa but still using sodirect so note the number of 3110 * uio bytes the caller wants for sodirect framework and/or 3111 * transport (e.g. TCP) strategy. 3112 * 3113 * Note, sod_lock not held, only writer is in this function 3114 * and only one thread at a time so not needed just to init. 
3115 */ 3116 sodp->sod_want = uiop->uio_resid; 3117 } 3118 retry: 3119 saved_resid = uiop->uio_resid; 3120 pri = 0; 3121 mp = NULL; 3122 if (so->so_nl7c_rcv_mp != NULL) { 3123 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3124 error = nl7c_sorecv(so, &mp, uiop, &rval); 3125 } else { 3126 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3127 timout, &rval); 3128 } 3129 if (error) { 3130 switch (error) { 3131 case EINTR: 3132 case EWOULDBLOCK: 3133 if (!first) 3134 error = 0; 3135 break; 3136 case ETIME: 3137 /* Returned from kstrgetmsg when timeout expires */ 3138 if (!first) 3139 error = 0; 3140 else 3141 error = EWOULDBLOCK; 3142 break; 3143 default: 3144 eprintsoline(so, error); 3145 break; 3146 } 3147 goto out; 3148 } 3149 /* 3150 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3151 * For non-datagrams MOREDATA is used to set MSG_EOR. 3152 */ 3153 ASSERT(!(rval.r_val1 & MORECTL)); 3154 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3155 msg->msg_flags |= MSG_TRUNC; 3156 3157 if (mp == NULL) { 3158 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3159 /* 3160 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3161 * The draft Posix socket spec states that the mark should 3162 * not be cleared when peeking. We follow the latter. 3163 */ 3164 if ((so->so_state & 3165 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3166 (uiop->uio_resid != saved_resid) && 3167 !(flags & MSG_PEEK)) { 3168 sorecv_update_oobstate(so); 3169 } 3170 3171 mutex_enter(&so->so_lock); 3172 /* Set MSG_EOR based on MOREDATA */ 3173 if (!(rval.r_val1 & MOREDATA)) { 3174 if (so->so_state & SS_SAVEDEOR) { 3175 msg->msg_flags |= MSG_EOR; 3176 so->so_state &= ~SS_SAVEDEOR; 3177 } 3178 } 3179 /* 3180 * If some data was received (i.e. not EOF) and the 3181 * read/recv* has not been satisfied wait for some more. 
3182 */ 3183 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3184 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3185 mutex_exit(&so->so_lock); 3186 first = 0; 3187 pflag = opflag | MSG_NOMARK; 3188 goto retry; 3189 } 3190 goto out_locked; 3191 } 3192 3193 /* strsock_proto has already verified length and alignment */ 3194 tpr = (union T_primitives *)mp->b_rptr; 3195 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3196 3197 switch (tpr->type) { 3198 case T_DATA_IND: { 3199 if ((so->so_state & 3200 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3201 (uiop->uio_resid != saved_resid) && 3202 !(flags & MSG_PEEK)) { 3203 sorecv_update_oobstate(so); 3204 } 3205 3206 /* 3207 * Set msg_flags to MSG_EOR based on 3208 * MORE_flag and MOREDATA. 3209 */ 3210 mutex_enter(&so->so_lock); 3211 so->so_state &= ~SS_SAVEDEOR; 3212 if (!(tpr->data_ind.MORE_flag & 1)) { 3213 if (!(rval.r_val1 & MOREDATA)) 3214 msg->msg_flags |= MSG_EOR; 3215 else 3216 so->so_state |= SS_SAVEDEOR; 3217 } 3218 freemsg(mp); 3219 /* 3220 * If some data was received (i.e. not EOF) and the 3221 * read/recv* has not been satisfied wait for some more. 
3222 */ 3223 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3224 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3225 mutex_exit(&so->so_lock); 3226 first = 0; 3227 pflag = opflag | MSG_NOMARK; 3228 goto retry; 3229 } 3230 goto out_locked; 3231 } 3232 case T_UNITDATA_IND: { 3233 void *addr; 3234 t_uscalar_t addrlen; 3235 void *abuf; 3236 t_uscalar_t optlen; 3237 void *opt; 3238 3239 if ((so->so_state & 3240 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3241 (uiop->uio_resid != saved_resid) && 3242 !(flags & MSG_PEEK)) { 3243 sorecv_update_oobstate(so); 3244 } 3245 3246 if (namelen != 0) { 3247 /* Caller wants source address */ 3248 addrlen = tpr->unitdata_ind.SRC_length; 3249 addr = sogetoff(mp, 3250 tpr->unitdata_ind.SRC_offset, 3251 addrlen, 1); 3252 if (addr == NULL) { 3253 freemsg(mp); 3254 error = EPROTO; 3255 eprintsoline(so, error); 3256 goto out; 3257 } 3258 if (so->so_family == AF_UNIX) { 3259 /* 3260 * Can not use the transport level address. 3261 * If there is a SO_SRCADDR option carrying 3262 * the socket level address it will be 3263 * extracted below. 3264 */ 3265 addr = NULL; 3266 addrlen = 0; 3267 } 3268 } 3269 optlen = tpr->unitdata_ind.OPT_length; 3270 if (optlen != 0) { 3271 t_uscalar_t ncontrollen; 3272 3273 /* 3274 * Extract any source address option. 3275 * Determine how large cmsg buffer is needed. 3276 */ 3277 opt = sogetoff(mp, 3278 tpr->unitdata_ind.OPT_offset, 3279 optlen, __TPI_ALIGN_SIZE); 3280 3281 if (opt == NULL) { 3282 freemsg(mp); 3283 error = EPROTO; 3284 eprintsoline(so, error); 3285 goto out; 3286 } 3287 if (so->so_family == AF_UNIX) 3288 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3289 ncontrollen = so_cmsglen(mp, opt, optlen, 3290 !(flags & MSG_XPG4_2)); 3291 if (controllen != 0) 3292 controllen = ncontrollen; 3293 else if (ncontrollen != 0) 3294 msg->msg_flags |= MSG_CTRUNC; 3295 } else { 3296 controllen = 0; 3297 } 3298 3299 if (namelen != 0) { 3300 /* 3301 * Return address to caller. 
3302 * Caller handles truncation if length 3303 * exceeds msg_namelen. 3304 * NOTE: AF_UNIX NUL termination is ensured by 3305 * the sender's copyin_name(). 3306 */ 3307 abuf = kmem_alloc(addrlen, KM_SLEEP); 3308 3309 bcopy(addr, abuf, addrlen); 3310 msg->msg_name = abuf; 3311 msg->msg_namelen = addrlen; 3312 } 3313 3314 if (controllen != 0) { 3315 /* 3316 * Return control msg to caller. 3317 * Caller handles truncation if length 3318 * exceeds msg_controllen. 3319 */ 3320 control = kmem_zalloc(controllen, KM_SLEEP); 3321 3322 error = so_opt2cmsg(mp, opt, optlen, 3323 !(flags & MSG_XPG4_2), 3324 control, controllen); 3325 if (error) { 3326 freemsg(mp); 3327 if (msg->msg_namelen != 0) 3328 kmem_free(msg->msg_name, 3329 msg->msg_namelen); 3330 kmem_free(control, controllen); 3331 eprintsoline(so, error); 3332 goto out; 3333 } 3334 msg->msg_control = control; 3335 msg->msg_controllen = controllen; 3336 } 3337 3338 freemsg(mp); 3339 goto out; 3340 } 3341 case T_OPTDATA_IND: { 3342 struct T_optdata_req *tdr; 3343 void *opt; 3344 t_uscalar_t optlen; 3345 3346 if ((so->so_state & 3347 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3348 (uiop->uio_resid != saved_resid) && 3349 !(flags & MSG_PEEK)) { 3350 sorecv_update_oobstate(so); 3351 } 3352 3353 tdr = (struct T_optdata_req *)mp->b_rptr; 3354 optlen = tdr->OPT_length; 3355 if (optlen != 0) { 3356 t_uscalar_t ncontrollen; 3357 /* 3358 * Determine how large cmsg buffer is needed. 3359 */ 3360 opt = sogetoff(mp, 3361 tpr->optdata_ind.OPT_offset, 3362 optlen, __TPI_ALIGN_SIZE); 3363 3364 if (opt == NULL) { 3365 freemsg(mp); 3366 error = EPROTO; 3367 eprintsoline(so, error); 3368 goto out; 3369 } 3370 3371 ncontrollen = so_cmsglen(mp, opt, optlen, 3372 !(flags & MSG_XPG4_2)); 3373 if (controllen != 0) 3374 controllen = ncontrollen; 3375 else if (ncontrollen != 0) 3376 msg->msg_flags |= MSG_CTRUNC; 3377 } else { 3378 controllen = 0; 3379 } 3380 3381 if (controllen != 0) { 3382 /* 3383 * Return control msg to caller. 
3384 * Caller handles truncation if length 3385 * exceeds msg_controllen. 3386 */ 3387 control = kmem_zalloc(controllen, KM_SLEEP); 3388 3389 error = so_opt2cmsg(mp, opt, optlen, 3390 !(flags & MSG_XPG4_2), 3391 control, controllen); 3392 if (error) { 3393 freemsg(mp); 3394 kmem_free(control, controllen); 3395 eprintsoline(so, error); 3396 goto out; 3397 } 3398 msg->msg_control = control; 3399 msg->msg_controllen = controllen; 3400 } 3401 3402 /* 3403 * Set msg_flags to MSG_EOR based on 3404 * DATA_flag and MOREDATA. 3405 */ 3406 mutex_enter(&so->so_lock); 3407 so->so_state &= ~SS_SAVEDEOR; 3408 if (!(tpr->data_ind.MORE_flag & 1)) { 3409 if (!(rval.r_val1 & MOREDATA)) 3410 msg->msg_flags |= MSG_EOR; 3411 else 3412 so->so_state |= SS_SAVEDEOR; 3413 } 3414 freemsg(mp); 3415 /* 3416 * If some data was received (i.e. not EOF) and the 3417 * read/recv* has not been satisfied wait for some more. 3418 * Not possible to wait if control info was received. 3419 */ 3420 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3421 controllen == 0 && 3422 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3423 mutex_exit(&so->so_lock); 3424 first = 0; 3425 pflag = opflag | MSG_NOMARK; 3426 goto retry; 3427 } 3428 goto out_locked; 3429 } 3430 case T_EXDATA_IND: { 3431 dprintso(so, 1, 3432 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3433 "state %s\n", 3434 so->so_oobsigcnt, so->so_oobcnt, 3435 saved_resid - uiop->uio_resid, 3436 pr_state(so->so_state, so->so_mode))); 3437 /* 3438 * kstrgetmsg handles MSGMARK so there is nothing to 3439 * inspect in the T_EXDATA_IND. 3440 * strsock_proto makes the stream head queue the T_EXDATA_IND 3441 * as a separate message with no M_DATA component. Furthermore, 3442 * the stream head does not consolidate M_DATA messages onto 3443 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3444 * remains a message by itself. 
This is needed since MSGMARK 3445 * marks both the whole message as well as the last byte 3446 * of the message. 3447 */ 3448 freemsg(mp); 3449 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3450 if (flags & MSG_PEEK) { 3451 /* 3452 * Even though we are peeking we consume the 3453 * T_EXDATA_IND thereby moving the mark information 3454 * to SS_RCVATMARK. Then the oob code below will 3455 * retry the peeking kstrgetmsg. 3456 * Note that the stream head read queue is 3457 * never flushed without holding SOREADLOCKED 3458 * thus the T_EXDATA_IND can not disappear 3459 * underneath us. 3460 */ 3461 dprintso(so, 1, 3462 ("sotpi_recvmsg: consume EXDATA_IND " 3463 "counts %d/%d state %s\n", 3464 so->so_oobsigcnt, 3465 so->so_oobcnt, 3466 pr_state(so->so_state, so->so_mode))); 3467 3468 pflag = MSG_ANY | MSG_DELAYERROR; 3469 if (so->so_mode & SM_ATOMIC) 3470 pflag |= MSG_DISCARDTAIL; 3471 3472 pri = 0; 3473 mp = NULL; 3474 3475 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3476 &pri, &pflag, (clock_t)-1, &rval); 3477 ASSERT(uiop->uio_resid == saved_resid); 3478 3479 if (error) { 3480 #ifdef SOCK_DEBUG 3481 if (error != EWOULDBLOCK && error != EINTR) { 3482 eprintsoline(so, error); 3483 } 3484 #endif /* SOCK_DEBUG */ 3485 goto out; 3486 } 3487 ASSERT(mp); 3488 tpr = (union T_primitives *)mp->b_rptr; 3489 ASSERT(tpr->type == T_EXDATA_IND); 3490 freemsg(mp); 3491 } /* end "if (flags & MSG_PEEK)" */ 3492 3493 /* 3494 * Decrement the number of queued and pending oob. 3495 * 3496 * SS_RCVATMARK is cleared when we read past a mark. 3497 * SS_HAVEOOBDATA is cleared when we've read past the 3498 * last mark. 3499 * SS_OOBPEND is cleared if we've read past the last 3500 * mark and no (new) SIGURG has been posted. 
3501 */ 3502 mutex_enter(&so->so_lock); 3503 ASSERT(so_verify_oobstate(so)); 3504 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 3505 ASSERT(so->so_oobsigcnt > 0); 3506 so->so_oobsigcnt--; 3507 ASSERT(so->so_oobcnt > 0); 3508 so->so_oobcnt--; 3509 /* 3510 * Since the T_EXDATA_IND has been removed from the stream 3511 * head, but we have not read data past the mark, 3512 * sockfs needs to track that the socket is still at the mark. 3513 * 3514 * Since no data was received call kstrgetmsg again to wait 3515 * for data. 3516 */ 3517 so->so_state |= SS_RCVATMARK; 3518 mutex_exit(&so->so_lock); 3519 dprintso(so, 1, 3520 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3521 so->so_oobsigcnt, so->so_oobcnt, 3522 pr_state(so->so_state, so->so_mode))); 3523 pflag = opflag; 3524 goto retry; 3525 } 3526 default: 3527 ASSERT(0); 3528 freemsg(mp); 3529 error = EPROTO; 3530 eprintsoline(so, error); 3531 goto out; 3532 } 3533 /* NOTREACHED */ 3534 out: 3535 mutex_enter(&so->so_lock); 3536 out_locked: 3537 if (sodp != NULL) { 3538 /* Finish any sodirect and uioa processing */ 3539 mutex_enter(sodp->sod_lock); 3540 if (suiop != NULL) { 3541 /* Finish any uioa_t processing */ 3542 int ret; 3543 3544 ASSERT(uiop == (uio_t *)&sodp->sod_uioa); 3545 ret = uioafini(suiop, (uioa_t *)uiop); 3546 if (error == 0 && ret != 0) { 3547 /* If no error yet, set it */ 3548 error = ret; 3549 } 3550 if ((mp = sodp->sod_uioafh) != NULL) { 3551 sodp->sod_uioafh = NULL; 3552 sodp->sod_uioaft = NULL; 3553 freemsg(mp); 3554 } 3555 } 3556 if (!(sodp->sod_state & SOD_WAKE_NOT)) { 3557 /* Awoke */ 3558 sodp->sod_state &= SOD_WAKE_CLR; 3559 sodp->sod_state |= SOD_WAKE_NOT; 3560 } 3561 /* Last, clear sod_want value */ 3562 sodp->sod_want = 0; 3563 mutex_exit(sodp->sod_lock); 3564 } 3565 so_unlock_read(so); /* Clear SOREADLOCKED */ 3566 mutex_exit(&so->so_lock); 3567 return (error); 3568 } 3569 3570 /* 3571 * Sending data with options on a datagram socket. 
3572 * Assumes caller has verified that SS_ISBOUND etc. are set. 3573 */ 3574 static int 3575 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3576 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3577 { 3578 struct T_unitdata_req tudr; 3579 mblk_t *mp; 3580 int error; 3581 void *addr; 3582 socklen_t addrlen; 3583 void *src; 3584 socklen_t srclen; 3585 ssize_t len; 3586 int size; 3587 struct T_opthdr toh; 3588 struct fdbuf *fdbuf; 3589 t_uscalar_t optlen; 3590 void *fds; 3591 int fdlen; 3592 3593 ASSERT(name && namelen); 3594 ASSERT(control && controllen); 3595 3596 len = uiop->uio_resid; 3597 if (len > (ssize_t)so->so_tidu_size) { 3598 return (EMSGSIZE); 3599 } 3600 3601 /* 3602 * For AF_UNIX the destination address is translated to an internal 3603 * name and the source address is passed as an option. 3604 * Also, file descriptors are passed as file pointers in an 3605 * option. 3606 */ 3607 3608 /* 3609 * Length and family checks. 3610 */ 3611 error = so_addr_verify(so, name, namelen); 3612 if (error) { 3613 eprintsoline(so, error); 3614 return (error); 3615 } 3616 if (so->so_family == AF_UNIX) { 3617 if (so->so_state & SS_FADDR_NOXLATE) { 3618 /* 3619 * Already have a transport internal address. Do not 3620 * pass any (transport internal) source address. 3621 */ 3622 addr = name; 3623 addrlen = namelen; 3624 src = NULL; 3625 srclen = 0; 3626 } else { 3627 /* 3628 * Pass the sockaddr_un source address as an option 3629 * and translate the remote address. 3630 * 3631 * Note that this code does not prevent so_laddr_sa 3632 * from changing while it is being used. Thus 3633 * if an unbind+bind occurs concurrently with this 3634 * send the peer might see a partially new and a 3635 * partially old "from" address. 
3636 */ 3637 src = so->so_laddr_sa; 3638 srclen = (t_uscalar_t)so->so_laddr_len; 3639 dprintso(so, 1, 3640 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3641 srclen, src)); 3642 error = so_ux_addr_xlate(so, name, namelen, 3643 (flags & MSG_XPG4_2), 3644 &addr, &addrlen); 3645 if (error) { 3646 eprintsoline(so, error); 3647 return (error); 3648 } 3649 } 3650 } else { 3651 addr = name; 3652 addrlen = namelen; 3653 src = NULL; 3654 srclen = 0; 3655 } 3656 optlen = so_optlen(control, controllen, 3657 !(flags & MSG_XPG4_2)); 3658 tudr.PRIM_type = T_UNITDATA_REQ; 3659 tudr.DEST_length = addrlen; 3660 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3661 if (srclen != 0) 3662 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3663 _TPI_ALIGN_TOPT(srclen)); 3664 else 3665 tudr.OPT_length = optlen; 3666 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3667 _TPI_ALIGN_TOPT(addrlen)); 3668 3669 size = tudr.OPT_offset + tudr.OPT_length; 3670 3671 /* 3672 * File descriptors only when SM_FDPASSING set. 3673 */ 3674 error = so_getfdopt(control, controllen, 3675 !(flags & MSG_XPG4_2), &fds, &fdlen); 3676 if (error) 3677 return (error); 3678 if (fdlen != -1) { 3679 if (!(so->so_mode & SM_FDPASSING)) 3680 return (EOPNOTSUPP); 3681 3682 error = fdbuf_create(fds, fdlen, &fdbuf); 3683 if (error) 3684 return (error); 3685 mp = fdbuf_allocmsg(size, fdbuf); 3686 } else { 3687 mp = soallocproto(size, _ALLOC_INTR); 3688 if (mp == NULL) { 3689 /* 3690 * Caught a signal waiting for memory. 3691 * Let send* return EINTR. 
3692 */ 3693 return (EINTR); 3694 } 3695 } 3696 soappendmsg(mp, &tudr, sizeof (tudr)); 3697 soappendmsg(mp, addr, addrlen); 3698 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3699 3700 if (fdlen != -1) { 3701 ASSERT(fdbuf != NULL); 3702 toh.level = SOL_SOCKET; 3703 toh.name = SO_FILEP; 3704 toh.len = fdbuf->fd_size + 3705 (t_uscalar_t)sizeof (struct T_opthdr); 3706 toh.status = 0; 3707 soappendmsg(mp, &toh, sizeof (toh)); 3708 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3709 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3710 } 3711 if (srclen != 0) { 3712 /* 3713 * There is a AF_UNIX sockaddr_un to include as a source 3714 * address option. 3715 */ 3716 toh.level = SOL_SOCKET; 3717 toh.name = SO_SRCADDR; 3718 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3719 toh.status = 0; 3720 soappendmsg(mp, &toh, sizeof (toh)); 3721 soappendmsg(mp, src, srclen); 3722 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3723 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3724 } 3725 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3726 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3727 /* At most 3 bytes left in the message */ 3728 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3729 ASSERT(MBLKL(mp) <= (ssize_t)size); 3730 3731 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3732 if (audit_active) 3733 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3734 3735 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3736 #ifdef SOCK_DEBUG 3737 if (error) { 3738 eprintsoline(so, error); 3739 } 3740 #endif /* SOCK_DEBUG */ 3741 return (error); 3742 } 3743 3744 /* 3745 * Sending data with options on a connected stream socket. 3746 * Assumes caller has verified that SS_ISCONNECTED is set. 
3747 */ 3748 static int 3749 sosend_svccmsg(struct sonode *so, 3750 struct uio *uiop, 3751 int more, 3752 void *control, 3753 t_uscalar_t controllen, 3754 int flags) 3755 { 3756 struct T_optdata_req tdr; 3757 mblk_t *mp; 3758 int error; 3759 ssize_t iosize; 3760 int first = 1; 3761 int size; 3762 struct fdbuf *fdbuf; 3763 t_uscalar_t optlen; 3764 void *fds; 3765 int fdlen; 3766 struct T_opthdr toh; 3767 3768 dprintso(so, 1, 3769 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3770 3771 /* 3772 * Has to be bound and connected. However, since no locks are 3773 * held the state could have changed after sotpi_sendmsg checked it 3774 * thus it is not possible to ASSERT on the state. 3775 */ 3776 3777 /* Options on connection-oriented only when SM_OPTDATA set. */ 3778 if (!(so->so_mode & SM_OPTDATA)) 3779 return (EOPNOTSUPP); 3780 3781 do { 3782 /* 3783 * Set the MORE flag if uio_resid does not fit in this 3784 * message or if the caller passed in "more". 3785 * Error for transports with zero tidu_size. 3786 */ 3787 tdr.PRIM_type = T_OPTDATA_REQ; 3788 iosize = so->so_tidu_size; 3789 if (iosize <= 0) 3790 return (EMSGSIZE); 3791 if (uiop->uio_resid > iosize) { 3792 tdr.DATA_flag = 1; 3793 } else { 3794 if (more) 3795 tdr.DATA_flag = 1; 3796 else 3797 tdr.DATA_flag = 0; 3798 iosize = uiop->uio_resid; 3799 } 3800 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3801 tdr.DATA_flag, iosize)); 3802 3803 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3804 tdr.OPT_length = optlen; 3805 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3806 3807 size = (int)sizeof (tdr) + optlen; 3808 /* 3809 * File descriptors only when SM_FDPASSING set. 
3810 */ 3811 error = so_getfdopt(control, controllen, 3812 !(flags & MSG_XPG4_2), &fds, &fdlen); 3813 if (error) 3814 return (error); 3815 if (fdlen != -1) { 3816 if (!(so->so_mode & SM_FDPASSING)) 3817 return (EOPNOTSUPP); 3818 3819 error = fdbuf_create(fds, fdlen, &fdbuf); 3820 if (error) 3821 return (error); 3822 mp = fdbuf_allocmsg(size, fdbuf); 3823 } else { 3824 mp = soallocproto(size, _ALLOC_INTR); 3825 if (mp == NULL) { 3826 /* 3827 * Caught a signal waiting for memory. 3828 * Let send* return EINTR. 3829 */ 3830 return (first ? EINTR : 0); 3831 } 3832 } 3833 soappendmsg(mp, &tdr, sizeof (tdr)); 3834 3835 if (fdlen != -1) { 3836 ASSERT(fdbuf != NULL); 3837 toh.level = SOL_SOCKET; 3838 toh.name = SO_FILEP; 3839 toh.len = fdbuf->fd_size + 3840 (t_uscalar_t)sizeof (struct T_opthdr); 3841 toh.status = 0; 3842 soappendmsg(mp, &toh, sizeof (toh)); 3843 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3844 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3845 } 3846 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3847 /* At most 3 bytes left in the message */ 3848 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3849 ASSERT(MBLKL(mp) <= (ssize_t)size); 3850 3851 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3852 3853 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3854 0, MSG_BAND, 0); 3855 if (error) { 3856 if (!first && error == EWOULDBLOCK) 3857 return (0); 3858 eprintsoline(so, error); 3859 return (error); 3860 } 3861 control = NULL; 3862 first = 0; 3863 if (uiop->uio_resid > 0) { 3864 /* 3865 * Recheck for fatal errors. Fail write even though 3866 * some data have been written. This is consistent 3867 * with strwrite semantics and BSD sockets semantics. 
3868 */ 3869 if (so->so_state & SS_CANTSENDMORE) { 3870 tsignal(curthread, SIGPIPE); 3871 eprintsoline(so, error); 3872 return (EPIPE); 3873 } 3874 if (so->so_error != 0) { 3875 mutex_enter(&so->so_lock); 3876 error = sogeterr(so); 3877 mutex_exit(&so->so_lock); 3878 if (error != 0) { 3879 eprintsoline(so, error); 3880 return (error); 3881 } 3882 } 3883 } 3884 } while (uiop->uio_resid > 0); 3885 return (0); 3886 } 3887 3888 /* 3889 * Sending data on a datagram socket. 3890 * Assumes caller has verified that SS_ISBOUND etc. are set. 3891 * 3892 * For AF_UNIX the destination address is translated to an internal 3893 * name and the source address is passed as an option. 3894 */ 3895 int 3896 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3897 struct uio *uiop, int flags) 3898 { 3899 struct T_unitdata_req tudr; 3900 mblk_t *mp; 3901 int error; 3902 void *addr; 3903 socklen_t addrlen; 3904 void *src; 3905 socklen_t srclen; 3906 ssize_t len; 3907 3908 ASSERT(name != NULL && namelen != 0); 3909 3910 len = uiop->uio_resid; 3911 if (len > so->so_tidu_size) { 3912 error = EMSGSIZE; 3913 goto done; 3914 } 3915 3916 /* Length and family checks */ 3917 error = so_addr_verify(so, name, namelen); 3918 if (error != 0) 3919 goto done; 3920 3921 if (so->so_state & SS_DIRECT) 3922 return (sodgram_direct(so, name, namelen, uiop, flags)); 3923 3924 if (so->so_family == AF_UNIX) { 3925 if (so->so_state & SS_FADDR_NOXLATE) { 3926 /* 3927 * Already have a transport internal address. Do not 3928 * pass any (transport internal) source address. 3929 */ 3930 addr = name; 3931 addrlen = namelen; 3932 src = NULL; 3933 srclen = 0; 3934 } else { 3935 /* 3936 * Pass the sockaddr_un source address as an option 3937 * and translate the remote address. 3938 * 3939 * Note that this code does not prevent so_laddr_sa 3940 * from changing while it is being used. 
Thus 3941 * if an unbind+bind occurs concurrently with this 3942 * send the peer might see a partially new and a 3943 * partially old "from" address. 3944 */ 3945 src = so->so_laddr_sa; 3946 srclen = (socklen_t)so->so_laddr_len; 3947 dprintso(so, 1, 3948 ("sosend_dgram UNIX: srclen %d, src %p\n", 3949 srclen, src)); 3950 error = so_ux_addr_xlate(so, name, namelen, 3951 (flags & MSG_XPG4_2), 3952 &addr, &addrlen); 3953 if (error) { 3954 eprintsoline(so, error); 3955 goto done; 3956 } 3957 } 3958 } else { 3959 addr = name; 3960 addrlen = namelen; 3961 src = NULL; 3962 srclen = 0; 3963 } 3964 tudr.PRIM_type = T_UNITDATA_REQ; 3965 tudr.DEST_length = addrlen; 3966 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3967 if (srclen == 0) { 3968 tudr.OPT_length = 0; 3969 tudr.OPT_offset = 0; 3970 3971 mp = soallocproto2(&tudr, sizeof (tudr), 3972 addr, addrlen, 0, _ALLOC_INTR); 3973 if (mp == NULL) { 3974 /* 3975 * Caught a signal waiting for memory. 3976 * Let send* return EINTR. 3977 */ 3978 error = EINTR; 3979 goto done; 3980 } 3981 } else { 3982 /* 3983 * There is a AF_UNIX sockaddr_un to include as a source 3984 * address option. 3985 */ 3986 struct T_opthdr toh; 3987 ssize_t size; 3988 3989 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3990 _TPI_ALIGN_TOPT(srclen)); 3991 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3992 _TPI_ALIGN_TOPT(addrlen)); 3993 3994 toh.level = SOL_SOCKET; 3995 toh.name = SO_SRCADDR; 3996 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3997 toh.status = 0; 3998 3999 size = tudr.OPT_offset + tudr.OPT_length; 4000 mp = soallocproto2(&tudr, sizeof (tudr), 4001 addr, addrlen, size, _ALLOC_INTR); 4002 if (mp == NULL) { 4003 /* 4004 * Caught a signal waiting for memory. 4005 * Let send* return EINTR. 
4006 */ 4007 error = EINTR; 4008 goto done; 4009 } 4010 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4011 soappendmsg(mp, &toh, sizeof (toh)); 4012 soappendmsg(mp, src, srclen); 4013 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4014 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4015 } 4016 4017 if (audit_active) 4018 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4019 4020 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4021 done: 4022 #ifdef SOCK_DEBUG 4023 if (error) { 4024 eprintsoline(so, error); 4025 } 4026 #endif /* SOCK_DEBUG */ 4027 return (error); 4028 } 4029 4030 /* 4031 * Sending data on a connected stream socket. 4032 * Assumes caller has verified that SS_ISCONNECTED is set. 4033 */ 4034 int 4035 sosend_svc(struct sonode *so, 4036 struct uio *uiop, 4037 t_scalar_t prim, 4038 int more, 4039 int sflag) 4040 { 4041 struct T_data_req tdr; 4042 mblk_t *mp; 4043 int error; 4044 ssize_t iosize; 4045 int first = 1; 4046 4047 dprintso(so, 1, 4048 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4049 so, uiop->uio_resid, prim, sflag)); 4050 4051 /* 4052 * Has to be bound and connected. However, since no locks are 4053 * held the state could have changed after sotpi_sendmsg checked it 4054 * thus it is not possible to ASSERT on the state. 4055 */ 4056 4057 do { 4058 /* 4059 * Set the MORE flag if uio_resid does not fit in this 4060 * message or if the caller passed in "more". 4061 * Error for transports with zero tidu_size. 
4062 */ 4063 tdr.PRIM_type = prim; 4064 iosize = so->so_tidu_size; 4065 if (iosize <= 0) 4066 return (EMSGSIZE); 4067 if (uiop->uio_resid > iosize) { 4068 tdr.MORE_flag = 1; 4069 } else { 4070 if (more) 4071 tdr.MORE_flag = 1; 4072 else 4073 tdr.MORE_flag = 0; 4074 iosize = uiop->uio_resid; 4075 } 4076 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4077 prim, tdr.MORE_flag, iosize)); 4078 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 4079 if (mp == NULL) { 4080 /* 4081 * Caught a signal waiting for memory. 4082 * Let send* return EINTR. 4083 */ 4084 if (first) 4085 return (EINTR); 4086 else 4087 return (0); 4088 } 4089 4090 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4091 0, sflag | MSG_BAND, 0); 4092 if (error) { 4093 if (!first && error == EWOULDBLOCK) 4094 return (0); 4095 eprintsoline(so, error); 4096 return (error); 4097 } 4098 first = 0; 4099 if (uiop->uio_resid > 0) { 4100 /* 4101 * Recheck for fatal errors. Fail write even though 4102 * some data have been written. This is consistent 4103 * with strwrite semantics and BSD sockets semantics. 4104 */ 4105 if (so->so_state & SS_CANTSENDMORE) { 4106 tsignal(curthread, SIGPIPE); 4107 eprintsoline(so, error); 4108 return (EPIPE); 4109 } 4110 if (so->so_error != 0) { 4111 mutex_enter(&so->so_lock); 4112 error = sogeterr(so); 4113 mutex_exit(&so->so_lock); 4114 if (error != 0) { 4115 eprintsoline(so, error); 4116 return (error); 4117 } 4118 } 4119 } 4120 } while (uiop->uio_resid > 0); 4121 return (0); 4122 } 4123 4124 /* 4125 * Check the state for errors and call the appropriate send function. 4126 * 4127 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4128 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4129 * after sending the message. 
4130 */ 4131 static int 4132 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 4133 { 4134 int so_state; 4135 int so_mode; 4136 int error; 4137 struct sockaddr *name; 4138 t_uscalar_t namelen; 4139 int dontroute; 4140 int flags; 4141 4142 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4143 so, msg, msg->msg_flags, 4144 pr_state(so->so_state, so->so_mode), so->so_error)); 4145 4146 mutex_enter(&so->so_lock); 4147 so_state = so->so_state; 4148 4149 if (so_state & SS_CANTSENDMORE) { 4150 mutex_exit(&so->so_lock); 4151 tsignal(curthread, SIGPIPE); 4152 return (EPIPE); 4153 } 4154 4155 if (so->so_error != 0) { 4156 error = sogeterr(so); 4157 if (error != 0) { 4158 mutex_exit(&so->so_lock); 4159 return (error); 4160 } 4161 } 4162 4163 name = (struct sockaddr *)msg->msg_name; 4164 namelen = msg->msg_namelen; 4165 4166 so_mode = so->so_mode; 4167 4168 if (name == NULL) { 4169 if (!(so_state & SS_ISCONNECTED)) { 4170 mutex_exit(&so->so_lock); 4171 if (so_mode & SM_CONNREQUIRED) 4172 return (ENOTCONN); 4173 else 4174 return (EDESTADDRREQ); 4175 } 4176 if (so_mode & SM_CONNREQUIRED) { 4177 name = NULL; 4178 namelen = 0; 4179 } else { 4180 /* 4181 * Note that this code does not prevent so_faddr_sa 4182 * from changing while it is being used. Thus 4183 * if an "unconnect"+connect occurs concurrently with 4184 * this send the datagram might be delivered to a 4185 * garbaled address. 4186 */ 4187 ASSERT(so->so_faddr_sa); 4188 name = so->so_faddr_sa; 4189 namelen = (t_uscalar_t)so->so_faddr_len; 4190 } 4191 } else { 4192 if (!(so_state & SS_ISCONNECTED) && 4193 (so_mode & SM_CONNREQUIRED)) { 4194 /* Required but not connected */ 4195 mutex_exit(&so->so_lock); 4196 return (ENOTCONN); 4197 } 4198 /* 4199 * Ignore the address on connection-oriented sockets. 4200 * Just like BSD this code does not generate an error for 4201 * TCP (a CONNREQUIRED socket) when sending to an address 4202 * passed in with sendto/sendmsg. 
Instead the data is 4203 * delivered on the connection as if no address had been 4204 * supplied. 4205 */ 4206 if ((so_state & SS_ISCONNECTED) && 4207 !(so_mode & SM_CONNREQUIRED)) { 4208 mutex_exit(&so->so_lock); 4209 return (EISCONN); 4210 } 4211 if (!(so_state & SS_ISBOUND)) { 4212 so_lock_single(so); /* Set SOLOCKED */ 4213 error = sotpi_bind(so, NULL, 0, 4214 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 4215 so_unlock_single(so, SOLOCKED); 4216 if (error) { 4217 mutex_exit(&so->so_lock); 4218 eprintsoline(so, error); 4219 return (error); 4220 } 4221 } 4222 /* 4223 * Handle delayed datagram errors. These are only queued 4224 * when the application sets SO_DGRAM_ERRIND. 4225 * Return the error if we are sending to the address 4226 * that was returned in the last T_UDERROR_IND. 4227 * If sending to some other address discard the delayed 4228 * error indication. 4229 */ 4230 if (so->so_delayed_error) { 4231 struct T_uderror_ind *tudi; 4232 void *addr; 4233 t_uscalar_t addrlen; 4234 boolean_t match = B_FALSE; 4235 4236 ASSERT(so->so_eaddr_mp); 4237 error = so->so_delayed_error; 4238 so->so_delayed_error = 0; 4239 tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr; 4240 addrlen = tudi->DEST_length; 4241 addr = sogetoff(so->so_eaddr_mp, 4242 tudi->DEST_offset, 4243 addrlen, 1); 4244 ASSERT(addr); /* Checked by strsock_proto */ 4245 switch (so->so_family) { 4246 case AF_INET: { 4247 /* Compare just IP address and port */ 4248 sin_t *sin1 = (sin_t *)name; 4249 sin_t *sin2 = (sin_t *)addr; 4250 4251 if (addrlen == sizeof (sin_t) && 4252 namelen == addrlen && 4253 sin1->sin_port == sin2->sin_port && 4254 sin1->sin_addr.s_addr == 4255 sin2->sin_addr.s_addr) 4256 match = B_TRUE; 4257 break; 4258 } 4259 case AF_INET6: { 4260 /* Compare just IP address and port. 
Not flow */ 4261 sin6_t *sin1 = (sin6_t *)name; 4262 sin6_t *sin2 = (sin6_t *)addr; 4263 4264 if (addrlen == sizeof (sin6_t) && 4265 namelen == addrlen && 4266 sin1->sin6_port == sin2->sin6_port && 4267 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4268 &sin2->sin6_addr)) 4269 match = B_TRUE; 4270 break; 4271 } 4272 case AF_UNIX: 4273 default: 4274 if (namelen == addrlen && 4275 bcmp(name, addr, namelen) == 0) 4276 match = B_TRUE; 4277 } 4278 if (match) { 4279 freemsg(so->so_eaddr_mp); 4280 so->so_eaddr_mp = NULL; 4281 mutex_exit(&so->so_lock); 4282 #ifdef DEBUG 4283 dprintso(so, 0, 4284 ("sockfs delayed error %d for %s\n", 4285 error, 4286 pr_addr(so->so_family, name, namelen))); 4287 #endif /* DEBUG */ 4288 return (error); 4289 } 4290 freemsg(so->so_eaddr_mp); 4291 so->so_eaddr_mp = NULL; 4292 } 4293 } 4294 mutex_exit(&so->so_lock); 4295 4296 flags = msg->msg_flags; 4297 dontroute = 0; 4298 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4299 uint32_t val; 4300 4301 val = 1; 4302 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4303 &val, (t_uscalar_t)sizeof (val)); 4304 if (error) 4305 return (error); 4306 dontroute = 1; 4307 } 4308 4309 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4310 error = EOPNOTSUPP; 4311 goto done; 4312 } 4313 if (msg->msg_controllen != 0) { 4314 if (!(so_mode & SM_CONNREQUIRED)) { 4315 error = sosend_dgramcmsg(so, name, namelen, uiop, 4316 msg->msg_control, msg->msg_controllen, flags); 4317 } else { 4318 if (flags & MSG_OOB) { 4319 /* Can't generate T_EXDATA_REQ with options */ 4320 error = EOPNOTSUPP; 4321 goto done; 4322 } 4323 error = sosend_svccmsg(so, uiop, 4324 !(flags & MSG_EOR), 4325 msg->msg_control, msg->msg_controllen, 4326 flags); 4327 } 4328 goto done; 4329 } 4330 4331 if (!(so_mode & SM_CONNREQUIRED)) { 4332 /* 4333 * If there is no SO_DONTROUTE to turn off return immediately 4334 * from send_dgram. This can allow tail-call optimizations. 
4335 */ 4336 if (!dontroute) { 4337 return (sosend_dgram(so, name, namelen, uiop, flags)); 4338 } 4339 error = sosend_dgram(so, name, namelen, uiop, flags); 4340 } else { 4341 t_scalar_t prim; 4342 int sflag; 4343 4344 /* Ignore msg_name in the connected state */ 4345 if (flags & MSG_OOB) { 4346 prim = T_EXDATA_REQ; 4347 /* 4348 * Send down T_EXDATA_REQ even if there is flow 4349 * control for data. 4350 */ 4351 sflag = MSG_IGNFLOW; 4352 } else { 4353 if (so_mode & SM_BYTESTREAM) { 4354 /* Byte stream transport - use write */ 4355 4356 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4357 /* 4358 * If there is no SO_DONTROUTE to turn off, 4359 * SS_DIRECT is on, and there is no flow 4360 * control, we can take the fast path. 4361 */ 4362 if (!dontroute && 4363 (so_state & SS_DIRECT) && 4364 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4365 return (sostream_direct(so, uiop, 4366 NULL, CRED())); 4367 } 4368 error = strwrite(SOTOV(so), uiop, CRED()); 4369 goto done; 4370 } 4371 prim = T_DATA_REQ; 4372 sflag = 0; 4373 } 4374 /* 4375 * If there is no SO_DONTROUTE to turn off return immediately 4376 * from sosend_svc. This can allow tail-call optimizations. 4377 */ 4378 if (!dontroute) 4379 return (sosend_svc(so, uiop, prim, 4380 !(flags & MSG_EOR), sflag)); 4381 error = sosend_svc(so, uiop, prim, 4382 !(flags & MSG_EOR), sflag); 4383 } 4384 ASSERT(dontroute); 4385 done: 4386 if (dontroute) { 4387 uint32_t val; 4388 4389 val = 0; 4390 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4391 &val, (t_uscalar_t)sizeof (val)); 4392 } 4393 return (error); 4394 } 4395 4396 /* 4397 * Sending data on a datagram socket. 4398 * Assumes caller has verified that SS_ISBOUND etc. are set. 
4399 */ 4400 /* ARGSUSED */ 4401 static int 4402 sodgram_direct(struct sonode *so, struct sockaddr *name, 4403 socklen_t namelen, struct uio *uiop, int flags) 4404 { 4405 struct T_unitdata_req tudr; 4406 mblk_t *mp = NULL; 4407 int error = 0; 4408 void *addr; 4409 socklen_t addrlen; 4410 ssize_t len; 4411 struct stdata *stp = SOTOV(so)->v_stream; 4412 int so_state; 4413 queue_t *udp_wq; 4414 boolean_t connected; 4415 mblk_t *mpdata = NULL; 4416 4417 ASSERT(name != NULL && namelen != 0); 4418 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4419 ASSERT(!(so->so_mode & SM_EXDATA)); 4420 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4421 ASSERT(SOTOV(so)->v_type == VSOCK); 4422 4423 /* Caller checked for proper length */ 4424 len = uiop->uio_resid; 4425 ASSERT(len <= so->so_tidu_size); 4426 4427 /* Length and family checks have been done by caller */ 4428 ASSERT(name->sa_family == so->so_family); 4429 ASSERT(so->so_family == AF_INET || 4430 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4431 ASSERT(so->so_family == AF_INET6 || 4432 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4433 4434 addr = name; 4435 addrlen = namelen; 4436 4437 if (stp->sd_sidp != NULL && 4438 (error = straccess(stp, JCWRITE)) != 0) 4439 goto done; 4440 4441 so_state = so->so_state; 4442 4443 connected = so_state & SS_ISCONNECTED; 4444 if (!connected) { 4445 tudr.PRIM_type = T_UNITDATA_REQ; 4446 tudr.DEST_length = addrlen; 4447 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4448 tudr.OPT_length = 0; 4449 tudr.OPT_offset = 0; 4450 4451 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4452 _ALLOC_INTR); 4453 if (mp == NULL) { 4454 /* 4455 * Caught a signal waiting for memory. 4456 * Let send* return EINTR. 4457 */ 4458 error = EINTR; 4459 goto done; 4460 } 4461 } 4462 4463 /* 4464 * For UDP we don't break up the copyin into smaller pieces 4465 * as in the TCP case. 
That means if ENOMEM is returned by 4466 * mcopyinuio() then the uio vector has not been modified at 4467 * all and we fallback to either strwrite() or kstrputmsg() 4468 * below. Note also that we never generate priority messages 4469 * from here. 4470 */ 4471 udp_wq = stp->sd_wrq->q_next; 4472 if (canput(udp_wq) && 4473 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4474 ASSERT(DB_TYPE(mpdata) == M_DATA); 4475 ASSERT(uiop->uio_resid == 0); 4476 if (!connected) 4477 linkb(mp, mpdata); 4478 else 4479 mp = mpdata; 4480 if (audit_active) 4481 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4482 4483 udp_wput(udp_wq, mp); 4484 return (0); 4485 } 4486 4487 ASSERT(mpdata == NULL); 4488 if (error != 0 && error != ENOMEM) { 4489 freemsg(mp); 4490 return (error); 4491 } 4492 4493 /* 4494 * For connected, let strwrite() handle the blocking case. 4495 * Otherwise we fall thru and use kstrputmsg(). 4496 */ 4497 if (connected) 4498 return (strwrite(SOTOV(so), uiop, CRED())); 4499 4500 if (audit_active) 4501 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4502 4503 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4504 done: 4505 #ifdef SOCK_DEBUG 4506 if (error != 0) { 4507 eprintsoline(so, error); 4508 } 4509 #endif /* SOCK_DEBUG */ 4510 return (error); 4511 } 4512 4513 int 4514 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4515 { 4516 struct stdata *stp = SOTOV(so)->v_stream; 4517 ssize_t iosize, rmax, maxblk; 4518 queue_t *tcp_wq = stp->sd_wrq->q_next; 4519 mblk_t *newmp; 4520 int error = 0, wflag = 0; 4521 4522 ASSERT(so->so_mode & SM_BYTESTREAM); 4523 ASSERT(SOTOV(so)->v_type == VSOCK); 4524 4525 if (stp->sd_sidp != NULL && 4526 (error = straccess(stp, JCWRITE)) != 0) 4527 return (error); 4528 4529 if (uiop == NULL) { 4530 /* 4531 * kstrwritemp() should have checked sd_flag and 4532 * flow-control before coming here. If we end up 4533 * here it means that we can simply pass down the 4534 * data to tcp. 
4535 */ 4536 ASSERT(mp != NULL); 4537 if (stp->sd_wputdatafunc != NULL) { 4538 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4539 NULL, NULL, NULL); 4540 if (newmp == NULL) { 4541 /* The caller will free mp */ 4542 return (ECOMM); 4543 } 4544 mp = newmp; 4545 } 4546 tcp_wput(tcp_wq, mp); 4547 return (0); 4548 } 4549 4550 /* Fallback to strwrite() to do proper error handling */ 4551 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4552 return (strwrite(SOTOV(so), uiop, cr)); 4553 4554 rmax = stp->sd_qn_maxpsz; 4555 ASSERT(rmax >= 0 || rmax == INFPSZ); 4556 if (rmax == 0 || uiop->uio_resid <= 0) 4557 return (0); 4558 4559 if (rmax == INFPSZ) 4560 rmax = uiop->uio_resid; 4561 4562 maxblk = stp->sd_maxblk; 4563 4564 for (;;) { 4565 iosize = MIN(uiop->uio_resid, rmax); 4566 4567 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4568 if (mp == NULL) { 4569 /* 4570 * Fallback to strwrite() for ENOMEM; if this 4571 * is our first time in this routine and the uio 4572 * vector has not been modified, we will end up 4573 * calling strwrite() without any flag set. 4574 */ 4575 if (error == ENOMEM) 4576 goto slow_send; 4577 else 4578 return (error); 4579 } 4580 ASSERT(uiop->uio_resid >= 0); 4581 /* 4582 * If mp is non-NULL and ENOMEM is set, it means that 4583 * mcopyinuio() was able to break down some of the user 4584 * data into one or more mblks. Send the partial data 4585 * to tcp and let the rest be handled in strwrite(). 
4586 */ 4587 ASSERT(error == 0 || error == ENOMEM); 4588 if (stp->sd_wputdatafunc != NULL) { 4589 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4590 NULL, NULL, NULL); 4591 if (newmp == NULL) { 4592 /* The caller will free mp */ 4593 return (ECOMM); 4594 } 4595 mp = newmp; 4596 } 4597 tcp_wput(tcp_wq, mp); 4598 4599 wflag |= NOINTR; 4600 4601 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4602 ASSERT(error == 0); 4603 break; 4604 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4605 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4606 slow_send: 4607 /* 4608 * We were able to send down partial data using 4609 * the direct call interface, but are now relying 4610 * on strwrite() to handle the non-fastpath cases. 4611 * If the socket is blocking we will sleep in 4612 * strwaitq() until write is permitted, otherwise, 4613 * we will need to return the amount of bytes 4614 * written so far back to the app. This is the 4615 * reason why we pass NOINTR flag to strwrite() 4616 * for non-blocking socket, because we don't want 4617 * to return EAGAIN when portion of the user data 4618 * has actually been sent down. 4619 */ 4620 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4621 } 4622 } 4623 return (0); 4624 } 4625 4626 /* 4627 * Update so_faddr by asking the transport (unless AF_UNIX). 
4628 */ 4629 int 4630 sotpi_getpeername(struct sonode *so) 4631 { 4632 struct strbuf strbuf; 4633 int error = 0, res; 4634 void *addr; 4635 t_uscalar_t addrlen; 4636 k_sigset_t smask; 4637 4638 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4639 so, pr_state(so->so_state, so->so_mode))); 4640 4641 mutex_enter(&so->so_lock); 4642 so_lock_single(so); /* Set SOLOCKED */ 4643 if (!(so->so_state & SS_ISCONNECTED)) { 4644 error = ENOTCONN; 4645 goto done; 4646 } 4647 /* Added this check for X/Open */ 4648 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4649 error = EINVAL; 4650 if (xnet_check_print) { 4651 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4652 } 4653 goto done; 4654 } 4655 #ifdef DEBUG 4656 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4657 pr_addr(so->so_family, so->so_faddr_sa, 4658 (t_uscalar_t)so->so_faddr_len))); 4659 #endif /* DEBUG */ 4660 4661 if (so->so_family == AF_UNIX) { 4662 /* Transport has different name space - return local info */ 4663 error = 0; 4664 goto done; 4665 } 4666 4667 ASSERT(so->so_faddr_sa); 4668 /* Allocate local buffer to use with ioctl */ 4669 addrlen = (t_uscalar_t)so->so_faddr_maxlen; 4670 mutex_exit(&so->so_lock); 4671 addr = kmem_alloc(addrlen, KM_SLEEP); 4672 4673 /* 4674 * Issue TI_GETPEERNAME with signals masked. 4675 * Put the result in so_faddr_sa so that getpeername works after 4676 * a shutdown(output). 4677 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4678 * back to the socket. 4679 */ 4680 strbuf.buf = addr; 4681 strbuf.maxlen = addrlen; 4682 strbuf.len = 0; 4683 4684 sigintr(&smask, 0); 4685 res = 0; 4686 ASSERT(CRED()); 4687 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4688 0, K_TO_K, CRED(), &res); 4689 sigunintr(&smask); 4690 4691 mutex_enter(&so->so_lock); 4692 /* 4693 * If there is an error record the error in so_error put don't fail 4694 * the getpeername. Instead fallback on the recorded 4695 * so->so_faddr_sa. 
4696 */ 4697 if (error) { 4698 /* 4699 * Various stream head errors can be returned to the ioctl. 4700 * However, it is impossible to determine which ones of 4701 * these are really socket level errors that were incorrectly 4702 * consumed by the ioctl. Thus this code silently ignores the 4703 * error - to code explicitly does not reinstate the error 4704 * using soseterror(). 4705 * Experiments have shows that at least this set of 4706 * errors are reported and should not be reinstated on the 4707 * socket: 4708 * EINVAL E.g. if an I_LINK was in effect when 4709 * getpeername was called. 4710 * EPIPE The ioctl error semantics prefer the write 4711 * side error over the read side error. 4712 * ENOTCONN The transport just got disconnected but 4713 * sockfs had not yet seen the T_DISCON_IND 4714 * when issuing the ioctl. 4715 */ 4716 error = 0; 4717 } else if (res == 0 && strbuf.len > 0 && 4718 (so->so_state & SS_ISCONNECTED)) { 4719 ASSERT(strbuf.len <= (int)so->so_faddr_maxlen); 4720 so->so_faddr_len = (socklen_t)strbuf.len; 4721 bcopy(addr, so->so_faddr_sa, so->so_faddr_len); 4722 so->so_state |= SS_FADDR_VALID; 4723 } 4724 kmem_free(addr, addrlen); 4725 #ifdef DEBUG 4726 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4727 pr_addr(so->so_family, so->so_faddr_sa, 4728 (t_uscalar_t)so->so_faddr_len))); 4729 #endif /* DEBUG */ 4730 done: 4731 so_unlock_single(so, SOLOCKED); 4732 mutex_exit(&so->so_lock); 4733 return (error); 4734 } 4735 4736 /* 4737 * Update so_laddr by asking the transport (unless AF_UNIX). 
4738 */ 4739 int 4740 sotpi_getsockname(struct sonode *so) 4741 { 4742 struct strbuf strbuf; 4743 int error = 0, res; 4744 void *addr; 4745 t_uscalar_t addrlen; 4746 k_sigset_t smask; 4747 4748 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4749 so, pr_state(so->so_state, so->so_mode))); 4750 4751 mutex_enter(&so->so_lock); 4752 so_lock_single(so); /* Set SOLOCKED */ 4753 if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) { 4754 /* Return an all zero address except for the family */ 4755 if (so->so_family == AF_INET) 4756 so->so_laddr_len = (socklen_t)sizeof (sin_t); 4757 else if (so->so_family == AF_INET6) 4758 so->so_laddr_len = (socklen_t)sizeof (sin6_t); 4759 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 4760 bzero(so->so_laddr_sa, so->so_laddr_len); 4761 /* 4762 * Can not assume there is a sa_family for all 4763 * protocol families. 4764 */ 4765 if (so->so_family == AF_INET || so->so_family == AF_INET6) 4766 so->so_laddr_sa->sa_family = so->so_family; 4767 } 4768 #ifdef DEBUG 4769 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4770 pr_addr(so->so_family, so->so_laddr_sa, 4771 (t_uscalar_t)so->so_laddr_len))); 4772 #endif /* DEBUG */ 4773 if (so->so_family == AF_UNIX) { 4774 /* Transport has different name space - return local info */ 4775 error = 0; 4776 goto done; 4777 } 4778 if (!(so->so_state & SS_ISBOUND)) { 4779 /* If not bound, then nothing to return. */ 4780 error = 0; 4781 goto done; 4782 } 4783 /* Allocate local buffer to use with ioctl */ 4784 addrlen = (t_uscalar_t)so->so_laddr_maxlen; 4785 mutex_exit(&so->so_lock); 4786 addr = kmem_alloc(addrlen, KM_SLEEP); 4787 4788 /* 4789 * Issue TI_GETMYNAME with signals masked. 4790 * Put the result in so_laddr_sa so that getsockname works after 4791 * a shutdown(output). 4792 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4793 * back to the socket. 
4794 */ 4795 strbuf.buf = addr; 4796 strbuf.maxlen = addrlen; 4797 strbuf.len = 0; 4798 4799 sigintr(&smask, 0); 4800 res = 0; 4801 ASSERT(CRED()); 4802 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4803 0, K_TO_K, CRED(), &res); 4804 sigunintr(&smask); 4805 4806 mutex_enter(&so->so_lock); 4807 /* 4808 * If there is an error record the error in so_error put don't fail 4809 * the getsockname. Instead fallback on the recorded 4810 * so->so_laddr_sa. 4811 */ 4812 if (error) { 4813 /* 4814 * Various stream head errors can be returned to the ioctl. 4815 * However, it is impossible to determine which ones of 4816 * these are really socket level errors that were incorrectly 4817 * consumed by the ioctl. Thus this code silently ignores the 4818 * error - to code explicitly does not reinstate the error 4819 * using soseterror(). 4820 * Experiments have shows that at least this set of 4821 * errors are reported and should not be reinstated on the 4822 * socket: 4823 * EINVAL E.g. if an I_LINK was in effect when 4824 * getsockname was called. 4825 * EPIPE The ioctl error semantics prefer the write 4826 * side error over the read side error. 4827 */ 4828 error = 0; 4829 } else if (res == 0 && strbuf.len > 0 && 4830 (so->so_state & SS_ISBOUND)) { 4831 ASSERT(strbuf.len <= (int)so->so_laddr_maxlen); 4832 so->so_laddr_len = (socklen_t)strbuf.len; 4833 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 4834 so->so_state |= SS_LADDR_VALID; 4835 } 4836 kmem_free(addr, addrlen); 4837 #ifdef DEBUG 4838 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 4839 pr_addr(so->so_family, so->so_laddr_sa, 4840 (t_uscalar_t)so->so_laddr_len))); 4841 #endif /* DEBUG */ 4842 done: 4843 so_unlock_single(so, SOLOCKED); 4844 mutex_exit(&so->so_lock); 4845 return (error); 4846 } 4847 4848 /* 4849 * Get socket options. For SOL_SOCKET options some options are handled 4850 * by the sockfs while others use the value recorded in the sonode as a 4851 * fallback should the T_SVR4_OPTMGMT_REQ fail. 
4852 * 4853 * On the return most *optlenp bytes are copied to optval. 4854 */ 4855 int 4856 sotpi_getsockopt(struct sonode *so, int level, int option_name, 4857 void *optval, socklen_t *optlenp, int flags) 4858 { 4859 struct T_optmgmt_req optmgmt_req; 4860 struct T_optmgmt_ack *optmgmt_ack; 4861 struct opthdr oh; 4862 struct opthdr *opt_res; 4863 mblk_t *mp = NULL; 4864 int error = 0; 4865 void *option = NULL; /* Set if fallback value */ 4866 t_uscalar_t maxlen = *optlenp; 4867 t_uscalar_t len; 4868 uint32_t value; 4869 4870 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 4871 so, level, option_name, optval, optlenp, 4872 pr_state(so->so_state, so->so_mode))); 4873 4874 mutex_enter(&so->so_lock); 4875 so_lock_single(so); /* Set SOLOCKED */ 4876 4877 /* 4878 * Check for SOL_SOCKET options. 4879 * Certain SOL_SOCKET options are returned directly whereas 4880 * others only provide a default (fallback) value should 4881 * the T_SVR4_OPTMGMT_REQ fail. 4882 */ 4883 if (level == SOL_SOCKET) { 4884 /* Check parameters */ 4885 switch (option_name) { 4886 case SO_TYPE: 4887 case SO_ERROR: 4888 case SO_DEBUG: 4889 case SO_ACCEPTCONN: 4890 case SO_REUSEADDR: 4891 case SO_KEEPALIVE: 4892 case SO_DONTROUTE: 4893 case SO_BROADCAST: 4894 case SO_USELOOPBACK: 4895 case SO_OOBINLINE: 4896 case SO_SNDBUF: 4897 case SO_RCVBUF: 4898 #ifdef notyet 4899 case SO_SNDLOWAT: 4900 case SO_RCVLOWAT: 4901 case SO_SNDTIMEO: 4902 case SO_RCVTIMEO: 4903 #endif /* notyet */ 4904 case SO_DOMAIN: 4905 case SO_DGRAM_ERRIND: 4906 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 4907 error = EINVAL; 4908 eprintsoline(so, error); 4909 goto done2; 4910 } 4911 break; 4912 case SO_LINGER: 4913 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 4914 error = EINVAL; 4915 eprintsoline(so, error); 4916 goto done2; 4917 } 4918 break; 4919 } 4920 4921 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 4922 4923 switch (option_name) { 4924 case SO_TYPE: 4925 value = so->so_type; 4926 option = 
&value; 4927 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4928 4929 case SO_ERROR: 4930 value = sogeterr(so); 4931 option = &value; 4932 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4933 4934 case SO_ACCEPTCONN: 4935 if (so->so_state & SS_ACCEPTCONN) 4936 value = SO_ACCEPTCONN; 4937 else 4938 value = 0; 4939 #ifdef DEBUG 4940 if (value) { 4941 dprintso(so, 1, 4942 ("sotpi_getsockopt: 0x%x is set\n", 4943 option_name)); 4944 } else { 4945 dprintso(so, 1, 4946 ("sotpi_getsockopt: 0x%x not set\n", 4947 option_name)); 4948 } 4949 #endif /* DEBUG */ 4950 option = &value; 4951 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4952 4953 case SO_DEBUG: 4954 case SO_REUSEADDR: 4955 case SO_KEEPALIVE: 4956 case SO_DONTROUTE: 4957 case SO_BROADCAST: 4958 case SO_USELOOPBACK: 4959 case SO_OOBINLINE: 4960 case SO_DGRAM_ERRIND: 4961 value = (so->so_options & option_name); 4962 #ifdef DEBUG 4963 if (value) { 4964 dprintso(so, 1, 4965 ("sotpi_getsockopt: 0x%x is set\n", 4966 option_name)); 4967 } else { 4968 dprintso(so, 1, 4969 ("sotpi_getsockopt: 0x%x not set\n", 4970 option_name)); 4971 } 4972 #endif /* DEBUG */ 4973 option = &value; 4974 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4975 4976 /* 4977 * The following options are only returned by sockfs when the 4978 * T_SVR4_OPTMGMT_REQ fails. 4979 */ 4980 case SO_LINGER: 4981 option = &so->so_linger; 4982 len = (t_uscalar_t)sizeof (struct linger); 4983 break; 4984 case SO_SNDBUF: { 4985 ssize_t lvalue; 4986 4987 /* 4988 * If the option has not been set then get a default 4989 * value from the read queue. This value is 4990 * returned if the transport fails 4991 * the T_SVR4_OPTMGMT_REQ. 
4992 */ 4993 lvalue = so->so_sndbuf; 4994 if (lvalue == 0) { 4995 mutex_exit(&so->so_lock); 4996 (void) strqget(strvp2wq(SOTOV(so))->q_next, 4997 QHIWAT, 0, &lvalue); 4998 mutex_enter(&so->so_lock); 4999 dprintso(so, 1, 5000 ("got SO_SNDBUF %ld from q\n", lvalue)); 5001 } 5002 value = (int)lvalue; 5003 option = &value; 5004 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5005 break; 5006 } 5007 case SO_RCVBUF: { 5008 ssize_t lvalue; 5009 5010 /* 5011 * If the option has not been set then get a default 5012 * value from the read queue. This value is 5013 * returned if the transport fails 5014 * the T_SVR4_OPTMGMT_REQ. 5015 * 5016 * XXX If SO_RCVBUF has been set and this is an 5017 * XPG 4.2 application then do not ask the transport 5018 * since the transport might adjust the value and not 5019 * return exactly what was set by the application. 5020 * For non-XPG 4.2 application we return the value 5021 * that the transport is actually using. 5022 */ 5023 lvalue = so->so_rcvbuf; 5024 if (lvalue == 0) { 5025 mutex_exit(&so->so_lock); 5026 (void) strqget(RD(strvp2wq(SOTOV(so))), 5027 QHIWAT, 0, &lvalue); 5028 mutex_enter(&so->so_lock); 5029 dprintso(so, 1, 5030 ("got SO_RCVBUF %ld from q\n", lvalue)); 5031 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5032 value = (int)lvalue; 5033 option = &value; 5034 goto copyout; /* skip asking transport */ 5035 } 5036 value = (int)lvalue; 5037 option = &value; 5038 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5039 break; 5040 } 5041 case SO_DOMAIN: 5042 value = so->so_family; 5043 option = &value; 5044 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5045 5046 #ifdef notyet 5047 /* 5048 * We do not implement the semantics of these options 5049 * thus we shouldn't implement the options either. 
5050 */ 5051 case SO_SNDLOWAT: 5052 value = so->so_sndlowat; 5053 option = &value; 5054 break; 5055 case SO_RCVLOWAT: 5056 value = so->so_rcvlowat; 5057 option = &value; 5058 break; 5059 case SO_SNDTIMEO: 5060 value = so->so_sndtimeo; 5061 option = &value; 5062 break; 5063 case SO_RCVTIMEO: 5064 value = so->so_rcvtimeo; 5065 option = &value; 5066 break; 5067 #endif /* notyet */ 5068 } 5069 } 5070 5071 mutex_exit(&so->so_lock); 5072 5073 /* Send request */ 5074 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5075 optmgmt_req.MGMT_flags = T_CHECK; 5076 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5077 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5078 5079 oh.level = level; 5080 oh.name = option_name; 5081 oh.len = maxlen; 5082 5083 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5084 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 5085 /* Let option management work in the presence of data flow control */ 5086 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5087 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5088 mp = NULL; 5089 mutex_enter(&so->so_lock); 5090 if (error) { 5091 eprintsoline(so, error); 5092 goto done2; 5093 } 5094 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5095 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5096 if (error) { 5097 if (option != NULL) { 5098 /* We have a fallback value */ 5099 error = 0; 5100 goto copyout; 5101 } 5102 eprintsoline(so, error); 5103 goto done2; 5104 } 5105 ASSERT(mp); 5106 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5107 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5108 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5109 if (opt_res == NULL) { 5110 if (option != NULL) { 5111 /* We have a fallback value */ 5112 error = 0; 5113 goto copyout; 5114 } 5115 error = EPROTO; 5116 eprintsoline(so, error); 5117 goto done; 5118 } 5119 option = &opt_res[1]; 5120 5121 /* check to ensure that the option is within bounds */ 5122 if 
(((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5123 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5124 if (option != NULL) { 5125 /* We have a fallback value */ 5126 error = 0; 5127 goto copyout; 5128 } 5129 error = EPROTO; 5130 eprintsoline(so, error); 5131 goto done; 5132 } 5133 5134 len = opt_res->len; 5135 5136 copyout: { 5137 t_uscalar_t size = MIN(len, maxlen); 5138 bcopy(option, optval, size); 5139 bcopy(&size, optlenp, sizeof (size)); 5140 } 5141 done: 5142 freemsg(mp); 5143 done2: 5144 so_unlock_single(so, SOLOCKED); 5145 mutex_exit(&so->so_lock); 5146 return (error); 5147 } 5148 5149 /* 5150 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5151 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5152 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5153 * setsockopt has to work even if the transport does not support the option. 5154 */ 5155 int 5156 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5157 const void *optval, t_uscalar_t optlen) 5158 { 5159 struct T_optmgmt_req optmgmt_req; 5160 struct opthdr oh; 5161 mblk_t *mp; 5162 int error = 0; 5163 boolean_t handled = B_FALSE; 5164 5165 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5166 so, level, option_name, optval, optlen, 5167 pr_state(so->so_state, so->so_mode))); 5168 5169 5170 /* X/Open requires this check */ 5171 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5172 if (xnet_check_print) 5173 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5174 return (EINVAL); 5175 } 5176 5177 /* Caller allocates aligned optval, or passes null */ 5178 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 5179 /* If optval is null optlen is 0, and vice-versa */ 5180 ASSERT(optval != NULL || optlen == 0); 5181 ASSERT(optlen != 0 || optval == NULL); 5182 5183 mutex_enter(&so->so_lock); 5184 so_lock_single(so); /* Set SOLOCKED */ 5185 
mutex_exit(&so->so_lock); 5186 5187 /* 5188 * For SOCKET or TCP level options, try to set it here itself 5189 * provided socket has not been popped and we know the tcp 5190 * structure (stored in so_priv). 5191 */ 5192 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5193 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5194 (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) { 5195 tcp_t *tcp = so->so_priv; 5196 boolean_t onoff; 5197 5198 #define intvalue (*(int32_t *)optval) 5199 5200 switch (level) { 5201 case SOL_SOCKET: 5202 switch (option_name) { /* Check length param */ 5203 case SO_DEBUG: 5204 case SO_REUSEADDR: 5205 case SO_DONTROUTE: 5206 case SO_BROADCAST: 5207 case SO_USELOOPBACK: 5208 case SO_OOBINLINE: 5209 case SO_DGRAM_ERRIND: 5210 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5211 error = EINVAL; 5212 eprintsoline(so, error); 5213 mutex_enter(&so->so_lock); 5214 goto done2; 5215 } 5216 ASSERT(optval); 5217 onoff = intvalue != 0; 5218 handled = B_TRUE; 5219 break; 5220 case SO_LINGER: 5221 if (optlen != 5222 (t_uscalar_t)sizeof (struct linger)) { 5223 error = EINVAL; 5224 eprintsoline(so, error); 5225 mutex_enter(&so->so_lock); 5226 goto done2; 5227 } 5228 ASSERT(optval); 5229 handled = B_TRUE; 5230 break; 5231 } 5232 5233 switch (option_name) { /* Do actions */ 5234 case SO_LINGER: { 5235 struct linger *lgr = (struct linger *)optval; 5236 5237 if (lgr->l_onoff) { 5238 tcp->tcp_linger = 1; 5239 tcp->tcp_lingertime = lgr->l_linger; 5240 so->so_linger.l_onoff = SO_LINGER; 5241 so->so_options |= SO_LINGER; 5242 } else { 5243 tcp->tcp_linger = 0; 5244 tcp->tcp_lingertime = 0; 5245 so->so_linger.l_onoff = 0; 5246 so->so_options &= ~SO_LINGER; 5247 } 5248 so->so_linger.l_linger = lgr->l_linger; 5249 handled = B_TRUE; 5250 break; 5251 } 5252 case SO_DEBUG: 5253 tcp->tcp_debug = onoff; 5254 #ifdef SOCK_TEST 5255 if (intvalue & 2) 5256 sock_test_timelimit = 10 * hz; 5257 else 5258 sock_test_timelimit = 0; 5259 5260 if (intvalue & 4) 
5261 do_useracc = 0; 5262 else 5263 do_useracc = 1; 5264 #endif /* SOCK_TEST */ 5265 break; 5266 case SO_DONTROUTE: 5267 /* 5268 * SO_DONTROUTE, SO_USELOOPBACK and 5269 * SO_BROADCAST are only of interest to IP. 5270 * We track them here only so 5271 * that we can report their current value. 5272 */ 5273 tcp->tcp_dontroute = onoff; 5274 if (onoff) 5275 so->so_options |= option_name; 5276 else 5277 so->so_options &= ~option_name; 5278 break; 5279 case SO_USELOOPBACK: 5280 tcp->tcp_useloopback = onoff; 5281 if (onoff) 5282 so->so_options |= option_name; 5283 else 5284 so->so_options &= ~option_name; 5285 break; 5286 case SO_BROADCAST: 5287 tcp->tcp_broadcast = onoff; 5288 if (onoff) 5289 so->so_options |= option_name; 5290 else 5291 so->so_options &= ~option_name; 5292 break; 5293 case SO_REUSEADDR: 5294 tcp->tcp_reuseaddr = onoff; 5295 if (onoff) 5296 so->so_options |= option_name; 5297 else 5298 so->so_options &= ~option_name; 5299 break; 5300 case SO_OOBINLINE: 5301 tcp->tcp_oobinline = onoff; 5302 if (onoff) 5303 so->so_options |= option_name; 5304 else 5305 so->so_options &= ~option_name; 5306 break; 5307 case SO_DGRAM_ERRIND: 5308 tcp->tcp_dgram_errind = onoff; 5309 if (onoff) 5310 so->so_options |= option_name; 5311 else 5312 so->so_options &= ~option_name; 5313 break; 5314 } 5315 break; 5316 case IPPROTO_TCP: 5317 switch (option_name) { 5318 case TCP_NODELAY: 5319 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5320 error = EINVAL; 5321 eprintsoline(so, error); 5322 mutex_enter(&so->so_lock); 5323 goto done2; 5324 } 5325 ASSERT(optval); 5326 tcp->tcp_naglim = intvalue ? 
1 : tcp->tcp_mss; 5327 handled = B_TRUE; 5328 break; 5329 } 5330 break; 5331 default: 5332 handled = B_FALSE; 5333 break; 5334 } 5335 } 5336 5337 if (handled) { 5338 mutex_enter(&so->so_lock); 5339 goto done2; 5340 } 5341 5342 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5343 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5344 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5345 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5346 5347 oh.level = level; 5348 oh.name = option_name; 5349 oh.len = optlen; 5350 5351 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5352 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 5353 /* Let option management work in the presence of data flow control */ 5354 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5355 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5356 mp = NULL; 5357 mutex_enter(&so->so_lock); 5358 if (error) { 5359 eprintsoline(so, error); 5360 goto done; 5361 } 5362 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5363 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5364 if (error) { 5365 eprintsoline(so, error); 5366 goto done; 5367 } 5368 ASSERT(mp); 5369 /* No need to verify T_optmgmt_ack */ 5370 freemsg(mp); 5371 done: 5372 /* 5373 * Check for SOL_SOCKET options and record their values. 5374 * If we know about a SOL_SOCKET parameter and the transport 5375 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5376 * EPROTO) we let the setsockopt succeed. 
5377 */ 5378 if (level == SOL_SOCKET) { 5379 /* Check parameters */ 5380 switch (option_name) { 5381 case SO_DEBUG: 5382 case SO_REUSEADDR: 5383 case SO_KEEPALIVE: 5384 case SO_DONTROUTE: 5385 case SO_BROADCAST: 5386 case SO_USELOOPBACK: 5387 case SO_OOBINLINE: 5388 case SO_SNDBUF: 5389 case SO_RCVBUF: 5390 #ifdef notyet 5391 case SO_SNDLOWAT: 5392 case SO_RCVLOWAT: 5393 case SO_SNDTIMEO: 5394 case SO_RCVTIMEO: 5395 #endif /* notyet */ 5396 case SO_DGRAM_ERRIND: 5397 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5398 error = EINVAL; 5399 eprintsoline(so, error); 5400 goto done2; 5401 } 5402 ASSERT(optval); 5403 handled = B_TRUE; 5404 break; 5405 case SO_LINGER: 5406 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5407 error = EINVAL; 5408 eprintsoline(so, error); 5409 goto done2; 5410 } 5411 ASSERT(optval); 5412 handled = B_TRUE; 5413 break; 5414 } 5415 5416 #define intvalue (*(int32_t *)optval) 5417 5418 switch (option_name) { 5419 case SO_TYPE: 5420 case SO_ERROR: 5421 case SO_ACCEPTCONN: 5422 /* Can't be set */ 5423 error = ENOPROTOOPT; 5424 goto done2; 5425 case SO_LINGER: { 5426 struct linger *l = (struct linger *)optval; 5427 5428 so->so_linger.l_linger = l->l_linger; 5429 if (l->l_onoff) { 5430 so->so_linger.l_onoff = SO_LINGER; 5431 so->so_options |= SO_LINGER; 5432 } else { 5433 so->so_linger.l_onoff = 0; 5434 so->so_options &= ~SO_LINGER; 5435 } 5436 break; 5437 } 5438 5439 case SO_DEBUG: 5440 #ifdef SOCK_TEST 5441 if (intvalue & 2) 5442 sock_test_timelimit = 10 * hz; 5443 else 5444 sock_test_timelimit = 0; 5445 5446 if (intvalue & 4) 5447 do_useracc = 0; 5448 else 5449 do_useracc = 1; 5450 #endif /* SOCK_TEST */ 5451 /* FALLTHRU */ 5452 case SO_REUSEADDR: 5453 case SO_KEEPALIVE: 5454 case SO_DONTROUTE: 5455 case SO_BROADCAST: 5456 case SO_USELOOPBACK: 5457 case SO_OOBINLINE: 5458 case SO_DGRAM_ERRIND: 5459 if (intvalue != 0) { 5460 dprintso(so, 1, 5461 ("sotpi_setsockopt: setting 0x%x\n", 5462 option_name)); 5463 so->so_options |= option_name; 5464 
} else { 5465 dprintso(so, 1, 5466 ("sotpi_setsockopt: clearing 0x%x\n", 5467 option_name)); 5468 so->so_options &= ~option_name; 5469 } 5470 break; 5471 /* 5472 * The following options are only returned by us when the 5473 * T_SVR4_OPTMGMT_REQ fails. 5474 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5475 * since the transport might adjust the value and not 5476 * return exactly what was set by the application. 5477 */ 5478 case SO_SNDBUF: 5479 so->so_sndbuf = intvalue; 5480 break; 5481 case SO_RCVBUF: 5482 so->so_rcvbuf = intvalue; 5483 break; 5484 #ifdef notyet 5485 /* 5486 * We do not implement the semantics of these options 5487 * thus we shouldn't implement the options either. 5488 */ 5489 case SO_SNDLOWAT: 5490 so->so_sndlowat = intvalue; 5491 break; 5492 case SO_RCVLOWAT: 5493 so->so_rcvlowat = intvalue; 5494 break; 5495 case SO_SNDTIMEO: 5496 so->so_sndtimeo = intvalue; 5497 break; 5498 case SO_RCVTIMEO: 5499 so->so_rcvtimeo = intvalue; 5500 break; 5501 #endif /* notyet */ 5502 } 5503 #undef intvalue 5504 5505 if (error) { 5506 if ((error == ENOPROTOOPT || error == EPROTO || 5507 error == EINVAL) && handled) { 5508 dprintso(so, 1, 5509 ("setsockopt: ignoring error %d for 0x%x\n", 5510 error, option_name)); 5511 error = 0; 5512 } 5513 } 5514 } 5515 done2: 5516 ret: 5517 so_unlock_single(so, SOLOCKED); 5518 mutex_exit(&so->so_lock); 5519 return (error); 5520 } 5521