1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/t_lock.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/buf.h> 32 #include <sys/conf.h> 33 #include <sys/cred.h> 34 #include <sys/kmem.h> 35 #include <sys/sysmacros.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/debug.h> 39 #include <sys/errno.h> 40 #include <sys/time.h> 41 #include <sys/file.h> 42 #include <sys/open.h> 43 #include <sys/user.h> 44 #include <sys/termios.h> 45 #include <sys/stream.h> 46 #include <sys/strsubr.h> 47 #include <sys/strsun.h> 48 #include <sys/ddi.h> 49 #include <sys/esunddi.h> 50 #include <sys/flock.h> 51 #include <sys/modctl.h> 52 #include <sys/vtrace.h> 53 #include <sys/cmn_err.h> 54 #include <sys/pathname.h> 55 56 #include <sys/socket.h> 57 #include <sys/socketvar.h> 58 #include <sys/sockio.h> 59 #include <sys/sodirect.h> 60 #include <netinet/in.h> 61 #include <sys/un.h> 62 #include <sys/strsun.h> 63 64 #include <sys/tiuser.h> 65 #define _SUN_TPI_VERSION 2 66 #include <sys/tihdr.h> 67 #include 
<sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 68 69 #include <c2/audit.h> 70 71 #include <inet/common.h> 72 #include <inet/ip.h> 73 #include <inet/ip6.h> 74 #include <inet/tcp.h> 75 #include <inet/udp_impl.h> 76 77 #include <sys/zone.h> 78 79 #include <fs/sockfs/nl7c.h> 80 #include <fs/sockfs/nl7curi.h> 81 82 #include <inet/kssl/ksslapi.h> 83 84 /* 85 * Possible failures when memory can't be allocated. The documented behavior: 86 * 87 * 5.5: 4.X: XNET: 88 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 89 * EINTR 90 * (4.X does not document EINTR but returns it) 91 * bind: ENOSR - ENOBUFS/ENOSR 92 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 93 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 94 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 95 * (4.X getpeername and getsockname do not fail in practice) 96 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 97 * listen: - - ENOBUFS 98 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 99 * EINTR 100 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 101 * EINTR 102 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 103 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 104 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 105 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 106 * 107 * Resolution. When allocation fails: 108 * recv: return EINTR 109 * send: return EINTR 110 * connect, accept: EINTR 111 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 112 * socket, socketpair: ENOBUFS 113 * getpeername, getsockname: sleep 114 * getsockopt, setsockopt: sleep 115 */ 116 117 #ifdef SOCK_TEST 118 /* 119 * Variables that make sockfs do something other than the standard TPI 120 * for the AF_INET transports. 121 * 122 * solisten_tpi_tcp: 123 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 124 * the transport is already bound. This is needed to avoid losing the 125 * port number should listen() do a T_UNBIND_REQ followed by a 126 * O_T_BIND_REQ. 
127 * 128 * soconnect_tpi_udp: 129 * UDP and ICMP can handle a T_CONN_REQ. 130 * This is needed to make the sequence of connect(), getsockname() 131 * return the local IP address used to send packets to the connected to 132 * destination. 133 * 134 * soconnect_tpi_tcp: 135 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 136 * Set this to non-zero to send TPI conformant messages to TCP in this 137 * respect. This is a performance optimization. 138 * 139 * soaccept_tpi_tcp: 140 * TCP can handle a T_CONN_REQ without the acceptor being bound. 141 * This is a performance optimization that has been picked up in XTI. 142 * 143 * soaccept_tpi_multioptions: 144 * When inheriting SOL_SOCKET options from the listener to the accepting 145 * socket send them as a single message for AF_INET{,6}. 146 */ 147 int solisten_tpi_tcp = 0; 148 int soconnect_tpi_udp = 0; 149 int soconnect_tpi_tcp = 0; 150 int soaccept_tpi_tcp = 0; 151 int soaccept_tpi_multioptions = 1; 152 #else /* SOCK_TEST */ 153 #define soconnect_tpi_tcp 0 154 #define soconnect_tpi_udp 0 155 #define solisten_tpi_tcp 0 156 #define soaccept_tpi_tcp 0 157 #define soaccept_tpi_multioptions 1 158 #endif /* SOCK_TEST */ 159 160 #ifdef SOCK_TEST 161 extern int do_useracc; 162 extern clock_t sock_test_timelimit; 163 #endif /* SOCK_TEST */ 164 165 /* 166 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 167 * applications working. Turn on this flag to disable these checks. 
168 */ 169 int xnet_skip_checks = 0; 170 int xnet_check_print = 0; 171 int xnet_truncate_print = 0; 172 173 extern void sigintr(k_sigset_t *, int); 174 extern void sigunintr(k_sigset_t *); 175 176 extern void *nl7c_lookup_addr(void *, t_uscalar_t); 177 extern void *nl7c_add_addr(void *, t_uscalar_t); 178 extern void nl7c_listener_addr(void *, struct sonode *); 179 180 /* Sockets acting as an in-kernel SSL proxy */ 181 extern mblk_t *strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *, 182 strsigset_t *, strsigset_t *, strpollset_t *); 183 extern mblk_t *strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *, 184 strsigset_t *, strsigset_t *, strpollset_t *); 185 186 static int sotpi_unbind(struct sonode *, int); 187 188 extern int sodput(sodirect_t *, mblk_t *); 189 extern void sodwakeup(sodirect_t *); 190 191 /* TPI sockfs sonode operations */ 192 static int sotpi_accept(struct sonode *, int, struct sonode **); 193 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 194 int); 195 static int sotpi_connect(struct sonode *, const struct sockaddr *, 196 socklen_t, int, int); 197 static int sotpi_listen(struct sonode *, int); 198 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 199 struct uio *); 200 static int sotpi_shutdown(struct sonode *, int); 201 static int sotpi_getsockname(struct sonode *); 202 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 203 struct uio *, void *, t_uscalar_t, int); 204 static int sodgram_direct(struct sonode *, struct sockaddr *, 205 socklen_t, struct uio *, int); 206 207 sonodeops_t sotpi_sonodeops = { 208 sotpi_accept, /* sop_accept */ 209 sotpi_bind, /* sop_bind */ 210 sotpi_listen, /* sop_listen */ 211 sotpi_connect, /* sop_connect */ 212 sotpi_recvmsg, /* sop_recvmsg */ 213 sotpi_sendmsg, /* sop_sendmsg */ 214 sotpi_getpeername, /* sop_getpeername */ 215 sotpi_getsockname, /* sop_getsockname */ 216 sotpi_shutdown, /* sop_shutdown */ 217 sotpi_getsockopt, /* sop_getsockopt */ 218 
sotpi_setsockopt /* sop_setsockopt */ 219 }; 220 221 /* 222 * Common create code for socket and accept. If tso is set the values 223 * from that node is used instead of issuing a T_INFO_REQ. 224 * 225 * Assumes that the caller has a VN_HOLD on accessvp. 226 * The VN_RELE will occur either when sotpi_create() fails or when 227 * the returned sonode is freed. 228 */ 229 struct sonode * 230 sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version, 231 struct sonode *tso, int *errorp) 232 { 233 struct sonode *so; 234 vnode_t *vp; 235 int flags, error; 236 237 ASSERT(accessvp != NULL); 238 vp = makesockvp(accessvp, domain, type, protocol); 239 ASSERT(vp != NULL); 240 so = VTOSO(vp); 241 242 flags = FREAD|FWRITE; 243 244 if ((type == SOCK_STREAM || type == SOCK_DGRAM) && 245 (domain == AF_INET || domain == AF_INET6) && 246 (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 247 protocol == IPPROTO_IP)) { 248 /* Tell tcp or udp that it's talking to sockets */ 249 flags |= SO_SOCKSTR; 250 251 /* 252 * Here we indicate to socktpi_open() our attempt to 253 * make direct calls between sockfs and transport. 254 * The final decision is left to socktpi_open(). 255 */ 256 so->so_state |= SS_DIRECT; 257 258 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 259 if (so->so_type == SOCK_STREAM && tso != NULL) { 260 if (tso->so_state & SS_DIRECT) { 261 /* 262 * Inherit SS_DIRECT from listener and pass 263 * SO_ACCEPTOR open flag to tcp, indicating 264 * that this is an accept fast-path instance. 265 */ 266 flags |= SO_ACCEPTOR; 267 } else { 268 /* 269 * SS_DIRECT is not set on listener, meaning 270 * that the listener has been converted from 271 * a socket to a stream. Ensure that the 272 * acceptor inherits these settings. 273 */ 274 so->so_state &= ~SS_DIRECT; 275 flags &= ~SO_SOCKSTR; 276 } 277 } 278 } 279 280 /* 281 * Tell local transport that it is talking to sockets. 
282 */ 283 if (so->so_family == AF_UNIX) { 284 flags |= SO_SOCKSTR; 285 } 286 287 /* Initialize the kernel SSL proxy fields */ 288 so->so_kssl_type = KSSL_NO_PROXY; 289 so->so_kssl_ent = NULL; 290 so->so_kssl_ctx = NULL; 291 292 if (error = socktpi_open(&vp, flags, CRED(), NULL)) { 293 VN_RELE(vp); 294 *errorp = error; 295 return (NULL); 296 } 297 298 if (error = so_strinit(so, tso)) { 299 (void) VOP_CLOSE(vp, 0, 1, 0, CRED(), NULL); 300 VN_RELE(vp); 301 *errorp = error; 302 return (NULL); 303 } 304 305 if (version == SOV_DEFAULT) 306 version = so_default_version; 307 308 so->so_version = (short)version; 309 310 return (so); 311 } 312 313 /* 314 * Bind the socket to an unspecified address in sockfs only. 315 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 316 * required in all cases. 317 */ 318 static void 319 so_automatic_bind(struct sonode *so) 320 { 321 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 322 323 ASSERT(MUTEX_HELD(&so->so_lock)); 324 ASSERT(!(so->so_state & SS_ISBOUND)); 325 ASSERT(so->so_unbind_mp); 326 327 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 328 bzero(so->so_laddr_sa, so->so_laddr_len); 329 so->so_laddr_sa->sa_family = so->so_family; 330 so->so_state |= SS_ISBOUND; 331 } 332 333 334 /* 335 * bind the socket. 336 * 337 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 338 * are passed in we allow rebinding. Note that for backwards compatibility 339 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 340 * Thus the rebinding code is currently not executed. 341 * 342 * The constraints for rebinding are: 343 * - it is a SOCK_DGRAM, or 344 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 345 * and no listen() has been done. 346 * This rebinding code was added based on some language in the XNET book 347 * about not returning EINVAL it the protocol allows rebinding. However, 348 * this language is not present in the Posix socket draft. 
Thus maybe the 349 * rebinding logic should be deleted from the source. 350 * 351 * A null "name" can be used to unbind the socket if: 352 * - it is a SOCK_DGRAM, or 353 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 354 * and no listen() has been done. 355 */ 356 static int 357 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 358 socklen_t namelen, int backlog, int flags) 359 { 360 struct T_bind_req bind_req; 361 struct T_bind_ack *bind_ack; 362 int error = 0; 363 mblk_t *mp; 364 void *addr; 365 t_uscalar_t addrlen; 366 int unbind_on_err = 1; 367 boolean_t clear_acceptconn_on_err = B_FALSE; 368 boolean_t restore_backlog_on_err = B_FALSE; 369 int save_so_backlog; 370 t_scalar_t PRIM_type = O_T_BIND_REQ; 371 boolean_t tcp_udp_xport; 372 void *nl7c = NULL; 373 374 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 375 (void *)so, (void *)name, namelen, backlog, flags, 376 pr_state(so->so_state, so->so_mode))); 377 378 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 379 380 if (!(flags & _SOBIND_LOCK_HELD)) { 381 mutex_enter(&so->so_lock); 382 so_lock_single(so); /* Set SOLOCKED */ 383 } else { 384 ASSERT(MUTEX_HELD(&so->so_lock)); 385 ASSERT(so->so_flag & SOLOCKED); 386 } 387 388 /* 389 * Make sure that there is a preallocated unbind_req message 390 * before binding. This message allocated when the socket is 391 * created but it might be have been consumed. 392 */ 393 if (so->so_unbind_mp == NULL) { 394 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 395 /* NOTE: holding so_lock while sleeping */ 396 so->so_unbind_mp = 397 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 398 } 399 400 if (flags & _SOBIND_REBIND) { 401 /* 402 * Called from solisten after doing an sotpi_unbind() or 403 * potentially without the unbind (latter for AF_INET{,6}). 
404 */ 405 ASSERT(name == NULL && namelen == 0); 406 407 if (so->so_family == AF_UNIX) { 408 ASSERT(so->so_ux_bound_vp); 409 addr = &so->so_ux_laddr; 410 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 411 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 412 "addr 0x%p, vp %p\n", 413 addrlen, 414 (void *)((struct so_ux_addr *)addr)->soua_vp, 415 (void *)so->so_ux_bound_vp)); 416 } else { 417 addr = so->so_laddr_sa; 418 addrlen = (t_uscalar_t)so->so_laddr_len; 419 } 420 } else if (flags & _SOBIND_UNSPEC) { 421 ASSERT(name == NULL && namelen == 0); 422 423 /* 424 * The caller checked SS_ISBOUND but not necessarily 425 * under so_lock 426 */ 427 if (so->so_state & SS_ISBOUND) { 428 /* No error */ 429 goto done; 430 } 431 432 /* Set an initial local address */ 433 switch (so->so_family) { 434 case AF_UNIX: 435 /* 436 * Use an address with same size as struct sockaddr 437 * just like BSD. 438 */ 439 so->so_laddr_len = 440 (socklen_t)sizeof (struct sockaddr); 441 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 442 bzero(so->so_laddr_sa, so->so_laddr_len); 443 so->so_laddr_sa->sa_family = so->so_family; 444 445 /* 446 * Pass down an address with the implicit bind 447 * magic number and the rest all zeros. 448 * The transport will return a unique address. 449 */ 450 so->so_ux_laddr.soua_vp = NULL; 451 so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 452 addr = &so->so_ux_laddr; 453 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 454 break; 455 456 case AF_INET: 457 case AF_INET6: 458 /* 459 * An unspecified bind in TPI has a NULL address. 460 * Set the address in sockfs to have the sa_family. 461 */ 462 so->so_laddr_len = (so->so_family == AF_INET) ? 
463 (socklen_t)sizeof (sin_t) : 464 (socklen_t)sizeof (sin6_t); 465 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 466 bzero(so->so_laddr_sa, so->so_laddr_len); 467 so->so_laddr_sa->sa_family = so->so_family; 468 addr = NULL; 469 addrlen = 0; 470 break; 471 472 default: 473 /* 474 * An unspecified bind in TPI has a NULL address. 475 * Set the address in sockfs to be zero length. 476 * 477 * Can not assume there is a sa_family for all 478 * protocol families. For example, AF_X25 does not 479 * have a family field. 480 */ 481 bzero(so->so_laddr_sa, so->so_laddr_len); 482 so->so_laddr_len = 0; /* XXX correct? */ 483 addr = NULL; 484 addrlen = 0; 485 break; 486 } 487 488 } else { 489 if (so->so_state & SS_ISBOUND) { 490 /* 491 * If it is ok to rebind the socket, first unbind 492 * with the transport. A rebind to the NULL address 493 * is interpreted as an unbind. 494 * Note that a bind to NULL in BSD does unbind the 495 * socket but it fails with EINVAL. 496 * Note that regular sockets set SOV_SOCKBSD i.e. 497 * _SOBIND_SOCKBSD gets set here hence no type of 498 * socket does currently allow rebinding. 499 * 500 * If the name is NULL just do an unbind. 
501 */ 502 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 503 name != NULL) { 504 error = EINVAL; 505 unbind_on_err = 0; 506 eprintsoline(so, error); 507 goto done; 508 } 509 if ((so->so_mode & SM_CONNREQUIRED) && 510 (so->so_state & SS_CANTREBIND)) { 511 error = EINVAL; 512 unbind_on_err = 0; 513 eprintsoline(so, error); 514 goto done; 515 } 516 error = sotpi_unbind(so, 0); 517 if (error) { 518 eprintsoline(so, error); 519 goto done; 520 } 521 ASSERT(!(so->so_state & SS_ISBOUND)); 522 if (name == NULL) { 523 so->so_state &= 524 ~(SS_ISCONNECTED|SS_ISCONNECTING); 525 goto done; 526 } 527 } 528 /* X/Open requires this check */ 529 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 530 if (xnet_check_print) { 531 printf("sockfs: X/Open bind state check " 532 "caused EINVAL\n"); 533 } 534 error = EINVAL; 535 goto done; 536 } 537 538 switch (so->so_family) { 539 case AF_UNIX: 540 /* 541 * All AF_UNIX addresses are nul terminated 542 * when copied (copyin_name) in so the minimum 543 * length is 3 bytes. 544 */ 545 if (name == NULL || 546 (ssize_t)namelen <= sizeof (short) + 1) { 547 error = EISDIR; 548 eprintsoline(so, error); 549 goto done; 550 } 551 /* 552 * Verify so_family matches the bound family. 553 * BSD does not check this for AF_UNIX resulting 554 * in funny mknods. 555 */ 556 if (name->sa_family != so->so_family) { 557 error = EAFNOSUPPORT; 558 goto done; 559 } 560 break; 561 case AF_INET: 562 if (name == NULL) { 563 error = EINVAL; 564 eprintsoline(so, error); 565 goto done; 566 } 567 if ((size_t)namelen != sizeof (sin_t)) { 568 error = name->sa_family != so->so_family ? 569 EAFNOSUPPORT : EINVAL; 570 eprintsoline(so, error); 571 goto done; 572 } 573 if ((flags & _SOBIND_XPG4_2) && 574 (name->sa_family != so->so_family)) { 575 /* 576 * This check has to be made for X/Open 577 * sockets however application failures have 578 * been observed when it is applied to 579 * all sockets. 
580 */ 581 error = EAFNOSUPPORT; 582 eprintsoline(so, error); 583 goto done; 584 } 585 /* 586 * Force a zero sa_family to match so_family. 587 * 588 * Some programs like inetd(1M) don't set the 589 * family field. Other programs leave 590 * sin_family set to garbage - SunOS 4.X does 591 * not check the family field on a bind. 592 * We use the family field that 593 * was passed in to the socket() call. 594 */ 595 name->sa_family = so->so_family; 596 break; 597 598 case AF_INET6: { 599 #ifdef DEBUG 600 sin6_t *sin6 = (sin6_t *)name; 601 #endif /* DEBUG */ 602 603 if (name == NULL) { 604 error = EINVAL; 605 eprintsoline(so, error); 606 goto done; 607 } 608 if ((size_t)namelen != sizeof (sin6_t)) { 609 error = name->sa_family != so->so_family ? 610 EAFNOSUPPORT : EINVAL; 611 eprintsoline(so, error); 612 goto done; 613 } 614 if (name->sa_family != so->so_family) { 615 /* 616 * With IPv6 we require the family to match 617 * unlike in IPv4. 618 */ 619 error = EAFNOSUPPORT; 620 eprintsoline(so, error); 621 goto done; 622 } 623 #ifdef DEBUG 624 /* 625 * Verify that apps don't forget to clear 626 * sin6_scope_id etc 627 */ 628 if (sin6->sin6_scope_id != 0 && 629 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 630 zcmn_err(getzoneid(), CE_WARN, 631 "bind with uninitialized sin6_scope_id " 632 "(%d) on socket. Pid = %d\n", 633 (int)sin6->sin6_scope_id, 634 (int)curproc->p_pid); 635 } 636 if (sin6->__sin6_src_id != 0) { 637 zcmn_err(getzoneid(), CE_WARN, 638 "bind with uninitialized __sin6_src_id " 639 "(%d) on socket. Pid = %d\n", 640 (int)sin6->__sin6_src_id, 641 (int)curproc->p_pid); 642 } 643 #endif /* DEBUG */ 644 break; 645 } 646 default: 647 /* 648 * Don't do any length or sa_family check to allow 649 * non-sockaddr style addresses. 
650 */ 651 if (name == NULL) { 652 error = EINVAL; 653 eprintsoline(so, error); 654 goto done; 655 } 656 break; 657 } 658 659 if (namelen > (t_uscalar_t)so->so_laddr_maxlen) { 660 error = ENAMETOOLONG; 661 eprintsoline(so, error); 662 goto done; 663 } 664 /* 665 * Save local address. 666 */ 667 so->so_laddr_len = (socklen_t)namelen; 668 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 669 bcopy(name, so->so_laddr_sa, namelen); 670 671 addr = so->so_laddr_sa; 672 addrlen = (t_uscalar_t)so->so_laddr_len; 673 switch (so->so_family) { 674 case AF_INET6: 675 case AF_INET: 676 break; 677 case AF_UNIX: { 678 struct sockaddr_un *soun = 679 (struct sockaddr_un *)so->so_laddr_sa; 680 struct vnode *vp; 681 struct vattr vattr; 682 683 ASSERT(so->so_ux_bound_vp == NULL); 684 /* 685 * Create vnode for the specified path name. 686 * Keep vnode held with a reference in so_ux_bound_vp. 687 * Use the vnode pointer as the address used in the 688 * bind with the transport. 689 * 690 * Use the same mode as in BSD. In particular this does 691 * not observe the umask. 692 */ 693 /* MAXPATHLEN + soun_family + nul termination */ 694 if (so->so_laddr_len > 695 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 696 error = ENAMETOOLONG; 697 eprintsoline(so, error); 698 goto done; 699 } 700 vattr.va_type = VSOCK; 701 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 702 vattr.va_mask = AT_TYPE|AT_MODE; 703 /* NOTE: holding so_lock */ 704 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 705 EXCL, 0, &vp, CRMKNOD, 0, 0); 706 if (error) { 707 if (error == EEXIST) 708 error = EADDRINUSE; 709 eprintsoline(so, error); 710 goto done; 711 } 712 /* 713 * Establish pointer from the underlying filesystem 714 * vnode to the socket node. 715 * so_ux_bound_vp and v_stream->sd_vnode form the 716 * cross-linkage between the underlying filesystem 717 * node and the socket node. 
718 */ 719 ASSERT(SOTOV(so)->v_stream); 720 mutex_enter(&vp->v_lock); 721 vp->v_stream = SOTOV(so)->v_stream; 722 so->so_ux_bound_vp = vp; 723 mutex_exit(&vp->v_lock); 724 725 /* 726 * Use the vnode pointer value as a unique address 727 * (together with the magic number to avoid conflicts 728 * with implicit binds) in the transport provider. 729 */ 730 so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp; 731 so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 732 addr = &so->so_ux_laddr; 733 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 734 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 735 addrlen, 736 ((struct so_ux_addr *)addr)->soua_vp)); 737 break; 738 } 739 } /* end switch (so->so_family) */ 740 } 741 742 /* 743 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 744 * the transport can start passing up T_CONN_IND messages 745 * as soon as it receives the bind req and strsock_proto() 746 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 747 */ 748 if (flags & _SOBIND_LISTEN) { 749 if ((so->so_state & SS_ACCEPTCONN) == 0) 750 clear_acceptconn_on_err = B_TRUE; 751 save_so_backlog = so->so_backlog; 752 restore_backlog_on_err = B_TRUE; 753 so->so_state |= SS_ACCEPTCONN; 754 so->so_backlog = backlog; 755 } 756 757 /* 758 * If NL7C addr(s) have been configured check for addr/port match, 759 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 760 * 761 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 762 * family sockets only. If match mark as such. 763 */ 764 if (nl7c_enabled && ((addr != NULL && 765 (so->so_family == AF_INET || so->so_family == AF_INET6) && 766 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 767 so->so_nl7c_flags == NL7C_AF_NCA)) { 768 /* 769 * NL7C is not supported in non-global zones, 770 * we enforce this restriction here. 
771 */ 772 if (so->so_zoneid == GLOBAL_ZONEID) { 773 /* An NL7C socket, mark it */ 774 so->so_nl7c_flags |= NL7C_ENABLED; 775 if (nl7c == NULL) { 776 /* 777 * Was an AF_NCA bind() so add it to the 778 * addr list for reporting purposes. 779 */ 780 nl7c = nl7c_add_addr(addr, addrlen); 781 } 782 } else 783 nl7c = NULL; 784 } 785 /* 786 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 787 * for other transports we will send in a O_T_BIND_REQ. 788 */ 789 if (tcp_udp_xport && 790 (so->so_family == AF_INET || so->so_family == AF_INET6)) 791 PRIM_type = T_BIND_REQ; 792 793 bind_req.PRIM_type = PRIM_type; 794 bind_req.ADDR_length = addrlen; 795 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 796 bind_req.CONIND_number = backlog; 797 /* NOTE: holding so_lock while sleeping */ 798 mp = soallocproto2(&bind_req, sizeof (bind_req), 799 addr, addrlen, 0, _ALLOC_SLEEP); 800 so->so_state &= ~SS_LADDR_VALID; 801 802 /* Done using so_laddr_sa - can drop the lock */ 803 mutex_exit(&so->so_lock); 804 805 /* 806 * Intercept the bind_req message here to check if this <address/port> 807 * was configured as an SSL proxy server, or if another endpoint was 808 * already configured to act as a proxy for us. 809 * 810 * Note, only if NL7C not enabled for this socket. 
811 */ 812 if (nl7c == NULL && 813 (so->so_family == AF_INET || so->so_family == AF_INET6) && 814 so->so_type == SOCK_STREAM) { 815 816 if (so->so_kssl_ent != NULL) { 817 kssl_release_ent(so->so_kssl_ent, so, so->so_kssl_type); 818 so->so_kssl_ent = NULL; 819 } 820 821 so->so_kssl_type = kssl_check_proxy(mp, so, &so->so_kssl_ent); 822 switch (so->so_kssl_type) { 823 case KSSL_NO_PROXY: 824 break; 825 826 case KSSL_HAS_PROXY: 827 mutex_enter(&so->so_lock); 828 goto skip_transport; 829 830 case KSSL_IS_PROXY: 831 break; 832 } 833 } 834 835 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 836 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 837 if (error) { 838 eprintsoline(so, error); 839 mutex_enter(&so->so_lock); 840 goto done; 841 } 842 843 mutex_enter(&so->so_lock); 844 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 845 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 846 if (error) { 847 eprintsoline(so, error); 848 goto done; 849 } 850 skip_transport: 851 ASSERT(mp); 852 /* 853 * Even if some TPI message (e.g. T_DISCON_IND) was received in 854 * strsock_proto while the lock was dropped above, the bind 855 * is allowed to complete. 856 */ 857 858 /* Mark as bound. This will be undone if we detect errors below. */ 859 if (flags & _SOBIND_NOXLATE) { 860 ASSERT(so->so_family == AF_UNIX); 861 so->so_state |= SS_FADDR_NOXLATE; 862 } 863 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 864 so->so_state |= SS_ISBOUND; 865 ASSERT(so->so_unbind_mp); 866 867 /* note that we've already set SS_ACCEPTCONN above */ 868 869 /* 870 * Recompute addrlen - an unspecied bind sent down an 871 * address of length zero but we expect the appropriate length 872 * in return. 873 */ 874 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 875 sizeof (so->so_ux_laddr) : so->so_laddr_len); 876 877 bind_ack = (struct T_bind_ack *)mp->b_rptr; 878 /* 879 * The alignment restriction is really too strict but 880 * we want enough alignment to inspect the fields of 881 * a sockaddr_in. 
882 */ 883 addr = sogetoff(mp, bind_ack->ADDR_offset, 884 bind_ack->ADDR_length, 885 __TPI_ALIGN_SIZE); 886 if (addr == NULL) { 887 freemsg(mp); 888 error = EPROTO; 889 eprintsoline(so, error); 890 goto done; 891 } 892 if (!(flags & _SOBIND_UNSPEC)) { 893 /* 894 * Verify that the transport didn't return something we 895 * did not want e.g. an address other than what we asked for. 896 * 897 * NOTE: These checks would go away if/when we switch to 898 * using the new TPI (in which the transport would fail 899 * the request instead of assigning a different address). 900 * 901 * NOTE2: For protocols that we don't know (i.e. any 902 * other than AF_INET6, AF_INET and AF_UNIX), we 903 * cannot know if the transport should be expected to 904 * return the same address as that requested. 905 * 906 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 907 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 908 * 909 * For example, in the case of netatalk it may be 910 * inappropriate for the transport to return the 911 * requested address (as it may have allocated a local 912 * port number in behaviour similar to that of an 913 * AF_INET bind request with a port number of zero). 914 * 915 * Given the definition of O_T_BIND_REQ, where the 916 * transport may bind to an address other than the 917 * requested address, it's not possible to determine 918 * whether a returned address that differs from the 919 * requested address is a reason to fail (because the 920 * requested address was not available) or succeed 921 * (because the transport allocated an appropriate 922 * address and/or port). 923 * 924 * sockfs currently requires that the transport return 925 * the requested address in the T_BIND_ACK, unless 926 * there is code here to allow for any discrepancy. 927 * Such code exists for AF_INET and AF_INET6. 928 * 929 * Netatalk chooses to return the requested address 930 * rather than the (correct) allocated address. 
This 931 * means that netatalk violates the TPI specification 932 * (and would not function correctly if used from a 933 * TLI application), but it does mean that it works 934 * with sockfs. 935 * 936 * As noted above, using the newer XTI bind primitive 937 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 938 * allow sockfs to be more sure about whether or not 939 * the bind request had succeeded (as transports are 940 * not permitted to bind to a different address than 941 * that requested - they must return failure). 942 * Unfortunately, support for T_BIND_REQ may not be 943 * present in all transport implementations (netatalk, 944 * for example, doesn't have it), making the 945 * transition difficult. 946 */ 947 if (bind_ack->ADDR_length != addrlen) { 948 /* Assumes that the requested address was in use */ 949 freemsg(mp); 950 error = EADDRINUSE; 951 eprintsoline(so, error); 952 goto done; 953 } 954 955 switch (so->so_family) { 956 case AF_INET6: 957 case AF_INET: { 958 sin_t *rname, *aname; 959 960 rname = (sin_t *)addr; 961 aname = (sin_t *)so->so_laddr_sa; 962 963 /* 964 * Take advantage of the alignment 965 * of sin_port and sin6_port which fall 966 * in the same place in their data structures. 967 * Just use sin_port for either address family. 968 * 969 * This may become a problem if (heaven forbid) 970 * there's a separate ipv6port_reserved... :-P 971 * 972 * Binding to port 0 has the semantics of letting 973 * the transport bind to any port. 974 * 975 * If the transport is TCP or UDP since we had sent 976 * a T_BIND_REQ we would not get a port other than 977 * what we asked for. 978 */ 979 if (tcp_udp_xport) { 980 /* 981 * Pick up the new port number if we bound to 982 * port 0. 
983 */ 984 if (aname->sin_port == 0) 985 aname->sin_port = rname->sin_port; 986 so->so_state |= SS_LADDR_VALID; 987 break; 988 } 989 if (aname->sin_port != 0 && 990 aname->sin_port != rname->sin_port) { 991 freemsg(mp); 992 error = EADDRINUSE; 993 eprintsoline(so, error); 994 goto done; 995 } 996 /* 997 * Pick up the new port number if we bound to port 0. 998 */ 999 aname->sin_port = rname->sin_port; 1000 1001 /* 1002 * Unfortunately, addresses aren't _quite_ the same. 1003 */ 1004 if (so->so_family == AF_INET) { 1005 if (aname->sin_addr.s_addr != 1006 rname->sin_addr.s_addr) { 1007 freemsg(mp); 1008 error = EADDRNOTAVAIL; 1009 eprintsoline(so, error); 1010 goto done; 1011 } 1012 } else { 1013 sin6_t *rname6 = (sin6_t *)rname; 1014 sin6_t *aname6 = (sin6_t *)aname; 1015 1016 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1017 &rname6->sin6_addr)) { 1018 freemsg(mp); 1019 error = EADDRNOTAVAIL; 1020 eprintsoline(so, error); 1021 goto done; 1022 } 1023 } 1024 break; 1025 } 1026 case AF_UNIX: 1027 if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) { 1028 freemsg(mp); 1029 error = EADDRINUSE; 1030 eprintsoline(so, error); 1031 eprintso(so, 1032 ("addrlen %d, addr 0x%x, vp %p\n", 1033 addrlen, *((int *)addr), 1034 (void *)so->so_ux_bound_vp)); 1035 goto done; 1036 } 1037 so->so_state |= SS_LADDR_VALID; 1038 break; 1039 default: 1040 /* 1041 * NOTE: This assumes that addresses can be 1042 * byte-compared for equivalence. 1043 */ 1044 if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) { 1045 freemsg(mp); 1046 error = EADDRINUSE; 1047 eprintsoline(so, error); 1048 goto done; 1049 } 1050 /* 1051 * Don't mark SS_LADDR_VALID, as we cannot be 1052 * sure that the returned address is the real 1053 * bound address when talking to an unknown 1054 * transport. 1055 */ 1056 break; 1057 } 1058 } else { 1059 /* 1060 * Save for returned address for getsockname. 1061 * Needed for unspecific bind unless transport supports 1062 * the TI_GETMYNAME ioctl. 
1063 * Do this for AF_INET{,6} even though they do, as 1064 * caching info here is much better performance than 1065 * a TPI/STREAMS trip to the transport for getsockname. 1066 * Any which can't for some reason _must_ _not_ set 1067 * LADDR_VALID here for the caching version of getsockname 1068 * to not break; 1069 */ 1070 switch (so->so_family) { 1071 case AF_UNIX: 1072 /* 1073 * Record the address bound with the transport 1074 * for use by socketpair. 1075 */ 1076 bcopy(addr, &so->so_ux_laddr, addrlen); 1077 so->so_state |= SS_LADDR_VALID; 1078 break; 1079 case AF_INET: 1080 case AF_INET6: 1081 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 1082 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 1083 so->so_state |= SS_LADDR_VALID; 1084 break; 1085 default: 1086 /* 1087 * Don't mark SS_LADDR_VALID, as we cannot be 1088 * sure that the returned address is the real 1089 * bound address when talking to an unknown 1090 * transport. 1091 */ 1092 break; 1093 } 1094 } 1095 1096 if (nl7c != NULL) { 1097 /* Register listen()er sonode pointer with NL7C */ 1098 nl7c_listener_addr(nl7c, so); 1099 } 1100 1101 freemsg(mp); 1102 1103 done: 1104 if (error) { 1105 /* reset state & backlog to values held on entry */ 1106 if (clear_acceptconn_on_err == B_TRUE) 1107 so->so_state &= ~SS_ACCEPTCONN; 1108 if (restore_backlog_on_err == B_TRUE) 1109 so->so_backlog = save_so_backlog; 1110 1111 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1112 int err; 1113 1114 err = sotpi_unbind(so, 0); 1115 /* LINTED - statement has no consequent: if */ 1116 if (err) { 1117 eprintsoline(so, error); 1118 } else { 1119 ASSERT(!(so->so_state & SS_ISBOUND)); 1120 } 1121 } 1122 } 1123 if (!(flags & _SOBIND_LOCK_HELD)) { 1124 so_unlock_single(so, SOLOCKED); 1125 mutex_exit(&so->so_lock); 1126 } else { 1127 /* If the caller held the lock don't release it here */ 1128 ASSERT(MUTEX_HELD(&so->so_lock)); 1129 ASSERT(so->so_flag & SOLOCKED); 1130 } 1131 return (error); 1132 } 1133 1134 /* bind the socket */ 
1135 static int 1136 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1137 int flags) 1138 { 1139 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1140 return (sotpi_bindlisten(so, name, namelen, 0, flags)); 1141 1142 flags &= ~_SOBIND_SOCKETPAIR; 1143 return (sotpi_bindlisten(so, name, namelen, 1, flags)); 1144 } 1145 1146 /* 1147 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1148 * address, or when listen needs to unbind and bind. 1149 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1150 * so that a sobind can pick them up. 1151 */ 1152 static int 1153 sotpi_unbind(struct sonode *so, int flags) 1154 { 1155 struct T_unbind_req unbind_req; 1156 int error = 0; 1157 mblk_t *mp; 1158 1159 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1160 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1161 1162 ASSERT(MUTEX_HELD(&so->so_lock)); 1163 ASSERT(so->so_flag & SOLOCKED); 1164 1165 if (!(so->so_state & SS_ISBOUND)) { 1166 error = EINVAL; 1167 eprintsoline(so, error); 1168 goto done; 1169 } 1170 1171 mutex_exit(&so->so_lock); 1172 1173 /* 1174 * Flush the read and write side (except stream head read queue) 1175 * and send down T_UNBIND_REQ. 1176 */ 1177 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1178 1179 unbind_req.PRIM_type = T_UNBIND_REQ; 1180 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1181 0, _ALLOC_SLEEP); 1182 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1183 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1184 mutex_enter(&so->so_lock); 1185 if (error) { 1186 eprintsoline(so, error); 1187 goto done; 1188 } 1189 1190 error = sowaitokack(so, T_UNBIND_REQ); 1191 if (error) { 1192 eprintsoline(so, error); 1193 goto done; 1194 } 1195 1196 /* 1197 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1198 * strsock_proto while the lock was dropped above, the unbind 1199 * is allowed to complete. 
1200 */ 1201 if (!(flags & _SOUNBIND_REBIND)) { 1202 /* 1203 * Clear out bound address. 1204 */ 1205 vnode_t *vp; 1206 1207 if ((vp = so->so_ux_bound_vp) != NULL) { 1208 1209 /* Undo any SSL proxy setup */ 1210 if ((so->so_family == AF_INET || 1211 so->so_family == AF_INET6) && 1212 (so->so_type == SOCK_STREAM) && 1213 (so->so_kssl_ent != NULL)) { 1214 kssl_release_ent(so->so_kssl_ent, so, 1215 so->so_kssl_type); 1216 so->so_kssl_ent = NULL; 1217 so->so_kssl_type = KSSL_NO_PROXY; 1218 } 1219 1220 so->so_ux_bound_vp = NULL; 1221 vn_rele_stream(vp); 1222 } 1223 /* Clear out address */ 1224 so->so_laddr_len = 0; 1225 } 1226 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1227 1228 done: 1229 1230 /* If the caller held the lock don't release it here */ 1231 ASSERT(MUTEX_HELD(&so->so_lock)); 1232 ASSERT(so->so_flag & SOLOCKED); 1233 1234 return (error); 1235 } 1236 1237 /* 1238 * listen on the socket. 1239 * For TPI conforming transports this has to first unbind with the transport 1240 * and then bind again using the new backlog. 1241 */ 1242 int 1243 sotpi_listen(struct sonode *so, int backlog) 1244 { 1245 int error = 0; 1246 1247 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1248 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1249 1250 if (so->so_serv_type == T_CLTS) 1251 return (EOPNOTSUPP); 1252 1253 /* 1254 * If the socket is ready to accept connections already, then 1255 * return without doing anything. This avoids a problem where 1256 * a second listen() call fails if a connection is pending and 1257 * leaves the socket unbound. Only when we are not unbinding 1258 * with the transport can we safely increase the backlog. 
1259 */ 1260 if (so->so_state & SS_ACCEPTCONN && 1261 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1262 /*CONSTCOND*/ 1263 !solisten_tpi_tcp)) 1264 return (0); 1265 1266 if (so->so_state & SS_ISCONNECTED) 1267 return (EINVAL); 1268 1269 mutex_enter(&so->so_lock); 1270 so_lock_single(so); /* Set SOLOCKED */ 1271 1272 if (backlog < 0) 1273 backlog = 0; 1274 /* 1275 * Use the same qlimit as in BSD. BSD checks the qlimit 1276 * before queuing the next connection implying that a 1277 * listen(sock, 0) allows one connection to be queued. 1278 * BSD also uses 1.5 times the requested backlog. 1279 * 1280 * XNS Issue 4 required a strict interpretation of the backlog. 1281 * This has been waived subsequently for Issue 4 and the change 1282 * incorporated in XNS Issue 5. So we aren't required to do 1283 * anything special for XPG apps. 1284 */ 1285 if (backlog >= (INT_MAX - 1) / 3) 1286 backlog = INT_MAX; 1287 else 1288 backlog = backlog * 3 / 2 + 1; 1289 1290 /* 1291 * If the listen doesn't change the backlog we do nothing. 1292 * This avoids an EPROTO error from the transport. 1293 */ 1294 if ((so->so_state & SS_ACCEPTCONN) && 1295 so->so_backlog == backlog) 1296 goto done; 1297 1298 if (!(so->so_state & SS_ISBOUND)) { 1299 /* 1300 * Must have been explicitly bound in the UNIX domain. 1301 */ 1302 if (so->so_family == AF_UNIX) { 1303 error = EINVAL; 1304 goto done; 1305 } 1306 error = sotpi_bindlisten(so, NULL, 0, backlog, 1307 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1308 } else if (backlog > 0) { 1309 /* 1310 * AF_INET{,6} hack to avoid losing the port. 1311 * Assumes that all AF_INET{,6} transports can handle a 1312 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1313 * has already bound thus it is possible to avoid the unbind. 
1314 */ 1315 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1316 /*CONSTCOND*/ 1317 !solisten_tpi_tcp)) { 1318 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1319 if (error) 1320 goto done; 1321 } 1322 error = sotpi_bindlisten(so, NULL, 0, backlog, 1323 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1324 } else { 1325 so->so_state |= SS_ACCEPTCONN; 1326 so->so_backlog = backlog; 1327 } 1328 if (error) 1329 goto done; 1330 ASSERT(so->so_state & SS_ACCEPTCONN); 1331 done: 1332 so_unlock_single(so, SOLOCKED); 1333 mutex_exit(&so->so_lock); 1334 return (error); 1335 } 1336 1337 /* 1338 * Disconnect either a specified seqno or all (-1). 1339 * The former is used on listening sockets only. 1340 * 1341 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1342 * the current use of sodisconnect(seqno == -1) is only for shutdown 1343 * so there is no point (and potentially incorrect) to unbind. 1344 */ 1345 int 1346 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1347 { 1348 struct T_discon_req discon_req; 1349 int error = 0; 1350 mblk_t *mp; 1351 1352 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1353 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1354 1355 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1356 mutex_enter(&so->so_lock); 1357 so_lock_single(so); /* Set SOLOCKED */ 1358 } else { 1359 ASSERT(MUTEX_HELD(&so->so_lock)); 1360 ASSERT(so->so_flag & SOLOCKED); 1361 } 1362 1363 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1364 error = EINVAL; 1365 eprintsoline(so, error); 1366 goto done; 1367 } 1368 1369 mutex_exit(&so->so_lock); 1370 /* 1371 * Flush the write side (unless this is a listener) 1372 * and then send down a T_DISCON_REQ. 1373 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1374 * and other messages.) 
1375 */ 1376 if (!(so->so_state & SS_ACCEPTCONN)) 1377 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1378 1379 discon_req.PRIM_type = T_DISCON_REQ; 1380 discon_req.SEQ_number = seqno; 1381 mp = soallocproto1(&discon_req, sizeof (discon_req), 1382 0, _ALLOC_SLEEP); 1383 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1384 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1385 mutex_enter(&so->so_lock); 1386 if (error) { 1387 eprintsoline(so, error); 1388 goto done; 1389 } 1390 1391 error = sowaitokack(so, T_DISCON_REQ); 1392 if (error) { 1393 eprintsoline(so, error); 1394 goto done; 1395 } 1396 /* 1397 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1398 * strsock_proto while the lock was dropped above, the disconnect 1399 * is allowed to complete. However, it is not possible to 1400 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1401 */ 1402 so->so_state &= 1403 ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID); 1404 done: 1405 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1406 so_unlock_single(so, SOLOCKED); 1407 mutex_exit(&so->so_lock); 1408 } else { 1409 /* If the caller held the lock don't release it here */ 1410 ASSERT(MUTEX_HELD(&so->so_lock)); 1411 ASSERT(so->so_flag & SOLOCKED); 1412 } 1413 return (error); 1414 } 1415 1416 int 1417 sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop) 1418 { 1419 struct T_conn_ind *conn_ind; 1420 struct T_conn_res *conn_res; 1421 int error = 0; 1422 mblk_t *mp, *ctxmp, *ack_mp; 1423 struct sonode *nso; 1424 vnode_t *nvp; 1425 void *src; 1426 t_uscalar_t srclen; 1427 void *opt; 1428 t_uscalar_t optlen; 1429 t_scalar_t PRIM_type; 1430 t_scalar_t SEQ_number; 1431 size_t sinlen; 1432 1433 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1434 (void *)so, fflag, (void *)nsop, 1435 pr_state(so->so_state, so->so_mode))); 1436 1437 /* 1438 * Defer single-threading the accepting socket until 1439 * the T_CONN_IND has been received and parsed and the 1440 * new sonode has been opened. 
1441 */ 1442 1443 /* Check that we are not already connected */ 1444 if ((so->so_state & SS_ACCEPTCONN) == 0) 1445 goto conn_bad; 1446 again: 1447 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1448 goto e_bad; 1449 1450 ASSERT(mp); 1451 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1452 ctxmp = mp->b_cont; 1453 1454 /* 1455 * Save SEQ_number for error paths. 1456 */ 1457 SEQ_number = conn_ind->SEQ_number; 1458 1459 srclen = conn_ind->SRC_length; 1460 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1461 if (src == NULL) { 1462 error = EPROTO; 1463 freemsg(mp); 1464 eprintsoline(so, error); 1465 goto disconnect_unlocked; 1466 } 1467 optlen = conn_ind->OPT_length; 1468 switch (so->so_family) { 1469 case AF_INET: 1470 case AF_INET6: 1471 if ((optlen == sizeof (intptr_t)) && 1472 ((so->so_state & SS_DIRECT) != 0)) { 1473 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1474 &opt, conn_ind->OPT_length); 1475 } else { 1476 /* 1477 * The transport (in this case TCP) hasn't sent up 1478 * a pointer to an instance for the accept fast-path. 1479 * Disable fast-path completely because the call to 1480 * sotpi_create() below would otherwise create an 1481 * incomplete TCP instance, which would lead to 1482 * problems when sockfs sends a normal T_CONN_RES 1483 * message down the new stream. 1484 */ 1485 if (so->so_state & SS_DIRECT) { 1486 int rval; 1487 /* 1488 * For consistency we inform tcp to disable 1489 * direct interface on the listener, though 1490 * we can certainly live without doing this 1491 * because no data will ever travel upstream 1492 * on the listening socket. 
1493 */ 1494 so->so_state &= ~SS_DIRECT; 1495 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1496 0, 0, K_TO_K, CRED(), &rval); 1497 } 1498 opt = NULL; 1499 optlen = 0; 1500 } 1501 break; 1502 case AF_UNIX: 1503 default: 1504 if (optlen != 0) { 1505 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1506 __TPI_ALIGN_SIZE); 1507 if (opt == NULL) { 1508 error = EPROTO; 1509 freemsg(mp); 1510 eprintsoline(so, error); 1511 goto disconnect_unlocked; 1512 } 1513 } 1514 if (so->so_family == AF_UNIX) { 1515 if (!(so->so_state & SS_FADDR_NOXLATE)) { 1516 src = NULL; 1517 srclen = 0; 1518 } 1519 /* Extract src address from options */ 1520 if (optlen != 0) 1521 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1522 } 1523 break; 1524 } 1525 1526 /* 1527 * Create the new socket. 1528 */ 1529 VN_HOLD(so->so_accessvp); 1530 nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type, 1531 so->so_protocol, so->so_version, so, &error); 1532 if (nso == NULL) { 1533 ASSERT(error != 0); 1534 /* 1535 * Accept can not fail with ENOBUFS. sotpi_create 1536 * sleeps waiting for memory until a signal is caught 1537 * so return EINTR. 1538 */ 1539 freemsg(mp); 1540 if (error == ENOBUFS) 1541 error = EINTR; 1542 goto e_disc_unl; 1543 } 1544 nvp = SOTOV(nso); 1545 1546 /* 1547 * If the transport sent up an SSL connection context, then attach 1548 * it the new socket, and set the (sd_wputdatafunc)() and 1549 * (sd_rputdatafunc)() stream head hooks to intercept and process 1550 * SSL records. 1551 */ 1552 if (ctxmp != NULL) { 1553 /* 1554 * This kssl_ctx_t is already held for us by the transport. 1555 * So, we don't need to do a kssl_hold_ctx() here. 
1556 */ 1557 nso->so_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1558 freemsg(ctxmp); 1559 mp->b_cont = NULL; 1560 strsetrwputdatahooks(nvp, strsock_kssl_input, 1561 strsock_kssl_output); 1562 1563 /* Disable sodirect if any */ 1564 if (nso->so_direct != NULL) { 1565 mutex_enter(nso->so_direct->sod_lockp); 1566 SOD_DISABLE(nso->so_direct); 1567 mutex_exit(nso->so_direct->sod_lockp); 1568 nso->so_direct = NULL; 1569 } 1570 } 1571 #ifdef DEBUG 1572 /* 1573 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1574 * it's inherited early to allow debugging of the accept code itself. 1575 */ 1576 nso->so_options |= so->so_options & SO_DEBUG; 1577 #endif /* DEBUG */ 1578 1579 /* 1580 * Save the SRC address from the T_CONN_IND 1581 * for getpeername to work on AF_UNIX and on transports that do not 1582 * support TI_GETPEERNAME. 1583 * 1584 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1585 * copyin_name(). 1586 */ 1587 if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) { 1588 error = EINVAL; 1589 freemsg(mp); 1590 eprintsoline(so, error); 1591 goto disconnect_vp_unlocked; 1592 } 1593 nso->so_faddr_len = (socklen_t)srclen; 1594 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 1595 bcopy(src, nso->so_faddr_sa, srclen); 1596 nso->so_state |= SS_FADDR_VALID; 1597 1598 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1599 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1600 cred_t *cr; 1601 1602 if ((cr = DB_CRED(mp)) != NULL) { 1603 crhold(cr); 1604 nso->so_peercred = cr; 1605 nso->so_cpid = DB_CPID(mp); 1606 } 1607 freemsg(mp); 1608 1609 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1610 sizeof (intptr_t), 0, _ALLOC_INTR); 1611 if (mp == NULL) { 1612 /* 1613 * Accept can not fail with ENOBUFS. 1614 * A signal was caught so return EINTR. 
1615 */ 1616 error = EINTR; 1617 eprintsoline(so, error); 1618 goto disconnect_vp_unlocked; 1619 } 1620 conn_res = (struct T_conn_res *)mp->b_rptr; 1621 } else { 1622 nso->so_peercred = DB_CRED(mp); 1623 nso->so_cpid = DB_CPID(mp); 1624 DB_CRED(mp) = NULL; 1625 1626 mp->b_rptr = DB_BASE(mp); 1627 conn_res = (struct T_conn_res *)mp->b_rptr; 1628 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1629 } 1630 1631 /* 1632 * New socket must be bound at least in sockfs and, except for AF_INET, 1633 * (or AF_INET6) it also has to be bound in the transport provider. 1634 * We set the local address in the sonode from the T_OK_ACK of the 1635 * T_CONN_RES. For this reason the address we bind to here isn't 1636 * important. 1637 */ 1638 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1639 /*CONSTCOND*/ 1640 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1641 /* 1642 * Optimization for AF_INET{,6} transports 1643 * that can handle a T_CONN_RES without being bound. 1644 */ 1645 mutex_enter(&nso->so_lock); 1646 so_automatic_bind(nso); 1647 mutex_exit(&nso->so_lock); 1648 } else { 1649 /* Perform NULL bind with the transport provider. */ 1650 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) { 1651 ASSERT(error != ENOBUFS); 1652 freemsg(mp); 1653 eprintsoline(nso, error); 1654 goto disconnect_vp_unlocked; 1655 } 1656 } 1657 1658 /* 1659 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1660 * so that any data arriving on the new socket will cause the 1661 * appropriate signals to be delivered for the new socket. 1662 * 1663 * No other thread (except strsock_proto and strsock_misc) 1664 * can access the new socket thus we relax the locking. 
1665 */ 1666 nso->so_pgrp = so->so_pgrp; 1667 nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE); 1668 1669 if (nso->so_pgrp != 0) { 1670 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1671 eprintsoline(nso, error); 1672 error = 0; 1673 nso->so_pgrp = 0; 1674 } 1675 } 1676 1677 /* 1678 * Make note of the socket level options. TCP and IP level options 1679 * are already inherited. We could do all this after accept is 1680 * successful but doing it here simplifies code and no harm done 1681 * for error case. 1682 */ 1683 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1684 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1685 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1686 nso->so_sndbuf = so->so_sndbuf; 1687 nso->so_rcvbuf = so->so_rcvbuf; 1688 if (nso->so_options & SO_LINGER) 1689 nso->so_linger = so->so_linger; 1690 1691 if ((so->so_state & SS_DIRECT) != 0) { 1692 1693 ASSERT(opt != NULL); 1694 1695 conn_res->OPT_length = optlen; 1696 conn_res->OPT_offset = MBLKL(mp); 1697 bcopy(&opt, mp->b_wptr, optlen); 1698 mp->b_wptr += optlen; 1699 conn_res->PRIM_type = T_CONN_RES; 1700 conn_res->ACCEPTOR_id = 0; 1701 PRIM_type = T_CONN_RES; 1702 1703 /* Send down the T_CONN_RES on acceptor STREAM */ 1704 error = kstrputmsg(SOTOV(nso), mp, NULL, 1705 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1706 if (error) { 1707 mutex_enter(&so->so_lock); 1708 so_lock_single(so); 1709 eprintsoline(so, error); 1710 goto disconnect_vp; 1711 } 1712 mutex_enter(&nso->so_lock); 1713 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1714 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1715 if (error) { 1716 mutex_exit(&nso->so_lock); 1717 mutex_enter(&so->so_lock); 1718 so_lock_single(so); 1719 eprintsoline(so, error); 1720 goto disconnect_vp; 1721 } 1722 if (nso->so_family == AF_INET) { 1723 sin_t *sin; 1724 1725 sin = (sin_t *)(ack_mp->b_rptr + 1726 sizeof (struct T_ok_ack)); 1727 bcopy(sin, nso->so_laddr_sa, sizeof (sin_t)); 1728 nso->so_laddr_len = sizeof 
(sin_t); 1729 } else { 1730 sin6_t *sin6; 1731 1732 sin6 = (sin6_t *)(ack_mp->b_rptr + 1733 sizeof (struct T_ok_ack)); 1734 bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t)); 1735 nso->so_laddr_len = sizeof (sin6_t); 1736 } 1737 freemsg(ack_mp); 1738 1739 nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID; 1740 nso->so_priv = opt; 1741 1742 if (so->so_nl7c_flags & NL7C_ENABLED) { 1743 /* 1744 * A NL7C marked listen()er so the new socket 1745 * inherits the listen()er's NL7C state, except 1746 * for NL7C_POLLIN. 1747 * 1748 * Only call NL7C to process the new socket if 1749 * the listen socket allows blocking i/o. 1750 */ 1751 nso->so_nl7c_flags = so->so_nl7c_flags & (~NL7C_POLLIN); 1752 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1753 /* 1754 * Nonblocking accept() just make it 1755 * persist to defer processing to the 1756 * read-side syscall (e.g. read). 1757 */ 1758 nso->so_nl7c_flags |= NL7C_SOPERSIST; 1759 } else if (nl7c_process(nso, B_FALSE)) { 1760 /* 1761 * NL7C has completed processing on the 1762 * socket, close the socket and back to 1763 * the top to await the next T_CONN_IND. 1764 */ 1765 mutex_exit(&nso->so_lock); 1766 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1767 CRED(), NULL); 1768 VN_RELE(nvp); 1769 goto again; 1770 } 1771 /* Pass the new socket out */ 1772 } 1773 1774 mutex_exit(&nso->so_lock); 1775 1776 /* 1777 * It's possible, through the use of autopush for example, 1778 * that the acceptor stream may not support SS_DIRECT 1779 * semantics. If the new socket does not support SS_DIRECT 1780 * we issue a _SIOCSOCKFALLBACK to inform the transport 1781 * as we would in the I_PUSH case. 1782 */ 1783 if (!(nso->so_state & SS_DIRECT)) { 1784 int rval; 1785 1786 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 1787 0, 0, K_TO_K, CRED(), &rval)) != 0) { 1788 mutex_enter(&so->so_lock); 1789 so_lock_single(so); 1790 eprintsoline(so, error); 1791 goto disconnect_vp; 1792 } 1793 } 1794 1795 /* 1796 * Pass out new socket. 
1797 */ 1798 if (nsop != NULL) 1799 *nsop = nso; 1800 1801 return (0); 1802 } 1803 1804 /* 1805 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1806 * which don't support the FireEngine accept fast-path. It is also 1807 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1808 * again. Neither sockfs nor TCP attempt to find out if some other 1809 * random module has been inserted in between (in which case we 1810 * should follow TLI accept behaviour). We blindly assume the worst 1811 * case and revert back to old behaviour i.e. TCP will not send us 1812 * any option (eager) and the accept should happen on the listener 1813 * queue. Any queued T_conn_ind have already got their options removed 1814 * by so_sock2_stream() when "sockmod" was I_POP'd. 1815 */ 1816 /* 1817 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1818 */ 1819 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1820 #ifdef _ILP32 1821 queue_t *q; 1822 1823 /* 1824 * Find read queue in driver 1825 * Can safely do this since we "own" nso/nvp. 
1826 */ 1827 q = strvp2wq(nvp)->q_next; 1828 while (SAMESTR(q)) 1829 q = q->q_next; 1830 q = RD(q); 1831 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1832 #else 1833 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1834 #endif /* _ILP32 */ 1835 conn_res->PRIM_type = O_T_CONN_RES; 1836 PRIM_type = O_T_CONN_RES; 1837 } else { 1838 conn_res->ACCEPTOR_id = nso->so_acceptor_id; 1839 conn_res->PRIM_type = T_CONN_RES; 1840 PRIM_type = T_CONN_RES; 1841 } 1842 conn_res->SEQ_number = SEQ_number; 1843 conn_res->OPT_length = 0; 1844 conn_res->OPT_offset = 0; 1845 1846 mutex_enter(&so->so_lock); 1847 so_lock_single(so); /* Set SOLOCKED */ 1848 mutex_exit(&so->so_lock); 1849 1850 error = kstrputmsg(SOTOV(so), mp, NULL, 1851 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1852 mutex_enter(&so->so_lock); 1853 if (error) { 1854 eprintsoline(so, error); 1855 goto disconnect_vp; 1856 } 1857 error = sowaitprim(so, PRIM_type, T_OK_ACK, 1858 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1859 if (error) { 1860 eprintsoline(so, error); 1861 goto disconnect_vp; 1862 } 1863 /* 1864 * If there is a sin/sin6 appended onto the T_OK_ACK use 1865 * that to set the local address. If this is not present 1866 * then we zero out the address and don't set the 1867 * SS_LADDR_VALID bit. For AF_UNIX endpoints we copy over 1868 * the pathname from the listening socket. 1869 */ 1870 sinlen = (nso->so_family == AF_INET) ? 
sizeof (sin_t) : sizeof (sin6_t); 1871 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 1872 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 1873 ack_mp->b_rptr += sizeof (struct T_ok_ack); 1874 bcopy(ack_mp->b_rptr, nso->so_laddr_sa, sinlen); 1875 nso->so_laddr_len = sinlen; 1876 nso->so_state |= SS_LADDR_VALID; 1877 } else if (nso->so_family == AF_UNIX) { 1878 ASSERT(so->so_family == AF_UNIX); 1879 nso->so_laddr_len = so->so_laddr_len; 1880 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1881 bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len); 1882 nso->so_state |= SS_LADDR_VALID; 1883 } else { 1884 nso->so_laddr_len = so->so_laddr_len; 1885 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1886 bzero(nso->so_laddr_sa, nso->so_addr_size); 1887 nso->so_laddr_sa->sa_family = nso->so_family; 1888 } 1889 freemsg(ack_mp); 1890 1891 so_unlock_single(so, SOLOCKED); 1892 mutex_exit(&so->so_lock); 1893 1894 nso->so_state |= SS_ISCONNECTED; 1895 1896 /* 1897 * Pass out new socket. 1898 */ 1899 if (nsop != NULL) 1900 *nsop = nso; 1901 1902 return (0); 1903 1904 1905 eproto_disc_unl: 1906 error = EPROTO; 1907 e_disc_unl: 1908 eprintsoline(so, error); 1909 goto disconnect_unlocked; 1910 1911 pr_disc_vp_unl: 1912 eprintsoline(so, error); 1913 disconnect_vp_unlocked: 1914 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 1915 VN_RELE(nvp); 1916 disconnect_unlocked: 1917 (void) sodisconnect(so, SEQ_number, 0); 1918 return (error); 1919 1920 pr_disc_vp: 1921 eprintsoline(so, error); 1922 disconnect_vp: 1923 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 1924 so_unlock_single(so, SOLOCKED); 1925 mutex_exit(&so->so_lock); 1926 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 1927 VN_RELE(nvp); 1928 return (error); 1929 1930 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 1931 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 1932 ? 
EOPNOTSUPP : EINVAL; 1933 e_bad: 1934 eprintsoline(so, error); 1935 return (error); 1936 } 1937 1938 /* 1939 * connect a socket. 1940 * 1941 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 1942 * unconnect (by specifying a null address). 1943 */ 1944 int 1945 sotpi_connect(struct sonode *so, 1946 const struct sockaddr *name, 1947 socklen_t namelen, 1948 int fflag, 1949 int flags) 1950 { 1951 struct T_conn_req conn_req; 1952 int error = 0; 1953 mblk_t *mp; 1954 void *src; 1955 socklen_t srclen; 1956 void *addr; 1957 socklen_t addrlen; 1958 boolean_t need_unlock; 1959 1960 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 1961 (void *)so, (void *)name, namelen, fflag, flags, 1962 pr_state(so->so_state, so->so_mode))); 1963 1964 /* 1965 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 1966 * avoid sleeping for memory with SOLOCKED held. 1967 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen 1968 * + sizeof (struct T_opthdr). 1969 * (the AF_UNIX so_ux_addr_xlate() does not make the address 1970 * exceed so_faddr_maxlen). 1971 */ 1972 mp = soallocproto(sizeof (struct T_conn_req) + 1973 2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR); 1974 if (mp == NULL) { 1975 /* 1976 * Connect can not fail with ENOBUFS. A signal was 1977 * caught so return EINTR. 1978 */ 1979 error = EINTR; 1980 eprintsoline(so, error); 1981 return (error); 1982 } 1983 1984 mutex_enter(&so->so_lock); 1985 /* 1986 * Make sure there is a preallocated T_unbind_req message 1987 * before any binding. This message is allocated when the 1988 * socket is created. Since another thread can consume 1989 * so_unbind_mp by the time we return from so_lock_single(), 1990 * we should check the availability of so_unbind_mp after 1991 * we return from so_lock_single(). 
1992 */ 1993 1994 so_lock_single(so); /* Set SOLOCKED */ 1995 need_unlock = B_TRUE; 1996 1997 if (so->so_unbind_mp == NULL) { 1998 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 1999 /* NOTE: holding so_lock while sleeping */ 2000 so->so_unbind_mp = 2001 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR); 2002 if (so->so_unbind_mp == NULL) { 2003 error = EINTR; 2004 goto done; 2005 } 2006 } 2007 2008 /* 2009 * Can't have done a listen before connecting. 2010 */ 2011 if (so->so_state & SS_ACCEPTCONN) { 2012 error = EOPNOTSUPP; 2013 goto done; 2014 } 2015 2016 /* 2017 * Must be bound with the transport 2018 */ 2019 if (!(so->so_state & SS_ISBOUND)) { 2020 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2021 /*CONSTCOND*/ 2022 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2023 /* 2024 * Optimization for AF_INET{,6} transports 2025 * that can handle a T_CONN_REQ without being bound. 2026 */ 2027 so_automatic_bind(so); 2028 } else { 2029 error = sotpi_bind(so, NULL, 0, 2030 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 2031 if (error) 2032 goto done; 2033 } 2034 ASSERT(so->so_state & SS_ISBOUND); 2035 flags |= _SOCONNECT_DID_BIND; 2036 } 2037 2038 /* 2039 * Handle a connect to a name parameter of type AF_UNSPEC like a 2040 * connect to a null address. This is the portable method to 2041 * unconnect a socket. 2042 */ 2043 if ((namelen >= sizeof (sa_family_t)) && 2044 (name->sa_family == AF_UNSPEC)) { 2045 name = NULL; 2046 namelen = 0; 2047 } 2048 2049 /* 2050 * Check that we are not already connected. 2051 * A connection-oriented socket cannot be reconnected. 
2052 * A connected connection-less socket can be 2053 * - connected to a different address by a subsequent connect 2054 * - "unconnected" by a connect to the NULL address 2055 */ 2056 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2057 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2058 if (so->so_mode & SM_CONNREQUIRED) { 2059 /* Connection-oriented socket */ 2060 error = so->so_state & SS_ISCONNECTED ? 2061 EISCONN : EALREADY; 2062 goto done; 2063 } 2064 /* Connection-less socket */ 2065 if (name == NULL) { 2066 /* 2067 * Remove the connected state and clear SO_DGRAM_ERRIND 2068 * since it was set when the socket was connected. 2069 * If this is UDP also send down a T_DISCON_REQ. 2070 */ 2071 int val; 2072 2073 if ((so->so_family == AF_INET || 2074 so->so_family == AF_INET6) && 2075 (so->so_type == SOCK_DGRAM || 2076 so->so_type == SOCK_RAW) && 2077 /*CONSTCOND*/ 2078 !soconnect_tpi_udp) { 2079 /* XXX What about implicitly unbinding here? */ 2080 error = sodisconnect(so, -1, 2081 _SODISCONNECT_LOCK_HELD); 2082 } else { 2083 so->so_state &= 2084 ~(SS_ISCONNECTED | SS_ISCONNECTING | 2085 SS_FADDR_VALID); 2086 so->so_faddr_len = 0; 2087 } 2088 2089 so_unlock_single(so, SOLOCKED); 2090 mutex_exit(&so->so_lock); 2091 2092 val = 0; 2093 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2094 &val, (t_uscalar_t)sizeof (val)); 2095 2096 mutex_enter(&so->so_lock); 2097 so_lock_single(so); /* Set SOLOCKED */ 2098 goto done; 2099 } 2100 } 2101 ASSERT(so->so_state & SS_ISBOUND); 2102 2103 if (name == NULL || namelen == 0) { 2104 error = EINVAL; 2105 goto done; 2106 } 2107 /* 2108 * Mark the socket if so_faddr_sa represents the transport level 2109 * address. 
2110 */ 2111 if (flags & _SOCONNECT_NOXLATE) { 2112 struct sockaddr_ux *soaddr_ux; 2113 2114 ASSERT(so->so_family == AF_UNIX); 2115 if (namelen != sizeof (struct sockaddr_ux)) { 2116 error = EINVAL; 2117 goto done; 2118 } 2119 soaddr_ux = (struct sockaddr_ux *)name; 2120 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2121 namelen = sizeof (soaddr_ux->sou_addr); 2122 so->so_state |= SS_FADDR_NOXLATE; 2123 } 2124 2125 /* 2126 * Length and family checks. 2127 */ 2128 error = so_addr_verify(so, name, namelen); 2129 if (error) 2130 goto bad; 2131 2132 /* 2133 * Save foreign address. Needed for AF_UNIX as well as 2134 * transport providers that do not support TI_GETPEERNAME. 2135 * Also used for cached foreign address for TCP and UDP. 2136 */ 2137 if (namelen > (t_uscalar_t)so->so_faddr_maxlen) { 2138 error = EINVAL; 2139 goto done; 2140 } 2141 so->so_faddr_len = (socklen_t)namelen; 2142 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2143 bcopy(name, so->so_faddr_sa, namelen); 2144 so->so_state |= SS_FADDR_VALID; 2145 2146 if (so->so_family == AF_UNIX) { 2147 if (so->so_state & SS_FADDR_NOXLATE) { 2148 /* 2149 * Already have a transport internal address. Do not 2150 * pass any (transport internal) source address. 2151 */ 2152 addr = so->so_faddr_sa; 2153 addrlen = (t_uscalar_t)so->so_faddr_len; 2154 src = NULL; 2155 srclen = 0; 2156 } else { 2157 /* 2158 * Pass the sockaddr_un source address as an option 2159 * and translate the remote address. 2160 * Holding so_lock thus so_laddr_sa can not change. 
2161 */ 2162 src = so->so_laddr_sa; 2163 srclen = (t_uscalar_t)so->so_laddr_len; 2164 dprintso(so, 1, 2165 ("sotpi_connect UNIX: srclen %d, src %p\n", 2166 srclen, src)); 2167 error = so_ux_addr_xlate(so, 2168 so->so_faddr_sa, (socklen_t)so->so_faddr_len, 2169 (flags & _SOCONNECT_XPG4_2), 2170 &addr, &addrlen); 2171 if (error) 2172 goto bad; 2173 } 2174 } else { 2175 addr = so->so_faddr_sa; 2176 addrlen = (t_uscalar_t)so->so_faddr_len; 2177 src = NULL; 2178 srclen = 0; 2179 } 2180 /* 2181 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2182 * option which asks the transport provider to send T_UDERR_IND 2183 * messages. These T_UDERR_IND messages are used to return connected 2184 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2185 * 2186 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2187 * we send down a T_CONN_REQ. This is needed to let the 2188 * transport assign a local address that is consistent with 2189 * the remote address. Applications depend on a getsockname() 2190 * after a connect() to retrieve the "source" IP address for 2191 * the connected socket. Invalidate the cached local address 2192 * to force getsockname() to enquire of the transport. 2193 */ 2194 if (!(so->so_mode & SM_CONNREQUIRED)) { 2195 /* 2196 * Datagram socket. 2197 */ 2198 int32_t val; 2199 2200 so_unlock_single(so, SOLOCKED); 2201 mutex_exit(&so->so_lock); 2202 2203 val = 1; 2204 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2205 &val, (t_uscalar_t)sizeof (val)); 2206 2207 mutex_enter(&so->so_lock); 2208 so_lock_single(so); /* Set SOLOCKED */ 2209 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2210 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2211 soconnect_tpi_udp) { 2212 soisconnected(so); 2213 goto done; 2214 } 2215 /* 2216 * Send down T_CONN_REQ etc. 2217 * Clear fflag to avoid returning EWOULDBLOCK. 
2218 */ 2219 fflag = 0; 2220 ASSERT(so->so_family != AF_UNIX); 2221 so->so_state &= ~SS_LADDR_VALID; 2222 } else if (so->so_laddr_len != 0) { 2223 /* 2224 * If the local address or port was "any" then it may be 2225 * changed by the transport as a result of the 2226 * connect. Invalidate the cached version if we have one. 2227 */ 2228 switch (so->so_family) { 2229 case AF_INET: 2230 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t)); 2231 if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr == 2232 INADDR_ANY || 2233 ((sin_t *)so->so_laddr_sa)->sin_port == 0) 2234 so->so_state &= ~SS_LADDR_VALID; 2235 break; 2236 2237 case AF_INET6: 2238 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t)); 2239 if (IN6_IS_ADDR_UNSPECIFIED( 2240 &((sin6_t *)so->so_laddr_sa) ->sin6_addr) || 2241 IN6_IS_ADDR_V4MAPPED_ANY( 2242 &((sin6_t *)so->so_laddr_sa)->sin6_addr) || 2243 ((sin6_t *)so->so_laddr_sa)->sin6_port == 0) 2244 so->so_state &= ~SS_LADDR_VALID; 2245 break; 2246 2247 default: 2248 break; 2249 } 2250 } 2251 2252 /* 2253 * Check for failure of an earlier call 2254 */ 2255 if (so->so_error != 0) 2256 goto so_bad; 2257 2258 /* 2259 * Send down T_CONN_REQ. Message was allocated above. 2260 */ 2261 conn_req.PRIM_type = T_CONN_REQ; 2262 conn_req.DEST_length = addrlen; 2263 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2264 if (srclen == 0) { 2265 conn_req.OPT_length = 0; 2266 conn_req.OPT_offset = 0; 2267 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2268 soappendmsg(mp, addr, addrlen); 2269 } else { 2270 /* 2271 * There is a AF_UNIX sockaddr_un to include as a source 2272 * address option. 
2273 */ 2274 struct T_opthdr toh; 2275 2276 toh.level = SOL_SOCKET; 2277 toh.name = SO_SRCADDR; 2278 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2279 toh.status = 0; 2280 conn_req.OPT_length = 2281 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2282 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2283 _TPI_ALIGN_TOPT(addrlen)); 2284 2285 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2286 soappendmsg(mp, addr, addrlen); 2287 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2288 soappendmsg(mp, &toh, sizeof (toh)); 2289 soappendmsg(mp, src, srclen); 2290 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2291 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2292 } 2293 /* 2294 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2295 * in order to have the right state when the T_CONN_CON shows up. 2296 */ 2297 soisconnecting(so); 2298 mutex_exit(&so->so_lock); 2299 2300 if (audit_active) 2301 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2302 2303 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2304 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2305 mp = NULL; 2306 mutex_enter(&so->so_lock); 2307 if (error != 0) 2308 goto bad; 2309 2310 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2311 goto bad; 2312 2313 /* Allow other threads to access the socket */ 2314 so_unlock_single(so, SOLOCKED); 2315 need_unlock = B_FALSE; 2316 2317 /* 2318 * Wait until we get a T_CONN_CON or an error 2319 */ 2320 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2321 so_lock_single(so); /* Set SOLOCKED */ 2322 need_unlock = B_TRUE; 2323 } 2324 2325 done: 2326 freemsg(mp); 2327 switch (error) { 2328 case EINPROGRESS: 2329 case EALREADY: 2330 case EISCONN: 2331 case EINTR: 2332 /* Non-fatal errors */ 2333 so->so_state &= ~SS_LADDR_VALID; 2334 /* FALLTHRU */ 2335 case 0: 2336 break; 2337 2338 case EHOSTUNREACH: 2339 if (flags & _SOCONNECT_XPG4_2) { 2340 /* 2341 * X/Open specification contains a requirement that 2342 * ENETUNREACH be returned but does not require 
2343 * EHOSTUNREACH. In order to keep the test suite 2344 * happy we mess with the errno here. 2345 */ 2346 error = ENETUNREACH; 2347 } 2348 /* FALLTHRU */ 2349 2350 default: 2351 ASSERT(need_unlock); 2352 /* 2353 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2354 * and invalidate local-address cache 2355 */ 2356 so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID); 2357 /* A discon_ind might have already unbound us */ 2358 if ((flags & _SOCONNECT_DID_BIND) && 2359 (so->so_state & SS_ISBOUND)) { 2360 int err; 2361 2362 err = sotpi_unbind(so, 0); 2363 /* LINTED - statement has no conseq */ 2364 if (err) { 2365 eprintsoline(so, err); 2366 } 2367 } 2368 break; 2369 } 2370 if (need_unlock) 2371 so_unlock_single(so, SOLOCKED); 2372 mutex_exit(&so->so_lock); 2373 return (error); 2374 2375 so_bad: error = sogeterr(so); 2376 bad: eprintsoline(so, error); 2377 goto done; 2378 } 2379 2380 int 2381 sotpi_shutdown(struct sonode *so, int how) 2382 { 2383 struct T_ordrel_req ordrel_req; 2384 mblk_t *mp; 2385 uint_t old_state, state_change; 2386 int error = 0; 2387 2388 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2389 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2390 2391 mutex_enter(&so->so_lock); 2392 so_lock_single(so); /* Set SOLOCKED */ 2393 2394 /* 2395 * SunOS 4.X has no check for datagram sockets. 2396 * 5.X checks that it is connected (ENOTCONN) 2397 * X/Open requires that we check the connected state. 2398 */ 2399 if (!(so->so_state & SS_ISCONNECTED)) { 2400 if (!xnet_skip_checks) { 2401 error = ENOTCONN; 2402 if (xnet_check_print) { 2403 printf("sockfs: X/Open shutdown check " 2404 "caused ENOTCONN\n"); 2405 } 2406 } 2407 goto done; 2408 } 2409 /* 2410 * Record the current state and then perform any state changes. 2411 * Then use the difference between the old and new states to 2412 * determine which messages need to be sent. 2413 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2414 * duplicate calls to shutdown(). 
2415 */ 2416 old_state = so->so_state; 2417 2418 switch (how) { 2419 case 0: 2420 socantrcvmore(so); 2421 break; 2422 case 1: 2423 socantsendmore(so); 2424 break; 2425 case 2: 2426 socantsendmore(so); 2427 socantrcvmore(so); 2428 break; 2429 default: 2430 error = EINVAL; 2431 goto done; 2432 } 2433 2434 /* 2435 * Assumes that the SS_CANT* flags are never cleared in the above code. 2436 */ 2437 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2438 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2439 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2440 2441 switch (state_change) { 2442 case 0: 2443 dprintso(so, 1, 2444 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2445 so->so_state)); 2446 goto done; 2447 2448 case SS_CANTRCVMORE: 2449 mutex_exit(&so->so_lock); 2450 strseteof(SOTOV(so), 1); 2451 /* 2452 * strseteof takes care of read side wakeups, 2453 * pollwakeups, and signals. 2454 */ 2455 /* 2456 * Get the read lock before flushing data to avoid problems 2457 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2458 */ 2459 mutex_enter(&so->so_lock); 2460 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2461 mutex_exit(&so->so_lock); 2462 2463 /* Flush read side queue */ 2464 strflushrq(SOTOV(so), FLUSHALL); 2465 2466 mutex_enter(&so->so_lock); 2467 so_unlock_read(so); /* Clear SOREADLOCKED */ 2468 break; 2469 2470 case SS_CANTSENDMORE: 2471 mutex_exit(&so->so_lock); 2472 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2473 mutex_enter(&so->so_lock); 2474 break; 2475 2476 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2477 mutex_exit(&so->so_lock); 2478 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2479 strseteof(SOTOV(so), 1); 2480 /* 2481 * strseteof takes care of read side wakeups, 2482 * pollwakeups, and signals. 2483 */ 2484 /* 2485 * Get the read lock before flushing data to avoid problems 2486 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2487 */ 2488 mutex_enter(&so->so_lock); 2489 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2490 mutex_exit(&so->so_lock); 2491 2492 /* Flush read side queue */ 2493 strflushrq(SOTOV(so), FLUSHALL); 2494 2495 mutex_enter(&so->so_lock); 2496 so_unlock_read(so); /* Clear SOREADLOCKED */ 2497 break; 2498 } 2499 2500 ASSERT(MUTEX_HELD(&so->so_lock)); 2501 2502 /* 2503 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2504 * was set due to this call and the new state has both of them set: 2505 * Send the AF_UNIX close indication 2506 * For T_COTS send a discon_ind 2507 * 2508 * If cantsend was set due to this call: 2509 * For T_COTSORD send an ordrel_ind 2510 * 2511 * Note that for T_CLTS there is no message sent here. 2512 */ 2513 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2514 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2515 /* 2516 * For SunOS 4.X compatibility we tell the other end 2517 * that we are unable to receive at this point. 2518 */ 2519 if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS) 2520 so_unix_close(so); 2521 2522 if (so->so_serv_type == T_COTS) 2523 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2524 } 2525 if ((state_change & SS_CANTSENDMORE) && 2526 (so->so_serv_type == T_COTS_ORD)) { 2527 /* Send an orderly release */ 2528 ordrel_req.PRIM_type = T_ORDREL_REQ; 2529 2530 mutex_exit(&so->so_lock); 2531 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2532 0, _ALLOC_SLEEP); 2533 /* 2534 * Send down the T_ORDREL_REQ even if there is flow control. 2535 * This prevents shutdown from blocking. 2536 * Note that there is no T_OK_ACK for ordrel_req. 
2537 */ 2538 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2539 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2540 mutex_enter(&so->so_lock); 2541 if (error) { 2542 eprintsoline(so, error); 2543 goto done; 2544 } 2545 } 2546 2547 done: 2548 so_unlock_single(so, SOLOCKED); 2549 mutex_exit(&so->so_lock); 2550 return (error); 2551 } 2552 2553 /* 2554 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2555 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2556 * that we have closed. 2557 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2558 * T_UNITDATA_REQ containing the same option. 2559 * 2560 * For SOCK_DGRAM half-connections (somebody connected to this end 2561 * but this end is not connect) we don't know where to send any 2562 * SO_UNIX_CLOSE. 2563 * 2564 * We have to ignore stream head errors just in case there has been 2565 * a shutdown(output). 2566 * Ignore any flow control to try to get the message more quickly to the peer. 2567 * While locally ignoring flow control solves the problem when there 2568 * is only the loopback transport on the stream it would not provide 2569 * the correct AF_UNIX socket semantics when one or more modules have 2570 * been pushed. 
2571 */ 2572 void 2573 so_unix_close(struct sonode *so) 2574 { 2575 int error; 2576 struct T_opthdr toh; 2577 mblk_t *mp; 2578 2579 ASSERT(MUTEX_HELD(&so->so_lock)); 2580 2581 ASSERT(so->so_family == AF_UNIX); 2582 2583 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2584 (SS_ISCONNECTED|SS_ISBOUND)) 2585 return; 2586 2587 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2588 (void *)so, pr_state(so->so_state, so->so_mode))); 2589 2590 toh.level = SOL_SOCKET; 2591 toh.name = SO_UNIX_CLOSE; 2592 2593 /* zero length + header */ 2594 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2595 toh.status = 0; 2596 2597 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2598 struct T_optdata_req tdr; 2599 2600 tdr.PRIM_type = T_OPTDATA_REQ; 2601 tdr.DATA_flag = 0; 2602 2603 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2604 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2605 2606 /* NOTE: holding so_lock while sleeping */ 2607 mp = soallocproto2(&tdr, sizeof (tdr), 2608 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2609 } else { 2610 struct T_unitdata_req tudr; 2611 void *addr; 2612 socklen_t addrlen; 2613 void *src; 2614 socklen_t srclen; 2615 struct T_opthdr toh2; 2616 t_scalar_t size; 2617 2618 /* Connecteded DGRAM socket */ 2619 2620 /* 2621 * For AF_UNIX the destination address is translated to 2622 * an internal name and the source address is passed as 2623 * an option. 2624 */ 2625 /* 2626 * Length and family checks. 2627 */ 2628 error = so_addr_verify(so, so->so_faddr_sa, 2629 (t_uscalar_t)so->so_faddr_len); 2630 if (error) { 2631 eprintsoline(so, error); 2632 return; 2633 } 2634 if (so->so_state & SS_FADDR_NOXLATE) { 2635 /* 2636 * Already have a transport internal address. Do not 2637 * pass any (transport internal) source address. 2638 */ 2639 addr = so->so_faddr_sa; 2640 addrlen = (t_uscalar_t)so->so_faddr_len; 2641 src = NULL; 2642 srclen = 0; 2643 } else { 2644 /* 2645 * Pass the sockaddr_un source address as an option 2646 * and translate the remote address. 
2647 * Holding so_lock thus so_laddr_sa can not change. 2648 */ 2649 src = so->so_laddr_sa; 2650 srclen = (socklen_t)so->so_laddr_len; 2651 dprintso(so, 1, 2652 ("so_ux_close: srclen %d, src %p\n", 2653 srclen, src)); 2654 error = so_ux_addr_xlate(so, 2655 so->so_faddr_sa, 2656 (socklen_t)so->so_faddr_len, 0, 2657 &addr, &addrlen); 2658 if (error) { 2659 eprintsoline(so, error); 2660 return; 2661 } 2662 } 2663 tudr.PRIM_type = T_UNITDATA_REQ; 2664 tudr.DEST_length = addrlen; 2665 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2666 if (srclen == 0) { 2667 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2668 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2669 _TPI_ALIGN_TOPT(addrlen)); 2670 2671 size = tudr.OPT_offset + tudr.OPT_length; 2672 /* NOTE: holding so_lock while sleeping */ 2673 mp = soallocproto2(&tudr, sizeof (tudr), 2674 addr, addrlen, size, _ALLOC_SLEEP); 2675 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2676 soappendmsg(mp, &toh, sizeof (toh)); 2677 } else { 2678 /* 2679 * There is a AF_UNIX sockaddr_un to include as a 2680 * source address option. 
2681 */ 2682 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2683 _TPI_ALIGN_TOPT(srclen)); 2684 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2685 _TPI_ALIGN_TOPT(addrlen)); 2686 2687 toh2.level = SOL_SOCKET; 2688 toh2.name = SO_SRCADDR; 2689 toh2.len = (t_uscalar_t)(srclen + 2690 sizeof (struct T_opthdr)); 2691 toh2.status = 0; 2692 2693 size = tudr.OPT_offset + tudr.OPT_length; 2694 2695 /* NOTE: holding so_lock while sleeping */ 2696 mp = soallocproto2(&tudr, sizeof (tudr), 2697 addr, addrlen, size, _ALLOC_SLEEP); 2698 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2699 soappendmsg(mp, &toh, sizeof (toh)); 2700 soappendmsg(mp, &toh2, sizeof (toh2)); 2701 soappendmsg(mp, src, srclen); 2702 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2703 } 2704 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2705 } 2706 mutex_exit(&so->so_lock); 2707 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2708 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2709 mutex_enter(&so->so_lock); 2710 } 2711 2712 /* 2713 * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 2714 */ 2715 int 2716 sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) 2717 { 2718 mblk_t *mp, *nmp; 2719 int error; 2720 2721 dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", 2722 (void *)so, (void *)msg, flags)); 2723 2724 /* 2725 * There is never any oob data with addresses or control since 2726 * the T_EXDATA_IND does not carry any options. 
2727 */ 2728 msg->msg_controllen = 0; 2729 msg->msg_namelen = 0; 2730 2731 mutex_enter(&so->so_lock); 2732 ASSERT(so_verify_oobstate(so)); 2733 if ((so->so_options & SO_OOBINLINE) || 2734 (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 2735 dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 2736 mutex_exit(&so->so_lock); 2737 return (EINVAL); 2738 } 2739 if (!(so->so_state & SS_HAVEOOBDATA)) { 2740 dprintso(so, 1, ("sorecvoob: no data yet\n")); 2741 mutex_exit(&so->so_lock); 2742 return (EWOULDBLOCK); 2743 } 2744 ASSERT(so->so_oobmsg != NULL); 2745 mp = so->so_oobmsg; 2746 if (flags & MSG_PEEK) { 2747 /* 2748 * Since recv* can not return ENOBUFS we can not use dupmsg. 2749 * Instead we revert to the consolidation private 2750 * allocb_wait plus bcopy. 2751 */ 2752 mblk_t *mp1; 2753 2754 mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 2755 ASSERT(mp1); 2756 2757 while (mp != NULL) { 2758 ssize_t size; 2759 2760 size = MBLKL(mp); 2761 bcopy(mp->b_rptr, mp1->b_wptr, size); 2762 mp1->b_wptr += size; 2763 ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 2764 mp = mp->b_cont; 2765 } 2766 mp = mp1; 2767 } else { 2768 /* 2769 * Update the state indicating that the data has been consumed. 2770 * Keep SS_OOBPEND set until data is consumed past the mark. 
2771 */ 2772 so->so_oobmsg = NULL; 2773 so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 2774 } 2775 dprintso(so, 1, 2776 ("after recvoob(%p): counts %d/%d state %s\n", 2777 (void *)so, so->so_oobsigcnt, 2778 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2779 ASSERT(so_verify_oobstate(so)); 2780 mutex_exit(&so->so_lock); 2781 2782 error = 0; 2783 nmp = mp; 2784 while (nmp != NULL && uiop->uio_resid > 0) { 2785 ssize_t n = MBLKL(nmp); 2786 2787 n = MIN(n, uiop->uio_resid); 2788 if (n > 0) 2789 error = uiomove(nmp->b_rptr, n, 2790 UIO_READ, uiop); 2791 if (error) 2792 break; 2793 nmp = nmp->b_cont; 2794 } 2795 freemsg(mp); 2796 return (error); 2797 } 2798 2799 /* 2800 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2801 * In addition, the caller typically verifies that there is some 2802 * potential state to clear by checking 2803 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2804 * before calling this routine. 2805 * Note that such a check can be made without holding so_lock since 2806 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2807 * decrements so_oobsigcnt. 2808 * 2809 * When data is read *after* the point that all pending 2810 * oob data has been consumed the oob indication is cleared. 2811 * 2812 * This logic keeps select/poll returning POLLRDBAND and 2813 * SIOCATMARK returning true until we have read past 2814 * the mark. 
2815 */ 2816 static void 2817 sorecv_update_oobstate(struct sonode *so) 2818 { 2819 mutex_enter(&so->so_lock); 2820 ASSERT(so_verify_oobstate(so)); 2821 dprintso(so, 1, 2822 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2823 so->so_oobsigcnt, 2824 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2825 if (so->so_oobsigcnt == 0) { 2826 /* No more pending oob indications */ 2827 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2828 freemsg(so->so_oobmsg); 2829 so->so_oobmsg = NULL; 2830 } 2831 ASSERT(so_verify_oobstate(so)); 2832 mutex_exit(&so->so_lock); 2833 } 2834 2835 /* 2836 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2837 */ 2838 static int 2839 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2840 { 2841 int error = 0; 2842 mblk_t *tmp = NULL; 2843 mblk_t *pmp = NULL; 2844 mblk_t *nmp = so->so_nl7c_rcv_mp; 2845 2846 ASSERT(nmp != NULL); 2847 2848 while (nmp != NULL && uiop->uio_resid > 0) { 2849 ssize_t n; 2850 2851 if (DB_TYPE(nmp) == M_DATA) { 2852 /* 2853 * We have some data, uiomove up to resid bytes. 2854 */ 2855 n = MIN(MBLKL(nmp), uiop->uio_resid); 2856 if (n > 0) 2857 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 2858 nmp->b_rptr += n; 2859 if (nmp->b_rptr == nmp->b_wptr) { 2860 pmp = nmp; 2861 nmp = nmp->b_cont; 2862 } 2863 if (error) 2864 break; 2865 } else { 2866 /* 2867 * We only handle data, save for caller to handle. 
2868 */ 2869 if (pmp != NULL) { 2870 pmp->b_cont = nmp->b_cont; 2871 } 2872 nmp->b_cont = NULL; 2873 if (*rmp == NULL) { 2874 *rmp = nmp; 2875 } else { 2876 tmp->b_cont = nmp; 2877 } 2878 nmp = nmp->b_cont; 2879 tmp = nmp; 2880 } 2881 } 2882 if (pmp != NULL) { 2883 /* Free any mblk_t(s) which we have consumed */ 2884 pmp->b_cont = NULL; 2885 freemsg(so->so_nl7c_rcv_mp); 2886 } 2887 if ((so->so_nl7c_rcv_mp = nmp) == NULL) { 2888 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 2889 if (error == 0) { 2890 rval_t *p = (rval_t *)&so->so_nl7c_rcv_rval; 2891 2892 error = p->r_v.r_v2; 2893 p->r_v.r_v2 = 0; 2894 } 2895 rp->r_vals = so->so_nl7c_rcv_rval; 2896 so->so_nl7c_rcv_rval = 0; 2897 } else { 2898 /* More mblk_t(s) to process so no rval to return */ 2899 rp->r_vals = 0; 2900 } 2901 return (error); 2902 } 2903 2904 /* 2905 * Receive the next message on the queue. 2906 * If msg_controllen is non-zero when called the caller is interested in 2907 * any received control info (options). 2908 * If msg_namelen is non-zero when called the caller is interested in 2909 * any received source address. 2910 * The routine returns with msg_control and msg_name pointing to 2911 * kmem_alloc'ed memory which the caller has to free. 
2912 */ 2913 int 2914 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 2915 { 2916 union T_primitives *tpr; 2917 mblk_t *mp; 2918 uchar_t pri; 2919 int pflag, opflag; 2920 void *control; 2921 t_uscalar_t controllen; 2922 t_uscalar_t namelen; 2923 int so_state = so->so_state; /* Snapshot */ 2924 ssize_t saved_resid; 2925 rval_t rval; 2926 int flags; 2927 clock_t timout; 2928 int first; 2929 int error = 0; 2930 struct uio *suiop = NULL; 2931 sodirect_t *sodp = so->so_direct; 2932 2933 flags = msg->msg_flags; 2934 msg->msg_flags = 0; 2935 2936 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 2937 (void *)so, (void *)msg, flags, 2938 pr_state(so->so_state, so->so_mode), so->so_error)); 2939 2940 /* 2941 * If we are not connected because we have never been connected 2942 * we return ENOTCONN. If we have been connected (but are no longer 2943 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 2944 * the EOF. 2945 * 2946 * An alternative would be to post an ENOTCONN error in stream head 2947 * (read+write) and clear it when we're connected. However, that error 2948 * would cause incorrect poll/select behavior! 2949 */ 2950 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 2951 (so->so_mode & SM_CONNREQUIRED)) { 2952 return (ENOTCONN); 2953 } 2954 2955 /* 2956 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 2957 * after checking that the read queue is empty) and returns zero. 2958 * This implementation will sleep (in kstrgetmsg) even if uio_resid 2959 * is zero. 2960 */ 2961 2962 if (flags & MSG_OOB) { 2963 /* Check that the transport supports OOB */ 2964 if (!(so->so_mode & SM_EXDATA)) 2965 return (EOPNOTSUPP); 2966 return (sorecvoob(so, msg, uiop, flags)); 2967 } 2968 2969 /* 2970 * Set msg_controllen and msg_namelen to zero here to make it 2971 * simpler in the cases that no control or name is returned. 
2972 */ 2973 controllen = msg->msg_controllen; 2974 namelen = msg->msg_namelen; 2975 msg->msg_controllen = 0; 2976 msg->msg_namelen = 0; 2977 2978 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 2979 namelen, controllen)); 2980 2981 mutex_enter(&so->so_lock); 2982 /* 2983 * If an NL7C enabled socket and not waiting for write data. 2984 */ 2985 if ((so->so_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 2986 NL7C_ENABLED) { 2987 if (so->so_nl7c_uri) { 2988 /* Close uri processing for a previous request */ 2989 nl7c_close(so); 2990 } 2991 if ((so_state & SS_CANTRCVMORE) && so->so_nl7c_rcv_mp == NULL) { 2992 /* Nothing to process, EOF */ 2993 mutex_exit(&so->so_lock); 2994 return (0); 2995 } else if (so->so_nl7c_flags & NL7C_SOPERSIST) { 2996 /* Persistent NL7C socket, try to process request */ 2997 boolean_t ret; 2998 2999 ret = nl7c_process(so, 3000 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3001 rval.r_vals = so->so_nl7c_rcv_rval; 3002 error = rval.r_v.r_v2; 3003 if (error) { 3004 /* Error of some sort, return it */ 3005 mutex_exit(&so->so_lock); 3006 return (error); 3007 } 3008 if (so->so_nl7c_flags && 3009 ! (so->so_nl7c_flags & NL7C_WAITWRITE)) { 3010 /* 3011 * Still an NL7C socket and no data 3012 * to pass up to the caller. 3013 */ 3014 mutex_exit(&so->so_lock); 3015 if (ret) { 3016 /* EOF */ 3017 return (0); 3018 } else { 3019 /* Need more data */ 3020 return (EAGAIN); 3021 } 3022 } 3023 } else { 3024 /* 3025 * Not persistent so no further NL7C processing. 3026 */ 3027 so->so_nl7c_flags = 0; 3028 } 3029 } 3030 /* 3031 * Only one reader is allowed at any given time. This is needed 3032 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3033 * 3034 * This is slightly different that BSD behavior in that it fails with 3035 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3036 * is single-threaded using sblock(), which is dropped while waiting 3037 * for data to appear. The difference shows up e.g. 
if one 3038 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3039 * does use nonblocking io and different threads are reading each 3040 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3041 * in this case as long as the read queue doesn't get empty. 3042 * In this implementation the thread using nonblocking io can 3043 * get an EWOULDBLOCK error due to the blocking thread executing 3044 * e.g. in the uiomove in kstrgetmsg. 3045 * This difference is not believed to be significant. 3046 */ 3047 /* Set SOREADLOCKED */ 3048 error = so_lock_read_intr(so, 3049 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3050 mutex_exit(&so->so_lock); 3051 if (error) 3052 return (error); 3053 3054 /* 3055 * Tell kstrgetmsg to not inspect the stream head errors until all 3056 * queued data has been consumed. 3057 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3058 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3059 * 3060 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3061 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3062 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3063 */ 3064 pflag = MSG_ANY | MSG_DELAYERROR; 3065 if (flags & MSG_PEEK) { 3066 pflag |= MSG_IPEEK; 3067 flags &= ~MSG_WAITALL; 3068 } 3069 if (so->so_mode & SM_ATOMIC) 3070 pflag |= MSG_DISCARDTAIL; 3071 3072 if (flags & MSG_DONTWAIT) 3073 timout = 0; 3074 else 3075 timout = -1; 3076 opflag = pflag; 3077 first = 1; 3078 3079 if (uiop->uio_resid >= uioasync.mincnt && 3080 sodp != NULL && (sodp->sod_state & SOD_ENABLED) && 3081 uioasync.enabled && !(flags & MSG_PEEK) && 3082 !(so_state & SS_CANTRCVMORE)) { 3083 /* 3084 * Big enough I/O for uioa min setup and an sodirect socket 3085 * and sodirect enabled and uioa enabled and I/O will be done 3086 * and not EOF so initialize the sodirect_t uioa_t with "uiop". 
3087 */ 3088 mutex_enter(sodp->sod_lockp); 3089 if (!uioainit(uiop, &sodp->sod_uioa)) { 3090 /* 3091 * Successful uioainit() so the uio_t part of the 3092 * uioa_t will be used for all uio_t work to follow, 3093 * we save the original "uiop" in "suiop". 3094 */ 3095 suiop = uiop; 3096 uiop = (uio_t *)&sodp->sod_uioa; 3097 /* 3098 * Before returning to the caller the passed in uio_t 3099 * "uiop" will be updated via a call to uioafini() 3100 * below. 3101 * 3102 * Note, the uioa.uioa_state isn't set to UIOA_ENABLED 3103 * here as first we have to uioamove() any currently 3104 * queued M_DATA mblk_t(s) so it will be done in 3105 * kstrgetmsg(). 3106 */ 3107 } 3108 /* 3109 * In either uioainit() success or not case note the number 3110 * of uio bytes the caller wants for sod framework and/or 3111 * transport (e.g. TCP) strategy. 3112 */ 3113 sodp->sod_want = uiop->uio_resid; 3114 mutex_exit(sodp->sod_lockp); 3115 } else if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) { 3116 /* 3117 * No uioa but still using sodirect so note the number of 3118 * uio bytes the caller wants for sodirect framework and/or 3119 * transport (e.g. TCP) strategy. 3120 * 3121 * Note, sod_lockp not held, only writer is in this function 3122 * and only one thread at a time so not needed just to init. 
3123 */ 3124 sodp->sod_want = uiop->uio_resid; 3125 } 3126 retry: 3127 saved_resid = uiop->uio_resid; 3128 pri = 0; 3129 mp = NULL; 3130 if (so->so_nl7c_rcv_mp != NULL) { 3131 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3132 error = nl7c_sorecv(so, &mp, uiop, &rval); 3133 } else { 3134 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3135 timout, &rval); 3136 } 3137 if (error) { 3138 switch (error) { 3139 case EINTR: 3140 case EWOULDBLOCK: 3141 if (!first) 3142 error = 0; 3143 break; 3144 case ETIME: 3145 /* Returned from kstrgetmsg when timeout expires */ 3146 if (!first) 3147 error = 0; 3148 else 3149 error = EWOULDBLOCK; 3150 break; 3151 default: 3152 eprintsoline(so, error); 3153 break; 3154 } 3155 goto out; 3156 } 3157 /* 3158 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3159 * For non-datagrams MOREDATA is used to set MSG_EOR. 3160 */ 3161 ASSERT(!(rval.r_val1 & MORECTL)); 3162 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3163 msg->msg_flags |= MSG_TRUNC; 3164 3165 if (mp == NULL) { 3166 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3167 /* 3168 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3169 * The draft Posix socket spec states that the mark should 3170 * not be cleared when peeking. We follow the latter. 3171 */ 3172 if ((so->so_state & 3173 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3174 (uiop->uio_resid != saved_resid) && 3175 !(flags & MSG_PEEK)) { 3176 sorecv_update_oobstate(so); 3177 } 3178 3179 mutex_enter(&so->so_lock); 3180 /* Set MSG_EOR based on MOREDATA */ 3181 if (!(rval.r_val1 & MOREDATA)) { 3182 if (so->so_state & SS_SAVEDEOR) { 3183 msg->msg_flags |= MSG_EOR; 3184 so->so_state &= ~SS_SAVEDEOR; 3185 } 3186 } 3187 /* 3188 * If some data was received (i.e. not EOF) and the 3189 * read/recv* has not been satisfied wait for some more. 
3190 */ 3191 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3192 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3193 mutex_exit(&so->so_lock); 3194 first = 0; 3195 pflag = opflag | MSG_NOMARK; 3196 goto retry; 3197 } 3198 goto out_locked; 3199 } 3200 3201 /* strsock_proto has already verified length and alignment */ 3202 tpr = (union T_primitives *)mp->b_rptr; 3203 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3204 3205 switch (tpr->type) { 3206 case T_DATA_IND: { 3207 if ((so->so_state & 3208 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3209 (uiop->uio_resid != saved_resid) && 3210 !(flags & MSG_PEEK)) { 3211 sorecv_update_oobstate(so); 3212 } 3213 3214 /* 3215 * Set msg_flags to MSG_EOR based on 3216 * MORE_flag and MOREDATA. 3217 */ 3218 mutex_enter(&so->so_lock); 3219 so->so_state &= ~SS_SAVEDEOR; 3220 if (!(tpr->data_ind.MORE_flag & 1)) { 3221 if (!(rval.r_val1 & MOREDATA)) 3222 msg->msg_flags |= MSG_EOR; 3223 else 3224 so->so_state |= SS_SAVEDEOR; 3225 } 3226 freemsg(mp); 3227 /* 3228 * If some data was received (i.e. not EOF) and the 3229 * read/recv* has not been satisfied wait for some more. 
3230 */ 3231 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3232 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3233 mutex_exit(&so->so_lock); 3234 first = 0; 3235 pflag = opflag | MSG_NOMARK; 3236 goto retry; 3237 } 3238 goto out_locked; 3239 } 3240 case T_UNITDATA_IND: { 3241 void *addr; 3242 t_uscalar_t addrlen; 3243 void *abuf; 3244 t_uscalar_t optlen; 3245 void *opt; 3246 3247 if ((so->so_state & 3248 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3249 (uiop->uio_resid != saved_resid) && 3250 !(flags & MSG_PEEK)) { 3251 sorecv_update_oobstate(so); 3252 } 3253 3254 if (namelen != 0) { 3255 /* Caller wants source address */ 3256 addrlen = tpr->unitdata_ind.SRC_length; 3257 addr = sogetoff(mp, 3258 tpr->unitdata_ind.SRC_offset, 3259 addrlen, 1); 3260 if (addr == NULL) { 3261 freemsg(mp); 3262 error = EPROTO; 3263 eprintsoline(so, error); 3264 goto out; 3265 } 3266 if (so->so_family == AF_UNIX) { 3267 /* 3268 * Can not use the transport level address. 3269 * If there is a SO_SRCADDR option carrying 3270 * the socket level address it will be 3271 * extracted below. 3272 */ 3273 addr = NULL; 3274 addrlen = 0; 3275 } 3276 } 3277 optlen = tpr->unitdata_ind.OPT_length; 3278 if (optlen != 0) { 3279 t_uscalar_t ncontrollen; 3280 3281 /* 3282 * Extract any source address option. 3283 * Determine how large cmsg buffer is needed. 3284 */ 3285 opt = sogetoff(mp, 3286 tpr->unitdata_ind.OPT_offset, 3287 optlen, __TPI_ALIGN_SIZE); 3288 3289 if (opt == NULL) { 3290 freemsg(mp); 3291 error = EPROTO; 3292 eprintsoline(so, error); 3293 goto out; 3294 } 3295 if (so->so_family == AF_UNIX) 3296 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3297 ncontrollen = so_cmsglen(mp, opt, optlen, 3298 !(flags & MSG_XPG4_2)); 3299 if (controllen != 0) 3300 controllen = ncontrollen; 3301 else if (ncontrollen != 0) 3302 msg->msg_flags |= MSG_CTRUNC; 3303 } else { 3304 controllen = 0; 3305 } 3306 3307 if (namelen != 0) { 3308 /* 3309 * Return address to caller. 
3310 * Caller handles truncation if length 3311 * exceeds msg_namelen. 3312 * NOTE: AF_UNIX NUL termination is ensured by 3313 * the sender's copyin_name(). 3314 */ 3315 abuf = kmem_alloc(addrlen, KM_SLEEP); 3316 3317 bcopy(addr, abuf, addrlen); 3318 msg->msg_name = abuf; 3319 msg->msg_namelen = addrlen; 3320 } 3321 3322 if (controllen != 0) { 3323 /* 3324 * Return control msg to caller. 3325 * Caller handles truncation if length 3326 * exceeds msg_controllen. 3327 */ 3328 control = kmem_zalloc(controllen, KM_SLEEP); 3329 3330 error = so_opt2cmsg(mp, opt, optlen, 3331 !(flags & MSG_XPG4_2), 3332 control, controllen); 3333 if (error) { 3334 freemsg(mp); 3335 if (msg->msg_namelen != 0) 3336 kmem_free(msg->msg_name, 3337 msg->msg_namelen); 3338 kmem_free(control, controllen); 3339 eprintsoline(so, error); 3340 goto out; 3341 } 3342 msg->msg_control = control; 3343 msg->msg_controllen = controllen; 3344 } 3345 3346 freemsg(mp); 3347 goto out; 3348 } 3349 case T_OPTDATA_IND: { 3350 struct T_optdata_req *tdr; 3351 void *opt; 3352 t_uscalar_t optlen; 3353 3354 if ((so->so_state & 3355 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3356 (uiop->uio_resid != saved_resid) && 3357 !(flags & MSG_PEEK)) { 3358 sorecv_update_oobstate(so); 3359 } 3360 3361 tdr = (struct T_optdata_req *)mp->b_rptr; 3362 optlen = tdr->OPT_length; 3363 if (optlen != 0) { 3364 t_uscalar_t ncontrollen; 3365 /* 3366 * Determine how large cmsg buffer is needed. 3367 */ 3368 opt = sogetoff(mp, 3369 tpr->optdata_ind.OPT_offset, 3370 optlen, __TPI_ALIGN_SIZE); 3371 3372 if (opt == NULL) { 3373 freemsg(mp); 3374 error = EPROTO; 3375 eprintsoline(so, error); 3376 goto out; 3377 } 3378 3379 ncontrollen = so_cmsglen(mp, opt, optlen, 3380 !(flags & MSG_XPG4_2)); 3381 if (controllen != 0) 3382 controllen = ncontrollen; 3383 else if (ncontrollen != 0) 3384 msg->msg_flags |= MSG_CTRUNC; 3385 } else { 3386 controllen = 0; 3387 } 3388 3389 if (controllen != 0) { 3390 /* 3391 * Return control msg to caller. 
3392 * Caller handles truncation if length 3393 * exceeds msg_controllen. 3394 */ 3395 control = kmem_zalloc(controllen, KM_SLEEP); 3396 3397 error = so_opt2cmsg(mp, opt, optlen, 3398 !(flags & MSG_XPG4_2), 3399 control, controllen); 3400 if (error) { 3401 freemsg(mp); 3402 kmem_free(control, controllen); 3403 eprintsoline(so, error); 3404 goto out; 3405 } 3406 msg->msg_control = control; 3407 msg->msg_controllen = controllen; 3408 } 3409 3410 /* 3411 * Set msg_flags to MSG_EOR based on 3412 * DATA_flag and MOREDATA. 3413 */ 3414 mutex_enter(&so->so_lock); 3415 so->so_state &= ~SS_SAVEDEOR; 3416 if (!(tpr->data_ind.MORE_flag & 1)) { 3417 if (!(rval.r_val1 & MOREDATA)) 3418 msg->msg_flags |= MSG_EOR; 3419 else 3420 so->so_state |= SS_SAVEDEOR; 3421 } 3422 freemsg(mp); 3423 /* 3424 * If some data was received (i.e. not EOF) and the 3425 * read/recv* has not been satisfied wait for some more. 3426 * Not possible to wait if control info was received. 3427 */ 3428 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3429 controllen == 0 && 3430 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3431 mutex_exit(&so->so_lock); 3432 first = 0; 3433 pflag = opflag | MSG_NOMARK; 3434 goto retry; 3435 } 3436 goto out_locked; 3437 } 3438 case T_EXDATA_IND: { 3439 dprintso(so, 1, 3440 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3441 "state %s\n", 3442 so->so_oobsigcnt, so->so_oobcnt, 3443 saved_resid - uiop->uio_resid, 3444 pr_state(so->so_state, so->so_mode))); 3445 /* 3446 * kstrgetmsg handles MSGMARK so there is nothing to 3447 * inspect in the T_EXDATA_IND. 3448 * strsock_proto makes the stream head queue the T_EXDATA_IND 3449 * as a separate message with no M_DATA component. Furthermore, 3450 * the stream head does not consolidate M_DATA messages onto 3451 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3452 * remains a message by itself. 
This is needed since MSGMARK 3453 * marks both the whole message as well as the last byte 3454 * of the message. 3455 */ 3456 freemsg(mp); 3457 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3458 if (flags & MSG_PEEK) { 3459 /* 3460 * Even though we are peeking we consume the 3461 * T_EXDATA_IND thereby moving the mark information 3462 * to SS_RCVATMARK. Then the oob code below will 3463 * retry the peeking kstrgetmsg. 3464 * Note that the stream head read queue is 3465 * never flushed without holding SOREADLOCKED 3466 * thus the T_EXDATA_IND can not disappear 3467 * underneath us. 3468 */ 3469 dprintso(so, 1, 3470 ("sotpi_recvmsg: consume EXDATA_IND " 3471 "counts %d/%d state %s\n", 3472 so->so_oobsigcnt, 3473 so->so_oobcnt, 3474 pr_state(so->so_state, so->so_mode))); 3475 3476 pflag = MSG_ANY | MSG_DELAYERROR; 3477 if (so->so_mode & SM_ATOMIC) 3478 pflag |= MSG_DISCARDTAIL; 3479 3480 pri = 0; 3481 mp = NULL; 3482 3483 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3484 &pri, &pflag, (clock_t)-1, &rval); 3485 ASSERT(uiop->uio_resid == saved_resid); 3486 3487 if (error) { 3488 #ifdef SOCK_DEBUG 3489 if (error != EWOULDBLOCK && error != EINTR) { 3490 eprintsoline(so, error); 3491 } 3492 #endif /* SOCK_DEBUG */ 3493 goto out; 3494 } 3495 ASSERT(mp); 3496 tpr = (union T_primitives *)mp->b_rptr; 3497 ASSERT(tpr->type == T_EXDATA_IND); 3498 freemsg(mp); 3499 } /* end "if (flags & MSG_PEEK)" */ 3500 3501 /* 3502 * Decrement the number of queued and pending oob. 3503 * 3504 * SS_RCVATMARK is cleared when we read past a mark. 3505 * SS_HAVEOOBDATA is cleared when we've read past the 3506 * last mark. 3507 * SS_OOBPEND is cleared if we've read past the last 3508 * mark and no (new) SIGURG has been posted. 
3509 */ 3510 mutex_enter(&so->so_lock); 3511 ASSERT(so_verify_oobstate(so)); 3512 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 3513 ASSERT(so->so_oobsigcnt > 0); 3514 so->so_oobsigcnt--; 3515 ASSERT(so->so_oobcnt > 0); 3516 so->so_oobcnt--; 3517 /* 3518 * Since the T_EXDATA_IND has been removed from the stream 3519 * head, but we have not read data past the mark, 3520 * sockfs needs to track that the socket is still at the mark. 3521 * 3522 * Since no data was received call kstrgetmsg again to wait 3523 * for data. 3524 */ 3525 so->so_state |= SS_RCVATMARK; 3526 mutex_exit(&so->so_lock); 3527 dprintso(so, 1, 3528 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3529 so->so_oobsigcnt, so->so_oobcnt, 3530 pr_state(so->so_state, so->so_mode))); 3531 pflag = opflag; 3532 goto retry; 3533 } 3534 default: 3535 ASSERT(0); 3536 freemsg(mp); 3537 error = EPROTO; 3538 eprintsoline(so, error); 3539 goto out; 3540 } 3541 /* NOTREACHED */ 3542 out: 3543 mutex_enter(&so->so_lock); 3544 out_locked: 3545 if (sodp != NULL) { 3546 /* Finish any sodirect and uioa processing */ 3547 mutex_enter(sodp->sod_lockp); 3548 if (suiop != NULL) { 3549 /* Finish any uioa_t processing */ 3550 int ret; 3551 3552 ASSERT(uiop == (uio_t *)&sodp->sod_uioa); 3553 ret = uioafini(suiop, (uioa_t *)uiop); 3554 if (error == 0 && ret != 0) { 3555 /* If no error yet, set it */ 3556 error = ret; 3557 } 3558 if ((mp = sodp->sod_uioafh) != NULL) { 3559 sodp->sod_uioafh = NULL; 3560 sodp->sod_uioaft = NULL; 3561 freemsg(mp); 3562 } 3563 } 3564 ASSERT(sodp->sod_uioafh == NULL); 3565 if (!(sodp->sod_state & SOD_WAKE_NOT)) { 3566 /* Awoke */ 3567 sodp->sod_state &= SOD_WAKE_CLR; 3568 sodp->sod_state |= SOD_WAKE_NOT; 3569 } 3570 /* Last, clear sod_want value */ 3571 sodp->sod_want = 0; 3572 mutex_exit(sodp->sod_lockp); 3573 } 3574 so_unlock_read(so); /* Clear SOREADLOCKED */ 3575 mutex_exit(&so->so_lock); 3576 return (error); 3577 } 3578 3579 /* 3580 * Sending data with options on a datagram socket. 
3581 * Assumes caller has verified that SS_ISBOUND etc. are set. 3582 */ 3583 static int 3584 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3585 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3586 { 3587 struct T_unitdata_req tudr; 3588 mblk_t *mp; 3589 int error; 3590 void *addr; 3591 socklen_t addrlen; 3592 void *src; 3593 socklen_t srclen; 3594 ssize_t len; 3595 int size; 3596 struct T_opthdr toh; 3597 struct fdbuf *fdbuf; 3598 t_uscalar_t optlen; 3599 void *fds; 3600 int fdlen; 3601 3602 ASSERT(name && namelen); 3603 ASSERT(control && controllen); 3604 3605 len = uiop->uio_resid; 3606 if (len > (ssize_t)so->so_tidu_size) { 3607 return (EMSGSIZE); 3608 } 3609 3610 /* 3611 * For AF_UNIX the destination address is translated to an internal 3612 * name and the source address is passed as an option. 3613 * Also, file descriptors are passed as file pointers in an 3614 * option. 3615 */ 3616 3617 /* 3618 * Length and family checks. 3619 */ 3620 error = so_addr_verify(so, name, namelen); 3621 if (error) { 3622 eprintsoline(so, error); 3623 return (error); 3624 } 3625 if (so->so_family == AF_UNIX) { 3626 if (so->so_state & SS_FADDR_NOXLATE) { 3627 /* 3628 * Already have a transport internal address. Do not 3629 * pass any (transport internal) source address. 3630 */ 3631 addr = name; 3632 addrlen = namelen; 3633 src = NULL; 3634 srclen = 0; 3635 } else { 3636 /* 3637 * Pass the sockaddr_un source address as an option 3638 * and translate the remote address. 3639 * 3640 * Note that this code does not prevent so_laddr_sa 3641 * from changing while it is being used. Thus 3642 * if an unbind+bind occurs concurrently with this 3643 * send the peer might see a partially new and a 3644 * partially old "from" address. 
3645 */ 3646 src = so->so_laddr_sa; 3647 srclen = (t_uscalar_t)so->so_laddr_len; 3648 dprintso(so, 1, 3649 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3650 srclen, src)); 3651 error = so_ux_addr_xlate(so, name, namelen, 3652 (flags & MSG_XPG4_2), 3653 &addr, &addrlen); 3654 if (error) { 3655 eprintsoline(so, error); 3656 return (error); 3657 } 3658 } 3659 } else { 3660 addr = name; 3661 addrlen = namelen; 3662 src = NULL; 3663 srclen = 0; 3664 } 3665 optlen = so_optlen(control, controllen, 3666 !(flags & MSG_XPG4_2)); 3667 tudr.PRIM_type = T_UNITDATA_REQ; 3668 tudr.DEST_length = addrlen; 3669 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3670 if (srclen != 0) 3671 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3672 _TPI_ALIGN_TOPT(srclen)); 3673 else 3674 tudr.OPT_length = optlen; 3675 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3676 _TPI_ALIGN_TOPT(addrlen)); 3677 3678 size = tudr.OPT_offset + tudr.OPT_length; 3679 3680 /* 3681 * File descriptors only when SM_FDPASSING set. 3682 */ 3683 error = so_getfdopt(control, controllen, 3684 !(flags & MSG_XPG4_2), &fds, &fdlen); 3685 if (error) 3686 return (error); 3687 if (fdlen != -1) { 3688 if (!(so->so_mode & SM_FDPASSING)) 3689 return (EOPNOTSUPP); 3690 3691 error = fdbuf_create(fds, fdlen, &fdbuf); 3692 if (error) 3693 return (error); 3694 mp = fdbuf_allocmsg(size, fdbuf); 3695 } else { 3696 mp = soallocproto(size, _ALLOC_INTR); 3697 if (mp == NULL) { 3698 /* 3699 * Caught a signal waiting for memory. 3700 * Let send* return EINTR. 
3701 */ 3702 return (EINTR); 3703 } 3704 } 3705 soappendmsg(mp, &tudr, sizeof (tudr)); 3706 soappendmsg(mp, addr, addrlen); 3707 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3708 3709 if (fdlen != -1) { 3710 ASSERT(fdbuf != NULL); 3711 toh.level = SOL_SOCKET; 3712 toh.name = SO_FILEP; 3713 toh.len = fdbuf->fd_size + 3714 (t_uscalar_t)sizeof (struct T_opthdr); 3715 toh.status = 0; 3716 soappendmsg(mp, &toh, sizeof (toh)); 3717 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3718 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3719 } 3720 if (srclen != 0) { 3721 /* 3722 * There is a AF_UNIX sockaddr_un to include as a source 3723 * address option. 3724 */ 3725 toh.level = SOL_SOCKET; 3726 toh.name = SO_SRCADDR; 3727 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3728 toh.status = 0; 3729 soappendmsg(mp, &toh, sizeof (toh)); 3730 soappendmsg(mp, src, srclen); 3731 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3732 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3733 } 3734 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3735 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3736 /* At most 3 bytes left in the message */ 3737 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3738 ASSERT(MBLKL(mp) <= (ssize_t)size); 3739 3740 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3741 if (audit_active) 3742 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3743 3744 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3745 #ifdef SOCK_DEBUG 3746 if (error) { 3747 eprintsoline(so, error); 3748 } 3749 #endif /* SOCK_DEBUG */ 3750 return (error); 3751 } 3752 3753 /* 3754 * Sending data with options on a connected stream socket. 3755 * Assumes caller has verified that SS_ISCONNECTED is set. 
3756 */ 3757 static int 3758 sosend_svccmsg(struct sonode *so, 3759 struct uio *uiop, 3760 int more, 3761 void *control, 3762 t_uscalar_t controllen, 3763 int flags) 3764 { 3765 struct T_optdata_req tdr; 3766 mblk_t *mp; 3767 int error; 3768 ssize_t iosize; 3769 int first = 1; 3770 int size; 3771 struct fdbuf *fdbuf; 3772 t_uscalar_t optlen; 3773 void *fds; 3774 int fdlen; 3775 struct T_opthdr toh; 3776 3777 dprintso(so, 1, 3778 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3779 3780 /* 3781 * Has to be bound and connected. However, since no locks are 3782 * held the state could have changed after sotpi_sendmsg checked it 3783 * thus it is not possible to ASSERT on the state. 3784 */ 3785 3786 /* Options on connection-oriented only when SM_OPTDATA set. */ 3787 if (!(so->so_mode & SM_OPTDATA)) 3788 return (EOPNOTSUPP); 3789 3790 do { 3791 /* 3792 * Set the MORE flag if uio_resid does not fit in this 3793 * message or if the caller passed in "more". 3794 * Error for transports with zero tidu_size. 3795 */ 3796 tdr.PRIM_type = T_OPTDATA_REQ; 3797 iosize = so->so_tidu_size; 3798 if (iosize <= 0) 3799 return (EMSGSIZE); 3800 if (uiop->uio_resid > iosize) { 3801 tdr.DATA_flag = 1; 3802 } else { 3803 if (more) 3804 tdr.DATA_flag = 1; 3805 else 3806 tdr.DATA_flag = 0; 3807 iosize = uiop->uio_resid; 3808 } 3809 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3810 tdr.DATA_flag, iosize)); 3811 3812 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3813 tdr.OPT_length = optlen; 3814 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3815 3816 size = (int)sizeof (tdr) + optlen; 3817 /* 3818 * File descriptors only when SM_FDPASSING set. 
3819 */ 3820 error = so_getfdopt(control, controllen, 3821 !(flags & MSG_XPG4_2), &fds, &fdlen); 3822 if (error) 3823 return (error); 3824 if (fdlen != -1) { 3825 if (!(so->so_mode & SM_FDPASSING)) 3826 return (EOPNOTSUPP); 3827 3828 error = fdbuf_create(fds, fdlen, &fdbuf); 3829 if (error) 3830 return (error); 3831 mp = fdbuf_allocmsg(size, fdbuf); 3832 } else { 3833 mp = soallocproto(size, _ALLOC_INTR); 3834 if (mp == NULL) { 3835 /* 3836 * Caught a signal waiting for memory. 3837 * Let send* return EINTR. 3838 */ 3839 return (first ? EINTR : 0); 3840 } 3841 } 3842 soappendmsg(mp, &tdr, sizeof (tdr)); 3843 3844 if (fdlen != -1) { 3845 ASSERT(fdbuf != NULL); 3846 toh.level = SOL_SOCKET; 3847 toh.name = SO_FILEP; 3848 toh.len = fdbuf->fd_size + 3849 (t_uscalar_t)sizeof (struct T_opthdr); 3850 toh.status = 0; 3851 soappendmsg(mp, &toh, sizeof (toh)); 3852 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3853 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3854 } 3855 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3856 /* At most 3 bytes left in the message */ 3857 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3858 ASSERT(MBLKL(mp) <= (ssize_t)size); 3859 3860 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3861 3862 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3863 0, MSG_BAND, 0); 3864 if (error) { 3865 if (!first && error == EWOULDBLOCK) 3866 return (0); 3867 eprintsoline(so, error); 3868 return (error); 3869 } 3870 control = NULL; 3871 first = 0; 3872 if (uiop->uio_resid > 0) { 3873 /* 3874 * Recheck for fatal errors. Fail write even though 3875 * some data have been written. This is consistent 3876 * with strwrite semantics and BSD sockets semantics. 
3877 */ 3878 if (so->so_state & SS_CANTSENDMORE) { 3879 tsignal(curthread, SIGPIPE); 3880 eprintsoline(so, error); 3881 return (EPIPE); 3882 } 3883 if (so->so_error != 0) { 3884 mutex_enter(&so->so_lock); 3885 error = sogeterr(so); 3886 mutex_exit(&so->so_lock); 3887 if (error != 0) { 3888 eprintsoline(so, error); 3889 return (error); 3890 } 3891 } 3892 } 3893 } while (uiop->uio_resid > 0); 3894 return (0); 3895 } 3896 3897 /* 3898 * Sending data on a datagram socket. 3899 * Assumes caller has verified that SS_ISBOUND etc. are set. 3900 * 3901 * For AF_UNIX the destination address is translated to an internal 3902 * name and the source address is passed as an option. 3903 */ 3904 int 3905 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3906 struct uio *uiop, int flags) 3907 { 3908 struct T_unitdata_req tudr; 3909 mblk_t *mp; 3910 int error; 3911 void *addr; 3912 socklen_t addrlen; 3913 void *src; 3914 socklen_t srclen; 3915 ssize_t len; 3916 3917 ASSERT(name != NULL && namelen != 0); 3918 3919 len = uiop->uio_resid; 3920 if (len > so->so_tidu_size) { 3921 error = EMSGSIZE; 3922 goto done; 3923 } 3924 3925 /* Length and family checks */ 3926 error = so_addr_verify(so, name, namelen); 3927 if (error != 0) 3928 goto done; 3929 3930 if (so->so_state & SS_DIRECT) 3931 return (sodgram_direct(so, name, namelen, uiop, flags)); 3932 3933 if (so->so_family == AF_UNIX) { 3934 if (so->so_state & SS_FADDR_NOXLATE) { 3935 /* 3936 * Already have a transport internal address. Do not 3937 * pass any (transport internal) source address. 3938 */ 3939 addr = name; 3940 addrlen = namelen; 3941 src = NULL; 3942 srclen = 0; 3943 } else { 3944 /* 3945 * Pass the sockaddr_un source address as an option 3946 * and translate the remote address. 3947 * 3948 * Note that this code does not prevent so_laddr_sa 3949 * from changing while it is being used. 
Thus 3950 * if an unbind+bind occurs concurrently with this 3951 * send the peer might see a partially new and a 3952 * partially old "from" address. 3953 */ 3954 src = so->so_laddr_sa; 3955 srclen = (socklen_t)so->so_laddr_len; 3956 dprintso(so, 1, 3957 ("sosend_dgram UNIX: srclen %d, src %p\n", 3958 srclen, src)); 3959 error = so_ux_addr_xlate(so, name, namelen, 3960 (flags & MSG_XPG4_2), 3961 &addr, &addrlen); 3962 if (error) { 3963 eprintsoline(so, error); 3964 goto done; 3965 } 3966 } 3967 } else { 3968 addr = name; 3969 addrlen = namelen; 3970 src = NULL; 3971 srclen = 0; 3972 } 3973 tudr.PRIM_type = T_UNITDATA_REQ; 3974 tudr.DEST_length = addrlen; 3975 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3976 if (srclen == 0) { 3977 tudr.OPT_length = 0; 3978 tudr.OPT_offset = 0; 3979 3980 mp = soallocproto2(&tudr, sizeof (tudr), 3981 addr, addrlen, 0, _ALLOC_INTR); 3982 if (mp == NULL) { 3983 /* 3984 * Caught a signal waiting for memory. 3985 * Let send* return EINTR. 3986 */ 3987 error = EINTR; 3988 goto done; 3989 } 3990 } else { 3991 /* 3992 * There is a AF_UNIX sockaddr_un to include as a source 3993 * address option. 3994 */ 3995 struct T_opthdr toh; 3996 ssize_t size; 3997 3998 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3999 _TPI_ALIGN_TOPT(srclen)); 4000 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4001 _TPI_ALIGN_TOPT(addrlen)); 4002 4003 toh.level = SOL_SOCKET; 4004 toh.name = SO_SRCADDR; 4005 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4006 toh.status = 0; 4007 4008 size = tudr.OPT_offset + tudr.OPT_length; 4009 mp = soallocproto2(&tudr, sizeof (tudr), 4010 addr, addrlen, size, _ALLOC_INTR); 4011 if (mp == NULL) { 4012 /* 4013 * Caught a signal waiting for memory. 4014 * Let send* return EINTR. 
4015 */ 4016 error = EINTR; 4017 goto done; 4018 } 4019 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4020 soappendmsg(mp, &toh, sizeof (toh)); 4021 soappendmsg(mp, src, srclen); 4022 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4023 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4024 } 4025 4026 if (audit_active) 4027 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4028 4029 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4030 done: 4031 #ifdef SOCK_DEBUG 4032 if (error) { 4033 eprintsoline(so, error); 4034 } 4035 #endif /* SOCK_DEBUG */ 4036 return (error); 4037 } 4038 4039 /* 4040 * Sending data on a connected stream socket. 4041 * Assumes caller has verified that SS_ISCONNECTED is set. 4042 */ 4043 int 4044 sosend_svc(struct sonode *so, 4045 struct uio *uiop, 4046 t_scalar_t prim, 4047 int more, 4048 int sflag) 4049 { 4050 struct T_data_req tdr; 4051 mblk_t *mp; 4052 int error; 4053 ssize_t iosize; 4054 int first = 1; 4055 4056 dprintso(so, 1, 4057 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4058 (void *)so, uiop->uio_resid, prim, sflag)); 4059 4060 /* 4061 * Has to be bound and connected. However, since no locks are 4062 * held the state could have changed after sotpi_sendmsg checked it 4063 * thus it is not possible to ASSERT on the state. 4064 */ 4065 4066 do { 4067 /* 4068 * Set the MORE flag if uio_resid does not fit in this 4069 * message or if the caller passed in "more". 4070 * Error for transports with zero tidu_size. 
4071 */ 4072 tdr.PRIM_type = prim; 4073 iosize = so->so_tidu_size; 4074 if (iosize <= 0) 4075 return (EMSGSIZE); 4076 if (uiop->uio_resid > iosize) { 4077 tdr.MORE_flag = 1; 4078 } else { 4079 if (more) 4080 tdr.MORE_flag = 1; 4081 else 4082 tdr.MORE_flag = 0; 4083 iosize = uiop->uio_resid; 4084 } 4085 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4086 prim, tdr.MORE_flag, iosize)); 4087 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 4088 if (mp == NULL) { 4089 /* 4090 * Caught a signal waiting for memory. 4091 * Let send* return EINTR. 4092 */ 4093 if (first) 4094 return (EINTR); 4095 else 4096 return (0); 4097 } 4098 4099 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4100 0, sflag | MSG_BAND, 0); 4101 if (error) { 4102 if (!first && error == EWOULDBLOCK) 4103 return (0); 4104 eprintsoline(so, error); 4105 return (error); 4106 } 4107 first = 0; 4108 if (uiop->uio_resid > 0) { 4109 /* 4110 * Recheck for fatal errors. Fail write even though 4111 * some data have been written. This is consistent 4112 * with strwrite semantics and BSD sockets semantics. 4113 */ 4114 if (so->so_state & SS_CANTSENDMORE) { 4115 tsignal(curthread, SIGPIPE); 4116 eprintsoline(so, error); 4117 return (EPIPE); 4118 } 4119 if (so->so_error != 0) { 4120 mutex_enter(&so->so_lock); 4121 error = sogeterr(so); 4122 mutex_exit(&so->so_lock); 4123 if (error != 0) { 4124 eprintsoline(so, error); 4125 return (error); 4126 } 4127 } 4128 } 4129 } while (uiop->uio_resid > 0); 4130 return (0); 4131 } 4132 4133 /* 4134 * Check the state for errors and call the appropriate send function. 4135 * 4136 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4137 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4138 * after sending the message. 
4139 */ 4140 static int 4141 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 4142 { 4143 int so_state; 4144 int so_mode; 4145 int error; 4146 struct sockaddr *name; 4147 t_uscalar_t namelen; 4148 int dontroute; 4149 int flags; 4150 4151 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4152 (void *)so, (void *)msg, msg->msg_flags, 4153 pr_state(so->so_state, so->so_mode), so->so_error)); 4154 4155 mutex_enter(&so->so_lock); 4156 so_state = so->so_state; 4157 4158 if (so_state & SS_CANTSENDMORE) { 4159 mutex_exit(&so->so_lock); 4160 tsignal(curthread, SIGPIPE); 4161 return (EPIPE); 4162 } 4163 4164 if (so->so_error != 0) { 4165 error = sogeterr(so); 4166 if (error != 0) { 4167 mutex_exit(&so->so_lock); 4168 return (error); 4169 } 4170 } 4171 4172 name = (struct sockaddr *)msg->msg_name; 4173 namelen = msg->msg_namelen; 4174 4175 so_mode = so->so_mode; 4176 4177 if (name == NULL) { 4178 if (!(so_state & SS_ISCONNECTED)) { 4179 mutex_exit(&so->so_lock); 4180 if (so_mode & SM_CONNREQUIRED) 4181 return (ENOTCONN); 4182 else 4183 return (EDESTADDRREQ); 4184 } 4185 if (so_mode & SM_CONNREQUIRED) { 4186 name = NULL; 4187 namelen = 0; 4188 } else { 4189 /* 4190 * Note that this code does not prevent so_faddr_sa 4191 * from changing while it is being used. Thus 4192 * if an "unconnect"+connect occurs concurrently with 4193 * this send the datagram might be delivered to a 4194 * garbaled address. 4195 */ 4196 ASSERT(so->so_faddr_sa); 4197 name = so->so_faddr_sa; 4198 namelen = (t_uscalar_t)so->so_faddr_len; 4199 } 4200 } else { 4201 if (!(so_state & SS_ISCONNECTED) && 4202 (so_mode & SM_CONNREQUIRED)) { 4203 /* Required but not connected */ 4204 mutex_exit(&so->so_lock); 4205 return (ENOTCONN); 4206 } 4207 /* 4208 * Ignore the address on connection-oriented sockets. 4209 * Just like BSD this code does not generate an error for 4210 * TCP (a CONNREQUIRED socket) when sending to an address 4211 * passed in with sendto/sendmsg. 
Instead the data is 4212 * delivered on the connection as if no address had been 4213 * supplied. 4214 */ 4215 if ((so_state & SS_ISCONNECTED) && 4216 !(so_mode & SM_CONNREQUIRED)) { 4217 mutex_exit(&so->so_lock); 4218 return (EISCONN); 4219 } 4220 if (!(so_state & SS_ISBOUND)) { 4221 so_lock_single(so); /* Set SOLOCKED */ 4222 error = sotpi_bind(so, NULL, 0, 4223 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 4224 so_unlock_single(so, SOLOCKED); 4225 if (error) { 4226 mutex_exit(&so->so_lock); 4227 eprintsoline(so, error); 4228 return (error); 4229 } 4230 } 4231 /* 4232 * Handle delayed datagram errors. These are only queued 4233 * when the application sets SO_DGRAM_ERRIND. 4234 * Return the error if we are sending to the address 4235 * that was returned in the last T_UDERROR_IND. 4236 * If sending to some other address discard the delayed 4237 * error indication. 4238 */ 4239 if (so->so_delayed_error) { 4240 struct T_uderror_ind *tudi; 4241 void *addr; 4242 t_uscalar_t addrlen; 4243 boolean_t match = B_FALSE; 4244 4245 ASSERT(so->so_eaddr_mp); 4246 error = so->so_delayed_error; 4247 so->so_delayed_error = 0; 4248 tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr; 4249 addrlen = tudi->DEST_length; 4250 addr = sogetoff(so->so_eaddr_mp, 4251 tudi->DEST_offset, 4252 addrlen, 1); 4253 ASSERT(addr); /* Checked by strsock_proto */ 4254 switch (so->so_family) { 4255 case AF_INET: { 4256 /* Compare just IP address and port */ 4257 sin_t *sin1 = (sin_t *)name; 4258 sin_t *sin2 = (sin_t *)addr; 4259 4260 if (addrlen == sizeof (sin_t) && 4261 namelen == addrlen && 4262 sin1->sin_port == sin2->sin_port && 4263 sin1->sin_addr.s_addr == 4264 sin2->sin_addr.s_addr) 4265 match = B_TRUE; 4266 break; 4267 } 4268 case AF_INET6: { 4269 /* Compare just IP address and port. 
Not flow */ 4270 sin6_t *sin1 = (sin6_t *)name; 4271 sin6_t *sin2 = (sin6_t *)addr; 4272 4273 if (addrlen == sizeof (sin6_t) && 4274 namelen == addrlen && 4275 sin1->sin6_port == sin2->sin6_port && 4276 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4277 &sin2->sin6_addr)) 4278 match = B_TRUE; 4279 break; 4280 } 4281 case AF_UNIX: 4282 default: 4283 if (namelen == addrlen && 4284 bcmp(name, addr, namelen) == 0) 4285 match = B_TRUE; 4286 } 4287 if (match) { 4288 freemsg(so->so_eaddr_mp); 4289 so->so_eaddr_mp = NULL; 4290 mutex_exit(&so->so_lock); 4291 #ifdef DEBUG 4292 dprintso(so, 0, 4293 ("sockfs delayed error %d for %s\n", 4294 error, 4295 pr_addr(so->so_family, name, namelen))); 4296 #endif /* DEBUG */ 4297 return (error); 4298 } 4299 freemsg(so->so_eaddr_mp); 4300 so->so_eaddr_mp = NULL; 4301 } 4302 } 4303 mutex_exit(&so->so_lock); 4304 4305 flags = msg->msg_flags; 4306 dontroute = 0; 4307 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4308 uint32_t val; 4309 4310 val = 1; 4311 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4312 &val, (t_uscalar_t)sizeof (val)); 4313 if (error) 4314 return (error); 4315 dontroute = 1; 4316 } 4317 4318 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4319 error = EOPNOTSUPP; 4320 goto done; 4321 } 4322 if (msg->msg_controllen != 0) { 4323 if (!(so_mode & SM_CONNREQUIRED)) { 4324 error = sosend_dgramcmsg(so, name, namelen, uiop, 4325 msg->msg_control, msg->msg_controllen, flags); 4326 } else { 4327 if (flags & MSG_OOB) { 4328 /* Can't generate T_EXDATA_REQ with options */ 4329 error = EOPNOTSUPP; 4330 goto done; 4331 } 4332 error = sosend_svccmsg(so, uiop, 4333 !(flags & MSG_EOR), 4334 msg->msg_control, msg->msg_controllen, 4335 flags); 4336 } 4337 goto done; 4338 } 4339 4340 if (!(so_mode & SM_CONNREQUIRED)) { 4341 /* 4342 * If there is no SO_DONTROUTE to turn off return immediately 4343 * from send_dgram. This can allow tail-call optimizations. 
4344 */ 4345 if (!dontroute) { 4346 return (sosend_dgram(so, name, namelen, uiop, flags)); 4347 } 4348 error = sosend_dgram(so, name, namelen, uiop, flags); 4349 } else { 4350 t_scalar_t prim; 4351 int sflag; 4352 4353 /* Ignore msg_name in the connected state */ 4354 if (flags & MSG_OOB) { 4355 prim = T_EXDATA_REQ; 4356 /* 4357 * Send down T_EXDATA_REQ even if there is flow 4358 * control for data. 4359 */ 4360 sflag = MSG_IGNFLOW; 4361 } else { 4362 if (so_mode & SM_BYTESTREAM) { 4363 /* Byte stream transport - use write */ 4364 4365 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4366 /* 4367 * If there is no SO_DONTROUTE to turn off, 4368 * SS_DIRECT is on, and there is no flow 4369 * control, we can take the fast path. 4370 */ 4371 if (!dontroute && 4372 (so_state & SS_DIRECT) && 4373 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4374 return (sostream_direct(so, uiop, 4375 NULL, CRED())); 4376 } 4377 error = strwrite(SOTOV(so), uiop, CRED()); 4378 goto done; 4379 } 4380 prim = T_DATA_REQ; 4381 sflag = 0; 4382 } 4383 /* 4384 * If there is no SO_DONTROUTE to turn off return immediately 4385 * from sosend_svc. This can allow tail-call optimizations. 4386 */ 4387 if (!dontroute) 4388 return (sosend_svc(so, uiop, prim, 4389 !(flags & MSG_EOR), sflag)); 4390 error = sosend_svc(so, uiop, prim, 4391 !(flags & MSG_EOR), sflag); 4392 } 4393 ASSERT(dontroute); 4394 done: 4395 if (dontroute) { 4396 uint32_t val; 4397 4398 val = 0; 4399 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4400 &val, (t_uscalar_t)sizeof (val)); 4401 } 4402 return (error); 4403 } 4404 4405 /* 4406 * Sending data on a datagram socket. 4407 * Assumes caller has verified that SS_ISBOUND etc. are set. 
4408 */ 4409 /* ARGSUSED */ 4410 static int 4411 sodgram_direct(struct sonode *so, struct sockaddr *name, 4412 socklen_t namelen, struct uio *uiop, int flags) 4413 { 4414 struct T_unitdata_req tudr; 4415 mblk_t *mp = NULL; 4416 int error = 0; 4417 void *addr; 4418 socklen_t addrlen; 4419 ssize_t len; 4420 struct stdata *stp = SOTOV(so)->v_stream; 4421 int so_state; 4422 queue_t *udp_wq; 4423 boolean_t connected; 4424 mblk_t *mpdata = NULL; 4425 4426 ASSERT(name != NULL && namelen != 0); 4427 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4428 ASSERT(!(so->so_mode & SM_EXDATA)); 4429 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4430 ASSERT(SOTOV(so)->v_type == VSOCK); 4431 4432 /* Caller checked for proper length */ 4433 len = uiop->uio_resid; 4434 ASSERT(len <= so->so_tidu_size); 4435 4436 /* Length and family checks have been done by caller */ 4437 ASSERT(name->sa_family == so->so_family); 4438 ASSERT(so->so_family == AF_INET || 4439 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4440 ASSERT(so->so_family == AF_INET6 || 4441 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4442 4443 addr = name; 4444 addrlen = namelen; 4445 4446 if (stp->sd_sidp != NULL && 4447 (error = straccess(stp, JCWRITE)) != 0) 4448 goto done; 4449 4450 so_state = so->so_state; 4451 4452 connected = so_state & SS_ISCONNECTED; 4453 if (!connected) { 4454 tudr.PRIM_type = T_UNITDATA_REQ; 4455 tudr.DEST_length = addrlen; 4456 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4457 tudr.OPT_length = 0; 4458 tudr.OPT_offset = 0; 4459 4460 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4461 _ALLOC_INTR); 4462 if (mp == NULL) { 4463 /* 4464 * Caught a signal waiting for memory. 4465 * Let send* return EINTR. 4466 */ 4467 error = EINTR; 4468 goto done; 4469 } 4470 } 4471 4472 /* 4473 * For UDP we don't break up the copyin into smaller pieces 4474 * as in the TCP case. 
That means if ENOMEM is returned by 4475 * mcopyinuio() then the uio vector has not been modified at 4476 * all and we fallback to either strwrite() or kstrputmsg() 4477 * below. Note also that we never generate priority messages 4478 * from here. 4479 */ 4480 udp_wq = stp->sd_wrq->q_next; 4481 if (canput(udp_wq) && 4482 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4483 ASSERT(DB_TYPE(mpdata) == M_DATA); 4484 ASSERT(uiop->uio_resid == 0); 4485 if (!connected) 4486 linkb(mp, mpdata); 4487 else 4488 mp = mpdata; 4489 if (audit_active) 4490 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4491 4492 udp_wput(udp_wq, mp); 4493 return (0); 4494 } 4495 4496 ASSERT(mpdata == NULL); 4497 if (error != 0 && error != ENOMEM) { 4498 freemsg(mp); 4499 return (error); 4500 } 4501 4502 /* 4503 * For connected, let strwrite() handle the blocking case. 4504 * Otherwise we fall thru and use kstrputmsg(). 4505 */ 4506 if (connected) 4507 return (strwrite(SOTOV(so), uiop, CRED())); 4508 4509 if (audit_active) 4510 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4511 4512 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4513 done: 4514 #ifdef SOCK_DEBUG 4515 if (error != 0) { 4516 eprintsoline(so, error); 4517 } 4518 #endif /* SOCK_DEBUG */ 4519 return (error); 4520 } 4521 4522 int 4523 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4524 { 4525 struct stdata *stp = SOTOV(so)->v_stream; 4526 ssize_t iosize, rmax, maxblk; 4527 queue_t *tcp_wq = stp->sd_wrq->q_next; 4528 mblk_t *newmp; 4529 int error = 0, wflag = 0; 4530 4531 ASSERT(so->so_mode & SM_BYTESTREAM); 4532 ASSERT(SOTOV(so)->v_type == VSOCK); 4533 4534 if (stp->sd_sidp != NULL && 4535 (error = straccess(stp, JCWRITE)) != 0) 4536 return (error); 4537 4538 if (uiop == NULL) { 4539 /* 4540 * kstrwritemp() should have checked sd_flag and 4541 * flow-control before coming here. If we end up 4542 * here it means that we can simply pass down the 4543 * data to tcp. 
4544 */ 4545 ASSERT(mp != NULL); 4546 if (stp->sd_wputdatafunc != NULL) { 4547 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4548 NULL, NULL, NULL); 4549 if (newmp == NULL) { 4550 /* The caller will free mp */ 4551 return (ECOMM); 4552 } 4553 mp = newmp; 4554 } 4555 tcp_wput(tcp_wq, mp); 4556 return (0); 4557 } 4558 4559 /* Fallback to strwrite() to do proper error handling */ 4560 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4561 return (strwrite(SOTOV(so), uiop, cr)); 4562 4563 rmax = stp->sd_qn_maxpsz; 4564 ASSERT(rmax >= 0 || rmax == INFPSZ); 4565 if (rmax == 0 || uiop->uio_resid <= 0) 4566 return (0); 4567 4568 if (rmax == INFPSZ) 4569 rmax = uiop->uio_resid; 4570 4571 maxblk = stp->sd_maxblk; 4572 4573 for (;;) { 4574 iosize = MIN(uiop->uio_resid, rmax); 4575 4576 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4577 if (mp == NULL) { 4578 /* 4579 * Fallback to strwrite() for ENOMEM; if this 4580 * is our first time in this routine and the uio 4581 * vector has not been modified, we will end up 4582 * calling strwrite() without any flag set. 4583 */ 4584 if (error == ENOMEM) 4585 goto slow_send; 4586 else 4587 return (error); 4588 } 4589 ASSERT(uiop->uio_resid >= 0); 4590 /* 4591 * If mp is non-NULL and ENOMEM is set, it means that 4592 * mcopyinuio() was able to break down some of the user 4593 * data into one or more mblks. Send the partial data 4594 * to tcp and let the rest be handled in strwrite(). 
4595 */ 4596 ASSERT(error == 0 || error == ENOMEM); 4597 if (stp->sd_wputdatafunc != NULL) { 4598 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4599 NULL, NULL, NULL); 4600 if (newmp == NULL) { 4601 /* The caller will free mp */ 4602 return (ECOMM); 4603 } 4604 mp = newmp; 4605 } 4606 tcp_wput(tcp_wq, mp); 4607 4608 wflag |= NOINTR; 4609 4610 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4611 ASSERT(error == 0); 4612 break; 4613 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4614 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4615 slow_send: 4616 /* 4617 * We were able to send down partial data using 4618 * the direct call interface, but are now relying 4619 * on strwrite() to handle the non-fastpath cases. 4620 * If the socket is blocking we will sleep in 4621 * strwaitq() until write is permitted, otherwise, 4622 * we will need to return the amount of bytes 4623 * written so far back to the app. This is the 4624 * reason why we pass NOINTR flag to strwrite() 4625 * for non-blocking socket, because we don't want 4626 * to return EAGAIN when portion of the user data 4627 * has actually been sent down. 4628 */ 4629 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4630 } 4631 } 4632 return (0); 4633 } 4634 4635 /* 4636 * Update so_faddr by asking the transport (unless AF_UNIX). 
4637 */ 4638 int 4639 sotpi_getpeername(struct sonode *so) 4640 { 4641 struct strbuf strbuf; 4642 int error = 0, res; 4643 void *addr; 4644 t_uscalar_t addrlen; 4645 k_sigset_t smask; 4646 4647 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4648 (void *)so, pr_state(so->so_state, so->so_mode))); 4649 4650 mutex_enter(&so->so_lock); 4651 so_lock_single(so); /* Set SOLOCKED */ 4652 if (!(so->so_state & SS_ISCONNECTED)) { 4653 error = ENOTCONN; 4654 goto done; 4655 } 4656 /* Added this check for X/Open */ 4657 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4658 error = EINVAL; 4659 if (xnet_check_print) { 4660 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4661 } 4662 goto done; 4663 } 4664 #ifdef DEBUG 4665 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4666 pr_addr(so->so_family, so->so_faddr_sa, 4667 (t_uscalar_t)so->so_faddr_len))); 4668 #endif /* DEBUG */ 4669 4670 if (so->so_family == AF_UNIX) { 4671 /* Transport has different name space - return local info */ 4672 error = 0; 4673 goto done; 4674 } 4675 4676 ASSERT(so->so_faddr_sa); 4677 /* Allocate local buffer to use with ioctl */ 4678 addrlen = (t_uscalar_t)so->so_faddr_maxlen; 4679 mutex_exit(&so->so_lock); 4680 addr = kmem_alloc(addrlen, KM_SLEEP); 4681 4682 /* 4683 * Issue TI_GETPEERNAME with signals masked. 4684 * Put the result in so_faddr_sa so that getpeername works after 4685 * a shutdown(output). 4686 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4687 * back to the socket. 4688 */ 4689 strbuf.buf = addr; 4690 strbuf.maxlen = addrlen; 4691 strbuf.len = 0; 4692 4693 sigintr(&smask, 0); 4694 res = 0; 4695 ASSERT(CRED()); 4696 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4697 0, K_TO_K, CRED(), &res); 4698 sigunintr(&smask); 4699 4700 mutex_enter(&so->so_lock); 4701 /* 4702 * If there is an error record the error in so_error put don't fail 4703 * the getpeername. Instead fallback on the recorded 4704 * so->so_faddr_sa. 
4705 */ 4706 if (error) { 4707 /* 4708 * Various stream head errors can be returned to the ioctl. 4709 * However, it is impossible to determine which ones of 4710 * these are really socket level errors that were incorrectly 4711 * consumed by the ioctl. Thus this code silently ignores the 4712 * error - to code explicitly does not reinstate the error 4713 * using soseterror(). 4714 * Experiments have shows that at least this set of 4715 * errors are reported and should not be reinstated on the 4716 * socket: 4717 * EINVAL E.g. if an I_LINK was in effect when 4718 * getpeername was called. 4719 * EPIPE The ioctl error semantics prefer the write 4720 * side error over the read side error. 4721 * ENOTCONN The transport just got disconnected but 4722 * sockfs had not yet seen the T_DISCON_IND 4723 * when issuing the ioctl. 4724 */ 4725 error = 0; 4726 } else if (res == 0 && strbuf.len > 0 && 4727 (so->so_state & SS_ISCONNECTED)) { 4728 ASSERT(strbuf.len <= (int)so->so_faddr_maxlen); 4729 so->so_faddr_len = (socklen_t)strbuf.len; 4730 bcopy(addr, so->so_faddr_sa, so->so_faddr_len); 4731 so->so_state |= SS_FADDR_VALID; 4732 } 4733 kmem_free(addr, addrlen); 4734 #ifdef DEBUG 4735 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4736 pr_addr(so->so_family, so->so_faddr_sa, 4737 (t_uscalar_t)so->so_faddr_len))); 4738 #endif /* DEBUG */ 4739 done: 4740 so_unlock_single(so, SOLOCKED); 4741 mutex_exit(&so->so_lock); 4742 return (error); 4743 } 4744 4745 /* 4746 * Update so_laddr by asking the transport (unless AF_UNIX). 
4747 */ 4748 int 4749 sotpi_getsockname(struct sonode *so) 4750 { 4751 struct strbuf strbuf; 4752 int error = 0, res; 4753 void *addr; 4754 t_uscalar_t addrlen; 4755 k_sigset_t smask; 4756 4757 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4758 (void *)so, pr_state(so->so_state, so->so_mode))); 4759 4760 mutex_enter(&so->so_lock); 4761 so_lock_single(so); /* Set SOLOCKED */ 4762 if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) { 4763 /* Return an all zero address except for the family */ 4764 if (so->so_family == AF_INET) 4765 so->so_laddr_len = (socklen_t)sizeof (sin_t); 4766 else if (so->so_family == AF_INET6) 4767 so->so_laddr_len = (socklen_t)sizeof (sin6_t); 4768 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 4769 bzero(so->so_laddr_sa, so->so_laddr_len); 4770 /* 4771 * Can not assume there is a sa_family for all 4772 * protocol families. 4773 */ 4774 if (so->so_family == AF_INET || so->so_family == AF_INET6) 4775 so->so_laddr_sa->sa_family = so->so_family; 4776 } 4777 #ifdef DEBUG 4778 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4779 pr_addr(so->so_family, so->so_laddr_sa, 4780 (t_uscalar_t)so->so_laddr_len))); 4781 #endif /* DEBUG */ 4782 if (so->so_family == AF_UNIX) { 4783 /* Transport has different name space - return local info */ 4784 error = 0; 4785 goto done; 4786 } 4787 if (!(so->so_state & SS_ISBOUND)) { 4788 /* If not bound, then nothing to return. */ 4789 error = 0; 4790 goto done; 4791 } 4792 /* Allocate local buffer to use with ioctl */ 4793 addrlen = (t_uscalar_t)so->so_laddr_maxlen; 4794 mutex_exit(&so->so_lock); 4795 addr = kmem_alloc(addrlen, KM_SLEEP); 4796 4797 /* 4798 * Issue TI_GETMYNAME with signals masked. 4799 * Put the result in so_laddr_sa so that getsockname works after 4800 * a shutdown(output). 4801 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4802 * back to the socket. 
4803 */ 4804 strbuf.buf = addr; 4805 strbuf.maxlen = addrlen; 4806 strbuf.len = 0; 4807 4808 sigintr(&smask, 0); 4809 res = 0; 4810 ASSERT(CRED()); 4811 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4812 0, K_TO_K, CRED(), &res); 4813 sigunintr(&smask); 4814 4815 mutex_enter(&so->so_lock); 4816 /* 4817 * If there is an error record the error in so_error put don't fail 4818 * the getsockname. Instead fallback on the recorded 4819 * so->so_laddr_sa. 4820 */ 4821 if (error) { 4822 /* 4823 * Various stream head errors can be returned to the ioctl. 4824 * However, it is impossible to determine which ones of 4825 * these are really socket level errors that were incorrectly 4826 * consumed by the ioctl. Thus this code silently ignores the 4827 * error - to code explicitly does not reinstate the error 4828 * using soseterror(). 4829 * Experiments have shows that at least this set of 4830 * errors are reported and should not be reinstated on the 4831 * socket: 4832 * EINVAL E.g. if an I_LINK was in effect when 4833 * getsockname was called. 4834 * EPIPE The ioctl error semantics prefer the write 4835 * side error over the read side error. 4836 */ 4837 error = 0; 4838 } else if (res == 0 && strbuf.len > 0 && 4839 (so->so_state & SS_ISBOUND)) { 4840 ASSERT(strbuf.len <= (int)so->so_laddr_maxlen); 4841 so->so_laddr_len = (socklen_t)strbuf.len; 4842 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 4843 so->so_state |= SS_LADDR_VALID; 4844 } 4845 kmem_free(addr, addrlen); 4846 #ifdef DEBUG 4847 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 4848 pr_addr(so->so_family, so->so_laddr_sa, 4849 (t_uscalar_t)so->so_laddr_len))); 4850 #endif /* DEBUG */ 4851 done: 4852 so_unlock_single(so, SOLOCKED); 4853 mutex_exit(&so->so_lock); 4854 return (error); 4855 } 4856 4857 /* 4858 * Get socket options. For SOL_SOCKET options some options are handled 4859 * by the sockfs while others use the value recorded in the sonode as a 4860 * fallback should the T_SVR4_OPTMGMT_REQ fail. 
4861 * 4862 * On the return most *optlenp bytes are copied to optval. 4863 */ 4864 int 4865 sotpi_getsockopt(struct sonode *so, int level, int option_name, 4866 void *optval, socklen_t *optlenp, int flags) 4867 { 4868 struct T_optmgmt_req optmgmt_req; 4869 struct T_optmgmt_ack *optmgmt_ack; 4870 struct opthdr oh; 4871 struct opthdr *opt_res; 4872 mblk_t *mp = NULL; 4873 int error = 0; 4874 void *option = NULL; /* Set if fallback value */ 4875 t_uscalar_t maxlen = *optlenp; 4876 t_uscalar_t len; 4877 uint32_t value; 4878 4879 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 4880 (void *)so, level, option_name, optval, (void *)optlenp, 4881 pr_state(so->so_state, so->so_mode))); 4882 4883 mutex_enter(&so->so_lock); 4884 so_lock_single(so); /* Set SOLOCKED */ 4885 4886 /* 4887 * Check for SOL_SOCKET options. 4888 * Certain SOL_SOCKET options are returned directly whereas 4889 * others only provide a default (fallback) value should 4890 * the T_SVR4_OPTMGMT_REQ fail. 4891 */ 4892 if (level == SOL_SOCKET) { 4893 /* Check parameters */ 4894 switch (option_name) { 4895 case SO_TYPE: 4896 case SO_ERROR: 4897 case SO_DEBUG: 4898 case SO_ACCEPTCONN: 4899 case SO_REUSEADDR: 4900 case SO_KEEPALIVE: 4901 case SO_DONTROUTE: 4902 case SO_BROADCAST: 4903 case SO_USELOOPBACK: 4904 case SO_OOBINLINE: 4905 case SO_SNDBUF: 4906 case SO_RCVBUF: 4907 #ifdef notyet 4908 case SO_SNDLOWAT: 4909 case SO_RCVLOWAT: 4910 case SO_SNDTIMEO: 4911 case SO_RCVTIMEO: 4912 #endif /* notyet */ 4913 case SO_DOMAIN: 4914 case SO_DGRAM_ERRIND: 4915 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 4916 error = EINVAL; 4917 eprintsoline(so, error); 4918 goto done2; 4919 } 4920 break; 4921 case SO_LINGER: 4922 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 4923 error = EINVAL; 4924 eprintsoline(so, error); 4925 goto done2; 4926 } 4927 break; 4928 } 4929 4930 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 4931 4932 switch (option_name) { 4933 case SO_TYPE: 4934 value = so->so_type; 
4935 option = &value; 4936 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4937 4938 case SO_ERROR: 4939 value = sogeterr(so); 4940 option = &value; 4941 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4942 4943 case SO_ACCEPTCONN: 4944 if (so->so_state & SS_ACCEPTCONN) 4945 value = SO_ACCEPTCONN; 4946 else 4947 value = 0; 4948 #ifdef DEBUG 4949 if (value) { 4950 dprintso(so, 1, 4951 ("sotpi_getsockopt: 0x%x is set\n", 4952 option_name)); 4953 } else { 4954 dprintso(so, 1, 4955 ("sotpi_getsockopt: 0x%x not set\n", 4956 option_name)); 4957 } 4958 #endif /* DEBUG */ 4959 option = &value; 4960 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4961 4962 case SO_DEBUG: 4963 case SO_REUSEADDR: 4964 case SO_KEEPALIVE: 4965 case SO_DONTROUTE: 4966 case SO_BROADCAST: 4967 case SO_USELOOPBACK: 4968 case SO_OOBINLINE: 4969 case SO_DGRAM_ERRIND: 4970 value = (so->so_options & option_name); 4971 #ifdef DEBUG 4972 if (value) { 4973 dprintso(so, 1, 4974 ("sotpi_getsockopt: 0x%x is set\n", 4975 option_name)); 4976 } else { 4977 dprintso(so, 1, 4978 ("sotpi_getsockopt: 0x%x not set\n", 4979 option_name)); 4980 } 4981 #endif /* DEBUG */ 4982 option = &value; 4983 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4984 4985 /* 4986 * The following options are only returned by sockfs when the 4987 * T_SVR4_OPTMGMT_REQ fails. 4988 */ 4989 case SO_LINGER: 4990 option = &so->so_linger; 4991 len = (t_uscalar_t)sizeof (struct linger); 4992 break; 4993 case SO_SNDBUF: { 4994 ssize_t lvalue; 4995 4996 /* 4997 * If the option has not been set then get a default 4998 * value from the read queue. This value is 4999 * returned if the transport fails 5000 * the T_SVR4_OPTMGMT_REQ. 
5001 */ 5002 lvalue = so->so_sndbuf; 5003 if (lvalue == 0) { 5004 mutex_exit(&so->so_lock); 5005 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5006 QHIWAT, 0, &lvalue); 5007 mutex_enter(&so->so_lock); 5008 dprintso(so, 1, 5009 ("got SO_SNDBUF %ld from q\n", lvalue)); 5010 } 5011 value = (int)lvalue; 5012 option = &value; 5013 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5014 break; 5015 } 5016 case SO_RCVBUF: { 5017 ssize_t lvalue; 5018 5019 /* 5020 * If the option has not been set then get a default 5021 * value from the read queue. This value is 5022 * returned if the transport fails 5023 * the T_SVR4_OPTMGMT_REQ. 5024 * 5025 * XXX If SO_RCVBUF has been set and this is an 5026 * XPG 4.2 application then do not ask the transport 5027 * since the transport might adjust the value and not 5028 * return exactly what was set by the application. 5029 * For non-XPG 4.2 application we return the value 5030 * that the transport is actually using. 5031 */ 5032 lvalue = so->so_rcvbuf; 5033 if (lvalue == 0) { 5034 mutex_exit(&so->so_lock); 5035 (void) strqget(RD(strvp2wq(SOTOV(so))), 5036 QHIWAT, 0, &lvalue); 5037 mutex_enter(&so->so_lock); 5038 dprintso(so, 1, 5039 ("got SO_RCVBUF %ld from q\n", lvalue)); 5040 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5041 value = (int)lvalue; 5042 option = &value; 5043 goto copyout; /* skip asking transport */ 5044 } 5045 value = (int)lvalue; 5046 option = &value; 5047 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5048 break; 5049 } 5050 case SO_DOMAIN: 5051 value = so->so_family; 5052 option = &value; 5053 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5054 5055 #ifdef notyet 5056 /* 5057 * We do not implement the semantics of these options 5058 * thus we shouldn't implement the options either. 
5059 */ 5060 case SO_SNDLOWAT: 5061 value = so->so_sndlowat; 5062 option = &value; 5063 break; 5064 case SO_RCVLOWAT: 5065 value = so->so_rcvlowat; 5066 option = &value; 5067 break; 5068 case SO_SNDTIMEO: 5069 value = so->so_sndtimeo; 5070 option = &value; 5071 break; 5072 case SO_RCVTIMEO: 5073 value = so->so_rcvtimeo; 5074 option = &value; 5075 break; 5076 #endif /* notyet */ 5077 } 5078 } 5079 5080 mutex_exit(&so->so_lock); 5081 5082 /* Send request */ 5083 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5084 optmgmt_req.MGMT_flags = T_CHECK; 5085 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5086 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5087 5088 oh.level = level; 5089 oh.name = option_name; 5090 oh.len = maxlen; 5091 5092 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5093 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 5094 /* Let option management work in the presence of data flow control */ 5095 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5096 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5097 mp = NULL; 5098 mutex_enter(&so->so_lock); 5099 if (error) { 5100 eprintsoline(so, error); 5101 goto done2; 5102 } 5103 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5104 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5105 if (error) { 5106 if (option != NULL) { 5107 /* We have a fallback value */ 5108 error = 0; 5109 goto copyout; 5110 } 5111 eprintsoline(so, error); 5112 goto done2; 5113 } 5114 ASSERT(mp); 5115 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5116 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5117 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5118 if (opt_res == NULL) { 5119 if (option != NULL) { 5120 /* We have a fallback value */ 5121 error = 0; 5122 goto copyout; 5123 } 5124 error = EPROTO; 5125 eprintsoline(so, error); 5126 goto done; 5127 } 5128 option = &opt_res[1]; 5129 5130 /* check to ensure that the option is within bounds */ 5131 if 
(((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5132 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5133 if (option != NULL) { 5134 /* We have a fallback value */ 5135 error = 0; 5136 goto copyout; 5137 } 5138 error = EPROTO; 5139 eprintsoline(so, error); 5140 goto done; 5141 } 5142 5143 len = opt_res->len; 5144 5145 copyout: { 5146 t_uscalar_t size = MIN(len, maxlen); 5147 bcopy(option, optval, size); 5148 bcopy(&size, optlenp, sizeof (size)); 5149 } 5150 done: 5151 freemsg(mp); 5152 done2: 5153 so_unlock_single(so, SOLOCKED); 5154 mutex_exit(&so->so_lock); 5155 return (error); 5156 } 5157 5158 /* 5159 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5160 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5161 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5162 * setsockopt has to work even if the transport does not support the option. 5163 */ 5164 int 5165 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5166 const void *optval, t_uscalar_t optlen) 5167 { 5168 struct T_optmgmt_req optmgmt_req; 5169 struct opthdr oh; 5170 mblk_t *mp; 5171 int error = 0; 5172 boolean_t handled = B_FALSE; 5173 5174 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5175 (void *)so, level, option_name, optval, optlen, 5176 pr_state(so->so_state, so->so_mode))); 5177 5178 5179 /* X/Open requires this check */ 5180 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5181 if (xnet_check_print) 5182 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5183 return (EINVAL); 5184 } 5185 5186 /* Caller allocates aligned optval, or passes null */ 5187 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 5188 /* If optval is null optlen is 0, and vice-versa */ 5189 ASSERT(optval != NULL || optlen == 0); 5190 ASSERT(optlen != 0 || optval == NULL); 5191 5192 mutex_enter(&so->so_lock); 5193 so_lock_single(so); /* Set SOLOCKED */ 5194 
mutex_exit(&so->so_lock); 5195 5196 /* 5197 * For SOCKET or TCP level options, try to set it here itself 5198 * provided socket has not been popped and we know the tcp 5199 * structure (stored in so_priv). 5200 */ 5201 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5202 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5203 (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) { 5204 tcp_t *tcp = so->so_priv; 5205 boolean_t onoff; 5206 5207 #define intvalue (*(int32_t *)optval) 5208 5209 switch (level) { 5210 case SOL_SOCKET: 5211 switch (option_name) { /* Check length param */ 5212 case SO_DEBUG: 5213 case SO_REUSEADDR: 5214 case SO_DONTROUTE: 5215 case SO_BROADCAST: 5216 case SO_USELOOPBACK: 5217 case SO_OOBINLINE: 5218 case SO_DGRAM_ERRIND: 5219 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5220 error = EINVAL; 5221 eprintsoline(so, error); 5222 mutex_enter(&so->so_lock); 5223 goto done2; 5224 } 5225 ASSERT(optval); 5226 onoff = intvalue != 0; 5227 handled = B_TRUE; 5228 break; 5229 case SO_LINGER: 5230 if (optlen != 5231 (t_uscalar_t)sizeof (struct linger)) { 5232 error = EINVAL; 5233 eprintsoline(so, error); 5234 mutex_enter(&so->so_lock); 5235 goto done2; 5236 } 5237 ASSERT(optval); 5238 handled = B_TRUE; 5239 break; 5240 } 5241 5242 switch (option_name) { /* Do actions */ 5243 case SO_LINGER: { 5244 struct linger *lgr = (struct linger *)optval; 5245 5246 if (lgr->l_onoff) { 5247 tcp->tcp_linger = 1; 5248 tcp->tcp_lingertime = lgr->l_linger; 5249 so->so_linger.l_onoff = SO_LINGER; 5250 so->so_options |= SO_LINGER; 5251 } else { 5252 tcp->tcp_linger = 0; 5253 tcp->tcp_lingertime = 0; 5254 so->so_linger.l_onoff = 0; 5255 so->so_options &= ~SO_LINGER; 5256 } 5257 so->so_linger.l_linger = lgr->l_linger; 5258 handled = B_TRUE; 5259 break; 5260 } 5261 case SO_DEBUG: 5262 tcp->tcp_debug = onoff; 5263 #ifdef SOCK_TEST 5264 if (intvalue & 2) 5265 sock_test_timelimit = 10 * hz; 5266 else 5267 sock_test_timelimit = 0; 5268 5269 if (intvalue & 4) 
5270 do_useracc = 0; 5271 else 5272 do_useracc = 1; 5273 #endif /* SOCK_TEST */ 5274 break; 5275 case SO_DONTROUTE: 5276 /* 5277 * SO_DONTROUTE, SO_USELOOPBACK and 5278 * SO_BROADCAST are only of interest to IP. 5279 * We track them here only so 5280 * that we can report their current value. 5281 */ 5282 tcp->tcp_dontroute = onoff; 5283 if (onoff) 5284 so->so_options |= option_name; 5285 else 5286 so->so_options &= ~option_name; 5287 break; 5288 case SO_USELOOPBACK: 5289 tcp->tcp_useloopback = onoff; 5290 if (onoff) 5291 so->so_options |= option_name; 5292 else 5293 so->so_options &= ~option_name; 5294 break; 5295 case SO_BROADCAST: 5296 tcp->tcp_broadcast = onoff; 5297 if (onoff) 5298 so->so_options |= option_name; 5299 else 5300 so->so_options &= ~option_name; 5301 break; 5302 case SO_REUSEADDR: 5303 tcp->tcp_reuseaddr = onoff; 5304 if (onoff) 5305 so->so_options |= option_name; 5306 else 5307 so->so_options &= ~option_name; 5308 break; 5309 case SO_OOBINLINE: 5310 tcp->tcp_oobinline = onoff; 5311 if (onoff) 5312 so->so_options |= option_name; 5313 else 5314 so->so_options &= ~option_name; 5315 break; 5316 case SO_DGRAM_ERRIND: 5317 tcp->tcp_dgram_errind = onoff; 5318 if (onoff) 5319 so->so_options |= option_name; 5320 else 5321 so->so_options &= ~option_name; 5322 break; 5323 } 5324 break; 5325 case IPPROTO_TCP: 5326 switch (option_name) { 5327 case TCP_NODELAY: 5328 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5329 error = EINVAL; 5330 eprintsoline(so, error); 5331 mutex_enter(&so->so_lock); 5332 goto done2; 5333 } 5334 ASSERT(optval); 5335 tcp->tcp_naglim = intvalue ? 
1 : tcp->tcp_mss; 5336 handled = B_TRUE; 5337 break; 5338 } 5339 break; 5340 default: 5341 handled = B_FALSE; 5342 break; 5343 } 5344 } 5345 5346 if (handled) { 5347 mutex_enter(&so->so_lock); 5348 goto done2; 5349 } 5350 5351 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5352 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5353 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5354 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5355 5356 oh.level = level; 5357 oh.name = option_name; 5358 oh.len = optlen; 5359 5360 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5361 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 5362 /* Let option management work in the presence of data flow control */ 5363 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5364 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5365 mp = NULL; 5366 mutex_enter(&so->so_lock); 5367 if (error) { 5368 eprintsoline(so, error); 5369 goto done; 5370 } 5371 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5372 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5373 if (error) { 5374 eprintsoline(so, error); 5375 goto done; 5376 } 5377 ASSERT(mp); 5378 /* No need to verify T_optmgmt_ack */ 5379 freemsg(mp); 5380 done: 5381 /* 5382 * Check for SOL_SOCKET options and record their values. 5383 * If we know about a SOL_SOCKET parameter and the transport 5384 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5385 * EPROTO) we let the setsockopt succeed. 
5386 */ 5387 if (level == SOL_SOCKET) { 5388 /* Check parameters */ 5389 switch (option_name) { 5390 case SO_DEBUG: 5391 case SO_REUSEADDR: 5392 case SO_KEEPALIVE: 5393 case SO_DONTROUTE: 5394 case SO_BROADCAST: 5395 case SO_USELOOPBACK: 5396 case SO_OOBINLINE: 5397 case SO_SNDBUF: 5398 case SO_RCVBUF: 5399 #ifdef notyet 5400 case SO_SNDLOWAT: 5401 case SO_RCVLOWAT: 5402 case SO_SNDTIMEO: 5403 case SO_RCVTIMEO: 5404 #endif /* notyet */ 5405 case SO_DGRAM_ERRIND: 5406 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5407 error = EINVAL; 5408 eprintsoline(so, error); 5409 goto done2; 5410 } 5411 ASSERT(optval); 5412 handled = B_TRUE; 5413 break; 5414 case SO_LINGER: 5415 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5416 error = EINVAL; 5417 eprintsoline(so, error); 5418 goto done2; 5419 } 5420 ASSERT(optval); 5421 handled = B_TRUE; 5422 break; 5423 } 5424 5425 #define intvalue (*(int32_t *)optval) 5426 5427 switch (option_name) { 5428 case SO_TYPE: 5429 case SO_ERROR: 5430 case SO_ACCEPTCONN: 5431 /* Can't be set */ 5432 error = ENOPROTOOPT; 5433 goto done2; 5434 case SO_LINGER: { 5435 struct linger *l = (struct linger *)optval; 5436 5437 so->so_linger.l_linger = l->l_linger; 5438 if (l->l_onoff) { 5439 so->so_linger.l_onoff = SO_LINGER; 5440 so->so_options |= SO_LINGER; 5441 } else { 5442 so->so_linger.l_onoff = 0; 5443 so->so_options &= ~SO_LINGER; 5444 } 5445 break; 5446 } 5447 5448 case SO_DEBUG: 5449 #ifdef SOCK_TEST 5450 if (intvalue & 2) 5451 sock_test_timelimit = 10 * hz; 5452 else 5453 sock_test_timelimit = 0; 5454 5455 if (intvalue & 4) 5456 do_useracc = 0; 5457 else 5458 do_useracc = 1; 5459 #endif /* SOCK_TEST */ 5460 /* FALLTHRU */ 5461 case SO_REUSEADDR: 5462 case SO_KEEPALIVE: 5463 case SO_DONTROUTE: 5464 case SO_BROADCAST: 5465 case SO_USELOOPBACK: 5466 case SO_OOBINLINE: 5467 case SO_DGRAM_ERRIND: 5468 if (intvalue != 0) { 5469 dprintso(so, 1, 5470 ("sotpi_setsockopt: setting 0x%x\n", 5471 option_name)); 5472 so->so_options |= option_name; 5473 
} else { 5474 dprintso(so, 1, 5475 ("sotpi_setsockopt: clearing 0x%x\n", 5476 option_name)); 5477 so->so_options &= ~option_name; 5478 } 5479 break; 5480 /* 5481 * The following options are only returned by us when the 5482 * T_SVR4_OPTMGMT_REQ fails. 5483 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5484 * since the transport might adjust the value and not 5485 * return exactly what was set by the application. 5486 */ 5487 case SO_SNDBUF: 5488 so->so_sndbuf = intvalue; 5489 break; 5490 case SO_RCVBUF: 5491 so->so_rcvbuf = intvalue; 5492 break; 5493 #ifdef notyet 5494 /* 5495 * We do not implement the semantics of these options 5496 * thus we shouldn't implement the options either. 5497 */ 5498 case SO_SNDLOWAT: 5499 so->so_sndlowat = intvalue; 5500 break; 5501 case SO_RCVLOWAT: 5502 so->so_rcvlowat = intvalue; 5503 break; 5504 case SO_SNDTIMEO: 5505 so->so_sndtimeo = intvalue; 5506 break; 5507 case SO_RCVTIMEO: 5508 so->so_rcvtimeo = intvalue; 5509 break; 5510 #endif /* notyet */ 5511 } 5512 #undef intvalue 5513 5514 if (error) { 5515 if ((error == ENOPROTOOPT || error == EPROTO || 5516 error == EINVAL) && handled) { 5517 dprintso(so, 1, 5518 ("setsockopt: ignoring error %d for 0x%x\n", 5519 error, option_name)); 5520 error = 0; 5521 } 5522 } 5523 } 5524 done2: 5525 ret: 5526 so_unlock_single(so, SOLOCKED); 5527 mutex_exit(&so->so_lock); 5528 return (error); 5529 } 5530