1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/t_lock.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/buf.h> 32 #include <sys/conf.h> 33 #include <sys/cred.h> 34 #include <sys/kmem.h> 35 #include <sys/sysmacros.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/debug.h> 39 #include <sys/errno.h> 40 #include <sys/time.h> 41 #include <sys/file.h> 42 #include <sys/open.h> 43 #include <sys/user.h> 44 #include <sys/termios.h> 45 #include <sys/stream.h> 46 #include <sys/strsubr.h> 47 #include <sys/strsun.h> 48 #include <sys/ddi.h> 49 #include <sys/esunddi.h> 50 #include <sys/flock.h> 51 #include <sys/modctl.h> 52 #include <sys/vtrace.h> 53 #include <sys/cmn_err.h> 54 #include <sys/pathname.h> 55 56 #include <sys/socket.h> 57 #include <sys/socketvar.h> 58 #include <sys/sockio.h> 59 #include <sys/sodirect.h> 60 #include <netinet/in.h> 61 #include <sys/un.h> 62 #include <sys/strsun.h> 63 64 #include <sys/tiuser.h> 65 #define _SUN_TPI_VERSION 2 66 #include <sys/tihdr.h> 67 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 68 69 #include <c2/audit.h> 70 71 #include <inet/common.h> 72 #include <inet/ip.h> 73 #include <inet/ip6.h> 74 #include <inet/tcp.h> 75 #include <inet/udp_impl.h> 76 77 #include <sys/zone.h> 78 79 #include <fs/sockfs/nl7c.h> 80 #include <fs/sockfs/nl7curi.h> 81 82 #include <inet/kssl/ksslapi.h> 83 84 /* 85 * Possible failures when memory can't be allocated. The documented behavior: 86 * 87 * 5.5: 4.X: XNET: 88 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 89 * EINTR 90 * (4.X does not document EINTR but returns it) 91 * bind: ENOSR - ENOBUFS/ENOSR 92 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 93 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 94 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 95 * (4.X getpeername and getsockname do not fail in practice) 96 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 97 * listen: - - ENOBUFS 98 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 99 * EINTR 100 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 101 * EINTR 102 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 103 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 104 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 105 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 106 * 107 * Resolution. When allocation fails: 108 * recv: return EINTR 109 * send: return EINTR 110 * connect, accept: EINTR 111 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 112 * socket, socketpair: ENOBUFS 113 * getpeername, getsockname: sleep 114 * getsockopt, setsockopt: sleep 115 */ 116 117 #ifdef SOCK_TEST 118 /* 119 * Variables that make sockfs do something other than the standard TPI 120 * for the AF_INET transports. 121 * 122 * solisten_tpi_tcp: 123 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 124 * the transport is already bound. This is needed to avoid loosing the 125 * port number should listen() do a T_UNBIND_REQ followed by a 126 * O_T_BIND_REQ. 127 * 128 * soconnect_tpi_udp: 129 * UDP and ICMP can handle a T_CONN_REQ. 130 * This is needed to make the sequence of connect(), getsockname() 131 * return the local IP address used to send packets to the connected to 132 * destination. 133 * 134 * soconnect_tpi_tcp: 135 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 136 * Set this to non-zero to send TPI conformant messages to TCP in this 137 * respect. This is a performance optimization. 138 * 139 * soaccept_tpi_tcp: 140 * TCP can handle a T_CONN_REQ without the acceptor being bound. 141 * This is a performance optimization that has been picked up in XTI. 142 * 143 * soaccept_tpi_multioptions: 144 * When inheriting SOL_SOCKET options from the listener to the accepting 145 * socket send them as a single message for AF_INET{,6}. 146 */ 147 int solisten_tpi_tcp = 0; 148 int soconnect_tpi_udp = 0; 149 int soconnect_tpi_tcp = 0; 150 int soaccept_tpi_tcp = 0; 151 int soaccept_tpi_multioptions = 1; 152 #else /* SOCK_TEST */ 153 #define soconnect_tpi_tcp 0 154 #define soconnect_tpi_udp 0 155 #define solisten_tpi_tcp 0 156 #define soaccept_tpi_tcp 0 157 #define soaccept_tpi_multioptions 1 158 #endif /* SOCK_TEST */ 159 160 #ifdef SOCK_TEST 161 extern int do_useracc; 162 extern clock_t sock_test_timelimit; 163 #endif /* SOCK_TEST */ 164 165 /* 166 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 167 * applications working. Turn on this flag to disable these checks. 168 */ 169 int xnet_skip_checks = 0; 170 int xnet_check_print = 0; 171 int xnet_truncate_print = 0; 172 173 extern void sigintr(k_sigset_t *, int); 174 extern void sigunintr(k_sigset_t *); 175 176 extern void *nl7c_lookup_addr(void *, t_uscalar_t); 177 extern void *nl7c_add_addr(void *, t_uscalar_t); 178 extern void nl7c_listener_addr(void *, struct sonode *); 179 180 /* Sockets acting as an in-kernel SSL proxy */ 181 extern mblk_t *strsock_kssl_input(vnode_t *, mblk_t *, strwakeup_t *, 182 strsigset_t *, strsigset_t *, strpollset_t *); 183 extern mblk_t *strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *, 184 strsigset_t *, strsigset_t *, strpollset_t *); 185 186 static int sotpi_unbind(struct sonode *, int); 187 188 extern int sodput(sodirect_t *, mblk_t *); 189 extern void sodwakeup(sodirect_t *); 190 191 /* TPI sockfs sonode operations */ 192 static int sotpi_accept(struct sonode *, int, struct sonode **); 193 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 194 int); 195 static int sotpi_connect(struct sonode *, const struct sockaddr *, 196 socklen_t, int, int); 197 static int sotpi_listen(struct sonode *, int); 198 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 199 struct uio *); 200 static int sotpi_shutdown(struct sonode *, int); 201 static int sotpi_getsockname(struct sonode *); 202 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 203 struct uio *, void *, t_uscalar_t, int); 204 static int sodgram_direct(struct sonode *, struct sockaddr *, 205 socklen_t, struct uio *, int); 206 207 sonodeops_t sotpi_sonodeops = { 208 sotpi_accept, /* sop_accept */ 209 sotpi_bind, /* sop_bind */ 210 sotpi_listen, /* sop_listen */ 211 sotpi_connect, /* sop_connect */ 212 sotpi_recvmsg, /* sop_recvmsg */ 213 sotpi_sendmsg, /* sop_sendmsg */ 214 sotpi_getpeername, /* sop_getpeername */ 215 sotpi_getsockname, /* sop_getsockname */ 216 sotpi_shutdown, /* sop_shutdown */ 217 sotpi_getsockopt, /* sop_getsockopt */ 218 sotpi_setsockopt /* sop_setsockopt */ 219 }; 220 221 /* 222 * Common create code for socket and accept. If tso is set the values 223 * from that node is used instead of issuing a T_INFO_REQ. 224 * 225 * Assumes that the caller has a VN_HOLD on accessvp. 226 * The VN_RELE will occur either when sotpi_create() fails or when 227 * the returned sonode is freed. 228 */ 229 struct sonode * 230 sotpi_create(vnode_t *accessvp, int domain, int type, int protocol, int version, 231 struct sonode *tso, int *errorp) 232 { 233 struct sonode *so; 234 vnode_t *vp; 235 int flags, error; 236 237 ASSERT(accessvp != NULL); 238 vp = makesockvp(accessvp, domain, type, protocol); 239 ASSERT(vp != NULL); 240 so = VTOSO(vp); 241 242 flags = FREAD|FWRITE; 243 244 if ((type == SOCK_STREAM || type == SOCK_DGRAM) && 245 (domain == AF_INET || domain == AF_INET6) && 246 (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP || 247 protocol == IPPROTO_IP)) { 248 /* Tell tcp or udp that it's talking to sockets */ 249 flags |= SO_SOCKSTR; 250 251 /* 252 * Here we indicate to socktpi_open() our attempt to 253 * make direct calls between sockfs and transport. 254 * The final decision is left to socktpi_open(). 255 */ 256 so->so_state |= SS_DIRECT; 257 258 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 259 if (so->so_type == SOCK_STREAM && tso != NULL) { 260 if (tso->so_state & SS_DIRECT) { 261 /* 262 * Inherit SS_DIRECT from listener and pass 263 * SO_ACCEPTOR open flag to tcp, indicating 264 * that this is an accept fast-path instance. 265 */ 266 flags |= SO_ACCEPTOR; 267 } else { 268 /* 269 * SS_DIRECT is not set on listener, meaning 270 * that the listener has been converted from 271 * a socket to a stream. Ensure that the 272 * acceptor inherits these settings. 273 */ 274 so->so_state &= ~SS_DIRECT; 275 flags &= ~SO_SOCKSTR; 276 } 277 } 278 } 279 280 /* 281 * Tell local transport that it is talking to sockets. 282 */ 283 if (so->so_family == AF_UNIX) { 284 flags |= SO_SOCKSTR; 285 } 286 287 /* Initialize the kernel SSL proxy fields */ 288 so->so_kssl_type = KSSL_NO_PROXY; 289 so->so_kssl_ent = NULL; 290 so->so_kssl_ctx = NULL; 291 292 if (error = socktpi_open(&vp, flags, CRED(), NULL)) { 293 VN_RELE(vp); 294 *errorp = error; 295 return (NULL); 296 } 297 298 if (error = so_strinit(so, tso)) { 299 (void) VOP_CLOSE(vp, 0, 1, 0, CRED(), NULL); 300 VN_RELE(vp); 301 *errorp = error; 302 return (NULL); 303 } 304 305 if (version == SOV_DEFAULT) 306 version = so_default_version; 307 308 so->so_version = (short)version; 309 310 return (so); 311 } 312 313 /* 314 * Bind the socket to an unspecified address in sockfs only. 315 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 316 * required in all cases. 317 */ 318 static void 319 so_automatic_bind(struct sonode *so) 320 { 321 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 322 323 ASSERT(MUTEX_HELD(&so->so_lock)); 324 ASSERT(!(so->so_state & SS_ISBOUND)); 325 ASSERT(so->so_unbind_mp); 326 327 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 328 bzero(so->so_laddr_sa, so->so_laddr_len); 329 so->so_laddr_sa->sa_family = so->so_family; 330 so->so_state |= SS_ISBOUND; 331 } 332 333 334 /* 335 * bind the socket. 336 * 337 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 338 * are passed in we allow rebinding. Note that for backwards compatibility 339 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 340 * Thus the rebinding code is currently not executed. 341 * 342 * The constraints for rebinding are: 343 * - it is a SOCK_DGRAM, or 344 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 345 * and no listen() has been done. 346 * This rebinding code was added based on some language in the XNET book 347 * about not returning EINVAL it the protocol allows rebinding. However, 348 * this language is not present in the Posix socket draft. Thus maybe the 349 * rebinding logic should be deleted from the source. 350 * 351 * A null "name" can be used to unbind the socket if: 352 * - it is a SOCK_DGRAM, or 353 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 354 * and no listen() has been done. 355 */ 356 static int 357 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 358 socklen_t namelen, int backlog, int flags) 359 { 360 struct T_bind_req bind_req; 361 struct T_bind_ack *bind_ack; 362 int error = 0; 363 mblk_t *mp; 364 void *addr; 365 t_uscalar_t addrlen; 366 int unbind_on_err = 1; 367 boolean_t clear_acceptconn_on_err = B_FALSE; 368 boolean_t restore_backlog_on_err = B_FALSE; 369 int save_so_backlog; 370 t_scalar_t PRIM_type = O_T_BIND_REQ; 371 boolean_t tcp_udp_xport; 372 void *nl7c = NULL; 373 374 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 375 (void *)so, (void *)name, namelen, backlog, flags, 376 pr_state(so->so_state, so->so_mode))); 377 378 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 379 380 if (!(flags & _SOBIND_LOCK_HELD)) { 381 mutex_enter(&so->so_lock); 382 so_lock_single(so); /* Set SOLOCKED */ 383 } else { 384 ASSERT(MUTEX_HELD(&so->so_lock)); 385 ASSERT(so->so_flag & SOLOCKED); 386 } 387 388 /* 389 * Make sure that there is a preallocated unbind_req message 390 * before binding. This message allocated when the socket is 391 * created but it might be have been consumed. 392 */ 393 if (so->so_unbind_mp == NULL) { 394 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 395 /* NOTE: holding so_lock while sleeping */ 396 so->so_unbind_mp = 397 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP); 398 } 399 400 if (flags & _SOBIND_REBIND) { 401 /* 402 * Called from solisten after doing an sotpi_unbind() or 403 * potentially without the unbind (latter for AF_INET{,6}). 404 */ 405 ASSERT(name == NULL && namelen == 0); 406 407 if (so->so_family == AF_UNIX) { 408 ASSERT(so->so_ux_bound_vp); 409 addr = &so->so_ux_laddr; 410 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 411 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 412 "addr 0x%p, vp %p\n", 413 addrlen, 414 (void *)((struct so_ux_addr *)addr)->soua_vp, 415 (void *)so->so_ux_bound_vp)); 416 } else { 417 addr = so->so_laddr_sa; 418 addrlen = (t_uscalar_t)so->so_laddr_len; 419 } 420 } else if (flags & _SOBIND_UNSPEC) { 421 ASSERT(name == NULL && namelen == 0); 422 423 /* 424 * The caller checked SS_ISBOUND but not necessarily 425 * under so_lock 426 */ 427 if (so->so_state & SS_ISBOUND) { 428 /* No error */ 429 goto done; 430 } 431 432 /* Set an initial local address */ 433 switch (so->so_family) { 434 case AF_UNIX: 435 /* 436 * Use an address with same size as struct sockaddr 437 * just like BSD. 438 */ 439 so->so_laddr_len = 440 (socklen_t)sizeof (struct sockaddr); 441 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 442 bzero(so->so_laddr_sa, so->so_laddr_len); 443 so->so_laddr_sa->sa_family = so->so_family; 444 445 /* 446 * Pass down an address with the implicit bind 447 * magic number and the rest all zeros. 448 * The transport will return a unique address. 449 */ 450 so->so_ux_laddr.soua_vp = NULL; 451 so->so_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 452 addr = &so->so_ux_laddr; 453 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 454 break; 455 456 case AF_INET: 457 case AF_INET6: 458 /* 459 * An unspecified bind in TPI has a NULL address. 460 * Set the address in sockfs to have the sa_family. 461 */ 462 so->so_laddr_len = (so->so_family == AF_INET) ? 463 (socklen_t)sizeof (sin_t) : 464 (socklen_t)sizeof (sin6_t); 465 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 466 bzero(so->so_laddr_sa, so->so_laddr_len); 467 so->so_laddr_sa->sa_family = so->so_family; 468 addr = NULL; 469 addrlen = 0; 470 break; 471 472 default: 473 /* 474 * An unspecified bind in TPI has a NULL address. 475 * Set the address in sockfs to be zero length. 476 * 477 * Can not assume there is a sa_family for all 478 * protocol families. For example, AF_X25 does not 479 * have a family field. 480 */ 481 bzero(so->so_laddr_sa, so->so_laddr_len); 482 so->so_laddr_len = 0; /* XXX correct? */ 483 addr = NULL; 484 addrlen = 0; 485 break; 486 } 487 488 } else { 489 if (so->so_state & SS_ISBOUND) { 490 /* 491 * If it is ok to rebind the socket, first unbind 492 * with the transport. A rebind to the NULL address 493 * is interpreted as an unbind. 494 * Note that a bind to NULL in BSD does unbind the 495 * socket but it fails with EINVAL. 496 * Note that regular sockets set SOV_SOCKBSD i.e. 497 * _SOBIND_SOCKBSD gets set here hence no type of 498 * socket does currently allow rebinding. 499 * 500 * If the name is NULL just do an unbind. 501 */ 502 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 503 name != NULL) { 504 error = EINVAL; 505 unbind_on_err = 0; 506 eprintsoline(so, error); 507 goto done; 508 } 509 if ((so->so_mode & SM_CONNREQUIRED) && 510 (so->so_state & SS_CANTREBIND)) { 511 error = EINVAL; 512 unbind_on_err = 0; 513 eprintsoline(so, error); 514 goto done; 515 } 516 error = sotpi_unbind(so, 0); 517 if (error) { 518 eprintsoline(so, error); 519 goto done; 520 } 521 ASSERT(!(so->so_state & SS_ISBOUND)); 522 if (name == NULL) { 523 so->so_state &= 524 ~(SS_ISCONNECTED|SS_ISCONNECTING); 525 goto done; 526 } 527 } 528 /* X/Open requires this check */ 529 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 530 if (xnet_check_print) { 531 printf("sockfs: X/Open bind state check " 532 "caused EINVAL\n"); 533 } 534 error = EINVAL; 535 goto done; 536 } 537 538 switch (so->so_family) { 539 case AF_UNIX: 540 /* 541 * All AF_UNIX addresses are nul terminated 542 * when copied (copyin_name) in so the minimum 543 * length is 3 bytes. 544 */ 545 if (name == NULL || 546 (ssize_t)namelen <= sizeof (short) + 1) { 547 error = EISDIR; 548 eprintsoline(so, error); 549 goto done; 550 } 551 /* 552 * Verify so_family matches the bound family. 553 * BSD does not check this for AF_UNIX resulting 554 * in funny mknods. 555 */ 556 if (name->sa_family != so->so_family) { 557 error = EAFNOSUPPORT; 558 goto done; 559 } 560 break; 561 case AF_INET: 562 if (name == NULL) { 563 error = EINVAL; 564 eprintsoline(so, error); 565 goto done; 566 } 567 if ((size_t)namelen != sizeof (sin_t)) { 568 error = name->sa_family != so->so_family ? 569 EAFNOSUPPORT : EINVAL; 570 eprintsoline(so, error); 571 goto done; 572 } 573 if ((flags & _SOBIND_XPG4_2) && 574 (name->sa_family != so->so_family)) { 575 /* 576 * This check has to be made for X/Open 577 * sockets however application failures have 578 * been observed when it is applied to 579 * all sockets. 580 */ 581 error = EAFNOSUPPORT; 582 eprintsoline(so, error); 583 goto done; 584 } 585 /* 586 * Force a zero sa_family to match so_family. 587 * 588 * Some programs like inetd(1M) don't set the 589 * family field. Other programs leave 590 * sin_family set to garbage - SunOS 4.X does 591 * not check the family field on a bind. 592 * We use the family field that 593 * was passed in to the socket() call. 594 */ 595 name->sa_family = so->so_family; 596 break; 597 598 case AF_INET6: { 599 #ifdef DEBUG 600 sin6_t *sin6 = (sin6_t *)name; 601 #endif /* DEBUG */ 602 603 if (name == NULL) { 604 error = EINVAL; 605 eprintsoline(so, error); 606 goto done; 607 } 608 if ((size_t)namelen != sizeof (sin6_t)) { 609 error = name->sa_family != so->so_family ? 610 EAFNOSUPPORT : EINVAL; 611 eprintsoline(so, error); 612 goto done; 613 } 614 if (name->sa_family != so->so_family) { 615 /* 616 * With IPv6 we require the family to match 617 * unlike in IPv4. 618 */ 619 error = EAFNOSUPPORT; 620 eprintsoline(so, error); 621 goto done; 622 } 623 #ifdef DEBUG 624 /* 625 * Verify that apps don't forget to clear 626 * sin6_scope_id etc 627 */ 628 if (sin6->sin6_scope_id != 0 && 629 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 630 zcmn_err(getzoneid(), CE_WARN, 631 "bind with uninitialized sin6_scope_id " 632 "(%d) on socket. Pid = %d\n", 633 (int)sin6->sin6_scope_id, 634 (int)curproc->p_pid); 635 } 636 if (sin6->__sin6_src_id != 0) { 637 zcmn_err(getzoneid(), CE_WARN, 638 "bind with uninitialized __sin6_src_id " 639 "(%d) on socket. Pid = %d\n", 640 (int)sin6->__sin6_src_id, 641 (int)curproc->p_pid); 642 } 643 #endif /* DEBUG */ 644 break; 645 } 646 default: 647 /* 648 * Don't do any length or sa_family check to allow 649 * non-sockaddr style addresses. 650 */ 651 if (name == NULL) { 652 error = EINVAL; 653 eprintsoline(so, error); 654 goto done; 655 } 656 break; 657 } 658 659 if (namelen > (t_uscalar_t)so->so_laddr_maxlen) { 660 error = ENAMETOOLONG; 661 eprintsoline(so, error); 662 goto done; 663 } 664 /* 665 * Save local address. 666 */ 667 so->so_laddr_len = (socklen_t)namelen; 668 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 669 bcopy(name, so->so_laddr_sa, namelen); 670 671 addr = so->so_laddr_sa; 672 addrlen = (t_uscalar_t)so->so_laddr_len; 673 switch (so->so_family) { 674 case AF_INET6: 675 case AF_INET: 676 break; 677 case AF_UNIX: { 678 struct sockaddr_un *soun = 679 (struct sockaddr_un *)so->so_laddr_sa; 680 struct vnode *vp, *rvp; 681 struct vattr vattr; 682 683 ASSERT(so->so_ux_bound_vp == NULL); 684 /* 685 * Create vnode for the specified path name. 686 * Keep vnode held with a reference in so_ux_bound_vp. 687 * Use the vnode pointer as the address used in the 688 * bind with the transport. 689 * 690 * Use the same mode as in BSD. In particular this does 691 * not observe the umask. 692 */ 693 /* MAXPATHLEN + soun_family + nul termination */ 694 if (so->so_laddr_len > 695 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 696 error = ENAMETOOLONG; 697 eprintsoline(so, error); 698 goto done; 699 } 700 vattr.va_type = VSOCK; 701 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 702 vattr.va_mask = AT_TYPE|AT_MODE; 703 /* NOTE: holding so_lock */ 704 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 705 EXCL, 0, &vp, CRMKNOD, 0, 0); 706 if (error) { 707 if (error == EEXIST) 708 error = EADDRINUSE; 709 eprintsoline(so, error); 710 goto done; 711 } 712 /* 713 * Establish pointer from the underlying filesystem 714 * vnode to the socket node. 715 * so_ux_bound_vp and v_stream->sd_vnode form the 716 * cross-linkage between the underlying filesystem 717 * node and the socket node. 718 */ 719 720 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 721 VN_HOLD(rvp); 722 VN_RELE(vp); 723 vp = rvp; 724 } 725 726 ASSERT(SOTOV(so)->v_stream); 727 mutex_enter(&vp->v_lock); 728 vp->v_stream = SOTOV(so)->v_stream; 729 so->so_ux_bound_vp = vp; 730 mutex_exit(&vp->v_lock); 731 732 /* 733 * Use the vnode pointer value as a unique address 734 * (together with the magic number to avoid conflicts 735 * with implicit binds) in the transport provider. 736 */ 737 so->so_ux_laddr.soua_vp = (void *)so->so_ux_bound_vp; 738 so->so_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 739 addr = &so->so_ux_laddr; 740 addrlen = (t_uscalar_t)sizeof (so->so_ux_laddr); 741 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 742 addrlen, 743 ((struct so_ux_addr *)addr)->soua_vp)); 744 break; 745 } 746 } /* end switch (so->so_family) */ 747 } 748 749 /* 750 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 751 * the transport can start passing up T_CONN_IND messages 752 * as soon as it receives the bind req and strsock_proto() 753 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 754 */ 755 if (flags & _SOBIND_LISTEN) { 756 if ((so->so_state & SS_ACCEPTCONN) == 0) 757 clear_acceptconn_on_err = B_TRUE; 758 save_so_backlog = so->so_backlog; 759 restore_backlog_on_err = B_TRUE; 760 so->so_state |= SS_ACCEPTCONN; 761 so->so_backlog = backlog; 762 } 763 764 /* 765 * If NL7C addr(s) have been configured check for addr/port match, 766 * or if an implicit NL7C socket via AF_NCA mark socket as NL7C. 767 * 768 * NL7C supports the TCP transport only so check AF_INET and AF_INET6 769 * family sockets only. If match mark as such. 770 */ 771 if (nl7c_enabled && ((addr != NULL && 772 (so->so_family == AF_INET || so->so_family == AF_INET6) && 773 (nl7c = nl7c_lookup_addr(addr, addrlen))) || 774 so->so_nl7c_flags == NL7C_AF_NCA)) { 775 /* 776 * NL7C is not supported in non-global zones, 777 * we enforce this restriction here. 778 */ 779 if (so->so_zoneid == GLOBAL_ZONEID) { 780 /* An NL7C socket, mark it */ 781 so->so_nl7c_flags |= NL7C_ENABLED; 782 if (nl7c == NULL) { 783 /* 784 * Was an AF_NCA bind() so add it to the 785 * addr list for reporting purposes. 786 */ 787 nl7c = nl7c_add_addr(addr, addrlen); 788 } 789 } else 790 nl7c = NULL; 791 } 792 /* 793 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 794 * for other transports we will send in a O_T_BIND_REQ. 795 */ 796 if (tcp_udp_xport && 797 (so->so_family == AF_INET || so->so_family == AF_INET6)) 798 PRIM_type = T_BIND_REQ; 799 800 bind_req.PRIM_type = PRIM_type; 801 bind_req.ADDR_length = addrlen; 802 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 803 bind_req.CONIND_number = backlog; 804 /* NOTE: holding so_lock while sleeping */ 805 mp = soallocproto2(&bind_req, sizeof (bind_req), 806 addr, addrlen, 0, _ALLOC_SLEEP); 807 so->so_state &= ~SS_LADDR_VALID; 808 809 /* Done using so_laddr_sa - can drop the lock */ 810 mutex_exit(&so->so_lock); 811 812 /* 813 * Intercept the bind_req message here to check if this <address/port> 814 * was configured as an SSL proxy server, or if another endpoint was 815 * already configured to act as a proxy for us. 816 * 817 * Note, only if NL7C not enabled for this socket. 818 */ 819 if (nl7c == NULL && 820 (so->so_family == AF_INET || so->so_family == AF_INET6) && 821 so->so_type == SOCK_STREAM) { 822 823 if (so->so_kssl_ent != NULL) { 824 kssl_release_ent(so->so_kssl_ent, so, so->so_kssl_type); 825 so->so_kssl_ent = NULL; 826 } 827 828 so->so_kssl_type = kssl_check_proxy(mp, so, &so->so_kssl_ent); 829 switch (so->so_kssl_type) { 830 case KSSL_NO_PROXY: 831 break; 832 833 case KSSL_HAS_PROXY: 834 mutex_enter(&so->so_lock); 835 goto skip_transport; 836 837 case KSSL_IS_PROXY: 838 break; 839 } 840 } 841 842 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 843 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 844 if (error) { 845 eprintsoline(so, error); 846 mutex_enter(&so->so_lock); 847 goto done; 848 } 849 850 mutex_enter(&so->so_lock); 851 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 852 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 853 if (error) { 854 eprintsoline(so, error); 855 goto done; 856 } 857 skip_transport: 858 ASSERT(mp); 859 /* 860 * Even if some TPI message (e.g. T_DISCON_IND) was received in 861 * strsock_proto while the lock was dropped above, the bind 862 * is allowed to complete. 863 */ 864 865 /* Mark as bound. This will be undone if we detect errors below. */ 866 if (flags & _SOBIND_NOXLATE) { 867 ASSERT(so->so_family == AF_UNIX); 868 so->so_state |= SS_FADDR_NOXLATE; 869 } 870 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 871 so->so_state |= SS_ISBOUND; 872 ASSERT(so->so_unbind_mp); 873 874 /* note that we've already set SS_ACCEPTCONN above */ 875 876 /* 877 * Recompute addrlen - an unspecied bind sent down an 878 * address of length zero but we expect the appropriate length 879 * in return. 880 */ 881 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 882 sizeof (so->so_ux_laddr) : so->so_laddr_len); 883 884 bind_ack = (struct T_bind_ack *)mp->b_rptr; 885 /* 886 * The alignment restriction is really too strict but 887 * we want enough alignment to inspect the fields of 888 * a sockaddr_in. 889 */ 890 addr = sogetoff(mp, bind_ack->ADDR_offset, 891 bind_ack->ADDR_length, 892 __TPI_ALIGN_SIZE); 893 if (addr == NULL) { 894 freemsg(mp); 895 error = EPROTO; 896 eprintsoline(so, error); 897 goto done; 898 } 899 if (!(flags & _SOBIND_UNSPEC)) { 900 /* 901 * Verify that the transport didn't return something we 902 * did not want e.g. an address other than what we asked for. 903 * 904 * NOTE: These checks would go away if/when we switch to 905 * using the new TPI (in which the transport would fail 906 * the request instead of assigning a different address). 907 * 908 * NOTE2: For protocols that we don't know (i.e. any 909 * other than AF_INET6, AF_INET and AF_UNIX), we 910 * cannot know if the transport should be expected to 911 * return the same address as that requested. 912 * 913 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 914 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 915 * 916 * For example, in the case of netatalk it may be 917 * inappropriate for the transport to return the 918 * requested address (as it may have allocated a local 919 * port number in behaviour similar to that of an 920 * AF_INET bind request with a port number of zero). 921 * 922 * Given the definition of O_T_BIND_REQ, where the 923 * transport may bind to an address other than the 924 * requested address, it's not possible to determine 925 * whether a returned address that differs from the 926 * requested address is a reason to fail (because the 927 * requested address was not available) or succeed 928 * (because the transport allocated an appropriate 929 * address and/or port). 930 * 931 * sockfs currently requires that the transport return 932 * the requested address in the T_BIND_ACK, unless 933 * there is code here to allow for any discrepancy. 934 * Such code exists for AF_INET and AF_INET6. 935 * 936 * Netatalk chooses to return the requested address 937 * rather than the (correct) allocated address. This 938 * means that netatalk violates the TPI specification 939 * (and would not function correctly if used from a 940 * TLI application), but it does mean that it works 941 * with sockfs. 942 * 943 * As noted above, using the newer XTI bind primitive 944 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 945 * allow sockfs to be more sure about whether or not 946 * the bind request had succeeded (as transports are 947 * not permitted to bind to a different address than 948 * that requested - they must return failure). 949 * Unfortunately, support for T_BIND_REQ may not be 950 * present in all transport implementations (netatalk, 951 * for example, doesn't have it), making the 952 * transition difficult. 953 */ 954 if (bind_ack->ADDR_length != addrlen) { 955 /* Assumes that the requested address was in use */ 956 freemsg(mp); 957 error = EADDRINUSE; 958 eprintsoline(so, error); 959 goto done; 960 } 961 962 switch (so->so_family) { 963 case AF_INET6: 964 case AF_INET: { 965 sin_t *rname, *aname; 966 967 rname = (sin_t *)addr; 968 aname = (sin_t *)so->so_laddr_sa; 969 970 /* 971 * Take advantage of the alignment 972 * of sin_port and sin6_port which fall 973 * in the same place in their data structures. 974 * Just use sin_port for either address family. 975 * 976 * This may become a problem if (heaven forbid) 977 * there's a separate ipv6port_reserved... :-P 978 * 979 * Binding to port 0 has the semantics of letting 980 * the transport bind to any port. 981 * 982 * If the transport is TCP or UDP since we had sent 983 * a T_BIND_REQ we would not get a port other than 984 * what we asked for. 985 */ 986 if (tcp_udp_xport) { 987 /* 988 * Pick up the new port number if we bound to 989 * port 0. 990 */ 991 if (aname->sin_port == 0) 992 aname->sin_port = rname->sin_port; 993 so->so_state |= SS_LADDR_VALID; 994 break; 995 } 996 if (aname->sin_port != 0 && 997 aname->sin_port != rname->sin_port) { 998 freemsg(mp); 999 error = EADDRINUSE; 1000 eprintsoline(so, error); 1001 goto done; 1002 } 1003 /* 1004 * Pick up the new port number if we bound to port 0. 1005 */ 1006 aname->sin_port = rname->sin_port; 1007 1008 /* 1009 * Unfortunately, addresses aren't _quite_ the same. 1010 */ 1011 if (so->so_family == AF_INET) { 1012 if (aname->sin_addr.s_addr != 1013 rname->sin_addr.s_addr) { 1014 freemsg(mp); 1015 error = EADDRNOTAVAIL; 1016 eprintsoline(so, error); 1017 goto done; 1018 } 1019 } else { 1020 sin6_t *rname6 = (sin6_t *)rname; 1021 sin6_t *aname6 = (sin6_t *)aname; 1022 1023 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1024 &rname6->sin6_addr)) { 1025 freemsg(mp); 1026 error = EADDRNOTAVAIL; 1027 eprintsoline(so, error); 1028 goto done; 1029 } 1030 } 1031 break; 1032 } 1033 case AF_UNIX: 1034 if (bcmp(addr, &so->so_ux_laddr, addrlen) != 0) { 1035 freemsg(mp); 1036 error = EADDRINUSE; 1037 eprintsoline(so, error); 1038 eprintso(so, 1039 ("addrlen %d, addr 0x%x, vp %p\n", 1040 addrlen, *((int *)addr), 1041 (void *)so->so_ux_bound_vp)); 1042 goto done; 1043 } 1044 so->so_state |= SS_LADDR_VALID; 1045 break; 1046 default: 1047 /* 1048 * NOTE: This assumes that addresses can be 1049 * byte-compared for equivalence. 1050 */ 1051 if (bcmp(addr, so->so_laddr_sa, addrlen) != 0) { 1052 freemsg(mp); 1053 error = EADDRINUSE; 1054 eprintsoline(so, error); 1055 goto done; 1056 } 1057 /* 1058 * Don't mark SS_LADDR_VALID, as we cannot be 1059 * sure that the returned address is the real 1060 * bound address when talking to an unknown 1061 * transport. 1062 */ 1063 break; 1064 } 1065 } else { 1066 /* 1067 * Save for returned address for getsockname. 1068 * Needed for unspecific bind unless transport supports 1069 * the TI_GETMYNAME ioctl. 1070 * Do this for AF_INET{,6} even though they do, as 1071 * caching info here is much better performance than 1072 * a TPI/STREAMS trip to the transport for getsockname. 1073 * Any which can't for some reason _must_ _not_ set 1074 * LADDR_VALID here for the caching version of getsockname 1075 * to not break; 1076 */ 1077 switch (so->so_family) { 1078 case AF_UNIX: 1079 /* 1080 * Record the address bound with the transport 1081 * for use by socketpair. 1082 */ 1083 bcopy(addr, &so->so_ux_laddr, addrlen); 1084 so->so_state |= SS_LADDR_VALID; 1085 break; 1086 case AF_INET: 1087 case AF_INET6: 1088 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 1089 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 1090 so->so_state |= SS_LADDR_VALID; 1091 break; 1092 default: 1093 /* 1094 * Don't mark SS_LADDR_VALID, as we cannot be 1095 * sure that the returned address is the real 1096 * bound address when talking to an unknown 1097 * transport. 1098 */ 1099 break; 1100 } 1101 } 1102 1103 if (nl7c != NULL) { 1104 /* Register listen()er sonode pointer with NL7C */ 1105 nl7c_listener_addr(nl7c, so); 1106 } 1107 1108 freemsg(mp); 1109 1110 done: 1111 if (error) { 1112 /* reset state & backlog to values held on entry */ 1113 if (clear_acceptconn_on_err == B_TRUE) 1114 so->so_state &= ~SS_ACCEPTCONN; 1115 if (restore_backlog_on_err == B_TRUE) 1116 so->so_backlog = save_so_backlog; 1117 1118 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1119 int err; 1120 1121 err = sotpi_unbind(so, 0); 1122 /* LINTED - statement has no consequent: if */ 1123 if (err) { 1124 eprintsoline(so, error); 1125 } else { 1126 ASSERT(!(so->so_state & SS_ISBOUND)); 1127 } 1128 } 1129 } 1130 if (!(flags & _SOBIND_LOCK_HELD)) { 1131 so_unlock_single(so, SOLOCKED); 1132 mutex_exit(&so->so_lock); 1133 } else { 1134 /* If the caller held the lock don't release it here */ 1135 ASSERT(MUTEX_HELD(&so->so_lock)); 1136 ASSERT(so->so_flag & SOLOCKED); 1137 } 1138 return (error); 1139 } 1140 1141 /* bind the socket */ 1142 static int 1143 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1144 int flags) 1145 { 1146 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1147 return (sotpi_bindlisten(so, name, namelen, 0, flags)); 1148 1149 flags &= ~_SOBIND_SOCKETPAIR; 1150 return (sotpi_bindlisten(so, name, namelen, 1, flags)); 1151 } 1152 1153 /* 1154 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1155 * address, or when listen needs to unbind and bind. 1156 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1157 * so that a sobind can pick them up. 1158 */ 1159 static int 1160 sotpi_unbind(struct sonode *so, int flags) 1161 { 1162 struct T_unbind_req unbind_req; 1163 int error = 0; 1164 mblk_t *mp; 1165 1166 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1167 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1168 1169 ASSERT(MUTEX_HELD(&so->so_lock)); 1170 ASSERT(so->so_flag & SOLOCKED); 1171 1172 if (!(so->so_state & SS_ISBOUND)) { 1173 error = EINVAL; 1174 eprintsoline(so, error); 1175 goto done; 1176 } 1177 1178 mutex_exit(&so->so_lock); 1179 1180 /* 1181 * Flush the read and write side (except stream head read queue) 1182 * and send down T_UNBIND_REQ. 1183 */ 1184 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1185 1186 unbind_req.PRIM_type = T_UNBIND_REQ; 1187 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1188 0, _ALLOC_SLEEP); 1189 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1190 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1191 mutex_enter(&so->so_lock); 1192 if (error) { 1193 eprintsoline(so, error); 1194 goto done; 1195 } 1196 1197 error = sowaitokack(so, T_UNBIND_REQ); 1198 if (error) { 1199 eprintsoline(so, error); 1200 goto done; 1201 } 1202 1203 /* 1204 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1205 * strsock_proto while the lock was dropped above, the unbind 1206 * is allowed to complete. 1207 */ 1208 if (!(flags & _SOUNBIND_REBIND)) { 1209 /* 1210 * Clear out bound address. 1211 */ 1212 vnode_t *vp; 1213 1214 if ((vp = so->so_ux_bound_vp) != NULL) { 1215 1216 /* Undo any SSL proxy setup */ 1217 if ((so->so_family == AF_INET || 1218 so->so_family == AF_INET6) && 1219 (so->so_type == SOCK_STREAM) && 1220 (so->so_kssl_ent != NULL)) { 1221 kssl_release_ent(so->so_kssl_ent, so, 1222 so->so_kssl_type); 1223 so->so_kssl_ent = NULL; 1224 so->so_kssl_type = KSSL_NO_PROXY; 1225 } 1226 1227 so->so_ux_bound_vp = NULL; 1228 vn_rele_stream(vp); 1229 } 1230 /* Clear out address */ 1231 so->so_laddr_len = 0; 1232 } 1233 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID); 1234 1235 done: 1236 1237 /* If the caller held the lock don't release it here */ 1238 ASSERT(MUTEX_HELD(&so->so_lock)); 1239 ASSERT(so->so_flag & SOLOCKED); 1240 1241 return (error); 1242 } 1243 1244 /* 1245 * listen on the socket. 1246 * For TPI conforming transports this has to first unbind with the transport 1247 * and then bind again using the new backlog. 1248 */ 1249 int 1250 sotpi_listen(struct sonode *so, int backlog) 1251 { 1252 int error = 0; 1253 1254 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1255 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1256 1257 if (so->so_serv_type == T_CLTS) 1258 return (EOPNOTSUPP); 1259 1260 /* 1261 * If the socket is ready to accept connections already, then 1262 * return without doing anything. This avoids a problem where 1263 * a second listen() call fails if a connection is pending and 1264 * leaves the socket unbound. Only when we are not unbinding 1265 * with the transport can we safely increase the backlog. 1266 */ 1267 if (so->so_state & SS_ACCEPTCONN && 1268 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1269 /*CONSTCOND*/ 1270 !solisten_tpi_tcp)) 1271 return (0); 1272 1273 if (so->so_state & SS_ISCONNECTED) 1274 return (EINVAL); 1275 1276 mutex_enter(&so->so_lock); 1277 so_lock_single(so); /* Set SOLOCKED */ 1278 1279 if (backlog < 0) 1280 backlog = 0; 1281 /* 1282 * Use the same qlimit as in BSD. BSD checks the qlimit 1283 * before queuing the next connection implying that a 1284 * listen(sock, 0) allows one connection to be queued. 1285 * BSD also uses 1.5 times the requested backlog. 1286 * 1287 * XNS Issue 4 required a strict interpretation of the backlog. 1288 * This has been waived subsequently for Issue 4 and the change 1289 * incorporated in XNS Issue 5. So we aren't required to do 1290 * anything special for XPG apps. 1291 */ 1292 if (backlog >= (INT_MAX - 1) / 3) 1293 backlog = INT_MAX; 1294 else 1295 backlog = backlog * 3 / 2 + 1; 1296 1297 /* 1298 * If the listen doesn't change the backlog we do nothing. 1299 * This avoids an EPROTO error from the transport. 1300 */ 1301 if ((so->so_state & SS_ACCEPTCONN) && 1302 so->so_backlog == backlog) 1303 goto done; 1304 1305 if (!(so->so_state & SS_ISBOUND)) { 1306 /* 1307 * Must have been explicitly bound in the UNIX domain. 1308 */ 1309 if (so->so_family == AF_UNIX) { 1310 error = EINVAL; 1311 goto done; 1312 } 1313 error = sotpi_bindlisten(so, NULL, 0, backlog, 1314 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1315 } else if (backlog > 0) { 1316 /* 1317 * AF_INET{,6} hack to avoid losing the port. 1318 * Assumes that all AF_INET{,6} transports can handle a 1319 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1320 * has already bound thus it is possible to avoid the unbind. 1321 */ 1322 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1323 /*CONSTCOND*/ 1324 !solisten_tpi_tcp)) { 1325 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1326 if (error) 1327 goto done; 1328 } 1329 error = sotpi_bindlisten(so, NULL, 0, backlog, 1330 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN); 1331 } else { 1332 so->so_state |= SS_ACCEPTCONN; 1333 so->so_backlog = backlog; 1334 } 1335 if (error) 1336 goto done; 1337 ASSERT(so->so_state & SS_ACCEPTCONN); 1338 done: 1339 so_unlock_single(so, SOLOCKED); 1340 mutex_exit(&so->so_lock); 1341 return (error); 1342 } 1343 1344 /* 1345 * Disconnect either a specified seqno or all (-1). 1346 * The former is used on listening sockets only. 1347 * 1348 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1349 * the current use of sodisconnect(seqno == -1) is only for shutdown 1350 * so there is no point (and potentially incorrect) to unbind. 1351 */ 1352 int 1353 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1354 { 1355 struct T_discon_req discon_req; 1356 int error = 0; 1357 mblk_t *mp; 1358 1359 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1360 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1361 1362 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1363 mutex_enter(&so->so_lock); 1364 so_lock_single(so); /* Set SOLOCKED */ 1365 } else { 1366 ASSERT(MUTEX_HELD(&so->so_lock)); 1367 ASSERT(so->so_flag & SOLOCKED); 1368 } 1369 1370 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1371 error = EINVAL; 1372 eprintsoline(so, error); 1373 goto done; 1374 } 1375 1376 mutex_exit(&so->so_lock); 1377 /* 1378 * Flush the write side (unless this is a listener) 1379 * and then send down a T_DISCON_REQ. 1380 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1381 * and other messages.) 1382 */ 1383 if (!(so->so_state & SS_ACCEPTCONN)) 1384 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1385 1386 discon_req.PRIM_type = T_DISCON_REQ; 1387 discon_req.SEQ_number = seqno; 1388 mp = soallocproto1(&discon_req, sizeof (discon_req), 1389 0, _ALLOC_SLEEP); 1390 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1391 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1392 mutex_enter(&so->so_lock); 1393 if (error) { 1394 eprintsoline(so, error); 1395 goto done; 1396 } 1397 1398 error = sowaitokack(so, T_DISCON_REQ); 1399 if (error) { 1400 eprintsoline(so, error); 1401 goto done; 1402 } 1403 /* 1404 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1405 * strsock_proto while the lock was dropped above, the disconnect 1406 * is allowed to complete. However, it is not possible to 1407 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1408 */ 1409 so->so_state &= 1410 ~(SS_ISCONNECTED|SS_ISCONNECTING|SS_LADDR_VALID|SS_FADDR_VALID); 1411 done: 1412 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1413 so_unlock_single(so, SOLOCKED); 1414 mutex_exit(&so->so_lock); 1415 } else { 1416 /* If the caller held the lock don't release it here */ 1417 ASSERT(MUTEX_HELD(&so->so_lock)); 1418 ASSERT(so->so_flag & SOLOCKED); 1419 } 1420 return (error); 1421 } 1422 1423 int 1424 sotpi_accept(struct sonode *so, int fflag, struct sonode **nsop) 1425 { 1426 struct T_conn_ind *conn_ind; 1427 struct T_conn_res *conn_res; 1428 int error = 0; 1429 mblk_t *mp, *ctxmp, *ack_mp; 1430 struct sonode *nso; 1431 vnode_t *nvp; 1432 void *src; 1433 t_uscalar_t srclen; 1434 void *opt; 1435 t_uscalar_t optlen; 1436 t_scalar_t PRIM_type; 1437 t_scalar_t SEQ_number; 1438 size_t sinlen; 1439 1440 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1441 (void *)so, fflag, (void *)nsop, 1442 pr_state(so->so_state, so->so_mode))); 1443 1444 /* 1445 * Defer single-threading the accepting socket until 1446 * the T_CONN_IND has been received and parsed and the 1447 * new sonode has been opened. 1448 */ 1449 1450 /* Check that we are not already connected */ 1451 if ((so->so_state & SS_ACCEPTCONN) == 0) 1452 goto conn_bad; 1453 again: 1454 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1455 goto e_bad; 1456 1457 ASSERT(mp); 1458 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1459 ctxmp = mp->b_cont; 1460 1461 /* 1462 * Save SEQ_number for error paths. 1463 */ 1464 SEQ_number = conn_ind->SEQ_number; 1465 1466 srclen = conn_ind->SRC_length; 1467 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1468 if (src == NULL) { 1469 error = EPROTO; 1470 freemsg(mp); 1471 eprintsoline(so, error); 1472 goto disconnect_unlocked; 1473 } 1474 optlen = conn_ind->OPT_length; 1475 switch (so->so_family) { 1476 case AF_INET: 1477 case AF_INET6: 1478 if ((optlen == sizeof (intptr_t)) && 1479 ((so->so_state & SS_DIRECT) != 0)) { 1480 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1481 &opt, conn_ind->OPT_length); 1482 } else { 1483 /* 1484 * The transport (in this case TCP) hasn't sent up 1485 * a pointer to an instance for the accept fast-path. 1486 * Disable fast-path completely because the call to 1487 * sotpi_create() below would otherwise create an 1488 * incomplete TCP instance, which would lead to 1489 * problems when sockfs sends a normal T_CONN_RES 1490 * message down the new stream. 1491 */ 1492 if (so->so_state & SS_DIRECT) { 1493 int rval; 1494 /* 1495 * For consistency we inform tcp to disable 1496 * direct interface on the listener, though 1497 * we can certainly live without doing this 1498 * because no data will ever travel upstream 1499 * on the listening socket. 1500 */ 1501 so->so_state &= ~SS_DIRECT; 1502 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1503 0, 0, K_TO_K, CRED(), &rval); 1504 } 1505 opt = NULL; 1506 optlen = 0; 1507 } 1508 break; 1509 case AF_UNIX: 1510 default: 1511 if (optlen != 0) { 1512 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1513 __TPI_ALIGN_SIZE); 1514 if (opt == NULL) { 1515 error = EPROTO; 1516 freemsg(mp); 1517 eprintsoline(so, error); 1518 goto disconnect_unlocked; 1519 } 1520 } 1521 if (so->so_family == AF_UNIX) { 1522 if (!(so->so_state & SS_FADDR_NOXLATE)) { 1523 src = NULL; 1524 srclen = 0; 1525 } 1526 /* Extract src address from options */ 1527 if (optlen != 0) 1528 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1529 } 1530 break; 1531 } 1532 1533 /* 1534 * Create the new socket. 1535 */ 1536 VN_HOLD(so->so_accessvp); 1537 nso = sotpi_create(so->so_accessvp, so->so_family, so->so_type, 1538 so->so_protocol, so->so_version, so, &error); 1539 if (nso == NULL) { 1540 ASSERT(error != 0); 1541 /* 1542 * Accept can not fail with ENOBUFS. sotpi_create 1543 * sleeps waiting for memory until a signal is caught 1544 * so return EINTR. 1545 */ 1546 freemsg(mp); 1547 if (error == ENOBUFS) 1548 error = EINTR; 1549 goto e_disc_unl; 1550 } 1551 nvp = SOTOV(nso); 1552 1553 /* 1554 * If the transport sent up an SSL connection context, then attach 1555 * it the new socket, and set the (sd_wputdatafunc)() and 1556 * (sd_rputdatafunc)() stream head hooks to intercept and process 1557 * SSL records. 1558 */ 1559 if (ctxmp != NULL) { 1560 /* 1561 * This kssl_ctx_t is already held for us by the transport. 1562 * So, we don't need to do a kssl_hold_ctx() here. 1563 */ 1564 nso->so_kssl_ctx = *((kssl_ctx_t *)ctxmp->b_rptr); 1565 freemsg(ctxmp); 1566 mp->b_cont = NULL; 1567 strsetrwputdatahooks(nvp, strsock_kssl_input, 1568 strsock_kssl_output); 1569 1570 /* Disable sodirect if any */ 1571 if (nso->so_direct != NULL) { 1572 mutex_enter(nso->so_direct->sod_lockp); 1573 SOD_DISABLE(nso->so_direct); 1574 mutex_exit(nso->so_direct->sod_lockp); 1575 nso->so_direct = NULL; 1576 } 1577 } 1578 #ifdef DEBUG 1579 /* 1580 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1581 * it's inherited early to allow debugging of the accept code itself. 1582 */ 1583 nso->so_options |= so->so_options & SO_DEBUG; 1584 #endif /* DEBUG */ 1585 1586 /* 1587 * Save the SRC address from the T_CONN_IND 1588 * for getpeername to work on AF_UNIX and on transports that do not 1589 * support TI_GETPEERNAME. 1590 * 1591 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1592 * copyin_name(). 1593 */ 1594 if (srclen > (t_uscalar_t)nso->so_faddr_maxlen) { 1595 error = EINVAL; 1596 freemsg(mp); 1597 eprintsoline(so, error); 1598 goto disconnect_vp_unlocked; 1599 } 1600 nso->so_faddr_len = (socklen_t)srclen; 1601 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 1602 bcopy(src, nso->so_faddr_sa, srclen); 1603 nso->so_state |= SS_FADDR_VALID; 1604 1605 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1606 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1607 cred_t *cr; 1608 1609 if ((cr = DB_CRED(mp)) != NULL) { 1610 crhold(cr); 1611 nso->so_peercred = cr; 1612 nso->so_cpid = DB_CPID(mp); 1613 } 1614 freemsg(mp); 1615 1616 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1617 sizeof (intptr_t), 0, _ALLOC_INTR); 1618 if (mp == NULL) { 1619 /* 1620 * Accept can not fail with ENOBUFS. 1621 * A signal was caught so return EINTR. 1622 */ 1623 error = EINTR; 1624 eprintsoline(so, error); 1625 goto disconnect_vp_unlocked; 1626 } 1627 conn_res = (struct T_conn_res *)mp->b_rptr; 1628 } else { 1629 nso->so_peercred = DB_CRED(mp); 1630 nso->so_cpid = DB_CPID(mp); 1631 DB_CRED(mp) = NULL; 1632 1633 mp->b_rptr = DB_BASE(mp); 1634 conn_res = (struct T_conn_res *)mp->b_rptr; 1635 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1636 } 1637 1638 /* 1639 * New socket must be bound at least in sockfs and, except for AF_INET, 1640 * (or AF_INET6) it also has to be bound in the transport provider. 1641 * We set the local address in the sonode from the T_OK_ACK of the 1642 * T_CONN_RES. For this reason the address we bind to here isn't 1643 * important. 1644 */ 1645 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1646 /*CONSTCOND*/ 1647 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1648 /* 1649 * Optimization for AF_INET{,6} transports 1650 * that can handle a T_CONN_RES without being bound. 1651 */ 1652 mutex_enter(&nso->so_lock); 1653 so_automatic_bind(nso); 1654 mutex_exit(&nso->so_lock); 1655 } else { 1656 /* Perform NULL bind with the transport provider. */ 1657 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC)) != 0) { 1658 ASSERT(error != ENOBUFS); 1659 freemsg(mp); 1660 eprintsoline(nso, error); 1661 goto disconnect_vp_unlocked; 1662 } 1663 } 1664 1665 /* 1666 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1667 * so that any data arriving on the new socket will cause the 1668 * appropriate signals to be delivered for the new socket. 1669 * 1670 * No other thread (except strsock_proto and strsock_misc) 1671 * can access the new socket thus we relax the locking. 1672 */ 1673 nso->so_pgrp = so->so_pgrp; 1674 nso->so_state |= so->so_state & (SS_ASYNC|SS_FADDR_NOXLATE); 1675 1676 if (nso->so_pgrp != 0) { 1677 if ((error = so_set_events(nso, nvp, CRED())) != 0) { 1678 eprintsoline(nso, error); 1679 error = 0; 1680 nso->so_pgrp = 0; 1681 } 1682 } 1683 1684 /* 1685 * Make note of the socket level options. TCP and IP level options 1686 * are already inherited. We could do all this after accept is 1687 * successful but doing it here simplifies code and no harm done 1688 * for error case. 1689 */ 1690 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1691 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1692 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1693 nso->so_sndbuf = so->so_sndbuf; 1694 nso->so_rcvbuf = so->so_rcvbuf; 1695 if (nso->so_options & SO_LINGER) 1696 nso->so_linger = so->so_linger; 1697 1698 if ((so->so_state & SS_DIRECT) != 0) { 1699 1700 ASSERT(opt != NULL); 1701 1702 conn_res->OPT_length = optlen; 1703 conn_res->OPT_offset = MBLKL(mp); 1704 bcopy(&opt, mp->b_wptr, optlen); 1705 mp->b_wptr += optlen; 1706 conn_res->PRIM_type = T_CONN_RES; 1707 conn_res->ACCEPTOR_id = 0; 1708 PRIM_type = T_CONN_RES; 1709 1710 /* Send down the T_CONN_RES on acceptor STREAM */ 1711 error = kstrputmsg(SOTOV(nso), mp, NULL, 1712 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1713 if (error) { 1714 mutex_enter(&so->so_lock); 1715 so_lock_single(so); 1716 eprintsoline(so, error); 1717 goto disconnect_vp; 1718 } 1719 mutex_enter(&nso->so_lock); 1720 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1721 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1722 if (error) { 1723 mutex_exit(&nso->so_lock); 1724 mutex_enter(&so->so_lock); 1725 so_lock_single(so); 1726 eprintsoline(so, error); 1727 goto disconnect_vp; 1728 } 1729 if (nso->so_family == AF_INET) { 1730 sin_t *sin; 1731 1732 sin = (sin_t *)(ack_mp->b_rptr + 1733 sizeof (struct T_ok_ack)); 1734 bcopy(sin, nso->so_laddr_sa, sizeof (sin_t)); 1735 nso->so_laddr_len = sizeof (sin_t); 1736 } else { 1737 sin6_t *sin6; 1738 1739 sin6 = (sin6_t *)(ack_mp->b_rptr + 1740 sizeof (struct T_ok_ack)); 1741 bcopy(sin6, nso->so_laddr_sa, sizeof (sin6_t)); 1742 nso->so_laddr_len = sizeof (sin6_t); 1743 } 1744 freemsg(ack_mp); 1745 1746 nso->so_state |= SS_ISCONNECTED | SS_LADDR_VALID; 1747 nso->so_priv = opt; 1748 1749 if (so->so_nl7c_flags & NL7C_ENABLED) { 1750 /* 1751 * A NL7C marked listen()er so the new socket 1752 * inherits the listen()er's NL7C state, except 1753 * for NL7C_POLLIN. 1754 * 1755 * Only call NL7C to process the new socket if 1756 * the listen socket allows blocking i/o. 1757 */ 1758 nso->so_nl7c_flags = so->so_nl7c_flags & (~NL7C_POLLIN); 1759 if (so->so_state & (SS_NONBLOCK|SS_NDELAY)) { 1760 /* 1761 * Nonblocking accept() just make it 1762 * persist to defer processing to the 1763 * read-side syscall (e.g. read). 1764 */ 1765 nso->so_nl7c_flags |= NL7C_SOPERSIST; 1766 } else if (nl7c_process(nso, B_FALSE)) { 1767 /* 1768 * NL7C has completed processing on the 1769 * socket, close the socket and back to 1770 * the top to await the next T_CONN_IND. 1771 */ 1772 mutex_exit(&nso->so_lock); 1773 (void) VOP_CLOSE(nvp, 0, 1, (offset_t)0, 1774 CRED(), NULL); 1775 VN_RELE(nvp); 1776 goto again; 1777 } 1778 /* Pass the new socket out */ 1779 } 1780 1781 mutex_exit(&nso->so_lock); 1782 1783 /* 1784 * It's possible, through the use of autopush for example, 1785 * that the acceptor stream may not support SS_DIRECT 1786 * semantics. If the new socket does not support SS_DIRECT 1787 * we issue a _SIOCSOCKFALLBACK to inform the transport 1788 * as we would in the I_PUSH case. 1789 */ 1790 if (!(nso->so_state & SS_DIRECT)) { 1791 int rval; 1792 1793 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 1794 0, 0, K_TO_K, CRED(), &rval)) != 0) { 1795 mutex_enter(&so->so_lock); 1796 so_lock_single(so); 1797 eprintsoline(so, error); 1798 goto disconnect_vp; 1799 } 1800 } 1801 1802 /* 1803 * Pass out new socket. 1804 */ 1805 if (nsop != NULL) 1806 *nsop = nso; 1807 1808 return (0); 1809 } 1810 1811 /* 1812 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1813 * which don't support the FireEngine accept fast-path. It is also 1814 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1815 * again. Neither sockfs nor TCP attempt to find out if some other 1816 * random module has been inserted in between (in which case we 1817 * should follow TLI accept behaviour). We blindly assume the worst 1818 * case and revert back to old behaviour i.e. TCP will not send us 1819 * any option (eager) and the accept should happen on the listener 1820 * queue. Any queued T_conn_ind have already got their options removed 1821 * by so_sock2_stream() when "sockmod" was I_POP'd. 1822 */ 1823 /* 1824 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1825 */ 1826 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1827 #ifdef _ILP32 1828 queue_t *q; 1829 1830 /* 1831 * Find read queue in driver 1832 * Can safely do this since we "own" nso/nvp. 1833 */ 1834 q = strvp2wq(nvp)->q_next; 1835 while (SAMESTR(q)) 1836 q = q->q_next; 1837 q = RD(q); 1838 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1839 #else 1840 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1841 #endif /* _ILP32 */ 1842 conn_res->PRIM_type = O_T_CONN_RES; 1843 PRIM_type = O_T_CONN_RES; 1844 } else { 1845 conn_res->ACCEPTOR_id = nso->so_acceptor_id; 1846 conn_res->PRIM_type = T_CONN_RES; 1847 PRIM_type = T_CONN_RES; 1848 } 1849 conn_res->SEQ_number = SEQ_number; 1850 conn_res->OPT_length = 0; 1851 conn_res->OPT_offset = 0; 1852 1853 mutex_enter(&so->so_lock); 1854 so_lock_single(so); /* Set SOLOCKED */ 1855 mutex_exit(&so->so_lock); 1856 1857 error = kstrputmsg(SOTOV(so), mp, NULL, 1858 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1859 mutex_enter(&so->so_lock); 1860 if (error) { 1861 eprintsoline(so, error); 1862 goto disconnect_vp; 1863 } 1864 error = sowaitprim(so, PRIM_type, T_OK_ACK, 1865 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1866 if (error) { 1867 eprintsoline(so, error); 1868 goto disconnect_vp; 1869 } 1870 /* 1871 * If there is a sin/sin6 appended onto the T_OK_ACK use 1872 * that to set the local address. If this is not present 1873 * then we zero out the address and don't set the 1874 * SS_LADDR_VALID bit. For AF_UNIX endpoints we copy over 1875 * the pathname from the listening socket. 1876 */ 1877 sinlen = (nso->so_family == AF_INET) ? sizeof (sin_t) : sizeof (sin6_t); 1878 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 1879 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 1880 ack_mp->b_rptr += sizeof (struct T_ok_ack); 1881 bcopy(ack_mp->b_rptr, nso->so_laddr_sa, sinlen); 1882 nso->so_laddr_len = sinlen; 1883 nso->so_state |= SS_LADDR_VALID; 1884 } else if (nso->so_family == AF_UNIX) { 1885 ASSERT(so->so_family == AF_UNIX); 1886 nso->so_laddr_len = so->so_laddr_len; 1887 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1888 bcopy(so->so_laddr_sa, nso->so_laddr_sa, nso->so_laddr_len); 1889 nso->so_state |= SS_LADDR_VALID; 1890 } else { 1891 nso->so_laddr_len = so->so_laddr_len; 1892 ASSERT(nso->so_laddr_len <= nso->so_laddr_maxlen); 1893 bzero(nso->so_laddr_sa, nso->so_addr_size); 1894 nso->so_laddr_sa->sa_family = nso->so_family; 1895 } 1896 freemsg(ack_mp); 1897 1898 so_unlock_single(so, SOLOCKED); 1899 mutex_exit(&so->so_lock); 1900 1901 nso->so_state |= SS_ISCONNECTED; 1902 1903 /* 1904 * Pass out new socket. 1905 */ 1906 if (nsop != NULL) 1907 *nsop = nso; 1908 1909 return (0); 1910 1911 1912 eproto_disc_unl: 1913 error = EPROTO; 1914 e_disc_unl: 1915 eprintsoline(so, error); 1916 goto disconnect_unlocked; 1917 1918 pr_disc_vp_unl: 1919 eprintsoline(so, error); 1920 disconnect_vp_unlocked: 1921 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 1922 VN_RELE(nvp); 1923 disconnect_unlocked: 1924 (void) sodisconnect(so, SEQ_number, 0); 1925 return (error); 1926 1927 pr_disc_vp: 1928 eprintsoline(so, error); 1929 disconnect_vp: 1930 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 1931 so_unlock_single(so, SOLOCKED); 1932 mutex_exit(&so->so_lock); 1933 (void) VOP_CLOSE(nvp, 0, 1, 0, CRED(), NULL); 1934 VN_RELE(nvp); 1935 return (error); 1936 1937 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 1938 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 1939 ? EOPNOTSUPP : EINVAL; 1940 e_bad: 1941 eprintsoline(so, error); 1942 return (error); 1943 } 1944 1945 /* 1946 * connect a socket. 1947 * 1948 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 1949 * unconnect (by specifying a null address). 1950 */ 1951 int 1952 sotpi_connect(struct sonode *so, 1953 const struct sockaddr *name, 1954 socklen_t namelen, 1955 int fflag, 1956 int flags) 1957 { 1958 struct T_conn_req conn_req; 1959 int error = 0; 1960 mblk_t *mp; 1961 void *src; 1962 socklen_t srclen; 1963 void *addr; 1964 socklen_t addrlen; 1965 boolean_t need_unlock; 1966 1967 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 1968 (void *)so, (void *)name, namelen, fflag, flags, 1969 pr_state(so->so_state, so->so_mode))); 1970 1971 /* 1972 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 1973 * avoid sleeping for memory with SOLOCKED held. 1974 * We know that the T_CONN_REQ can't be larger than 2 * so_faddr_maxlen 1975 * + sizeof (struct T_opthdr). 1976 * (the AF_UNIX so_ux_addr_xlate() does not make the address 1977 * exceed so_faddr_maxlen). 1978 */ 1979 mp = soallocproto(sizeof (struct T_conn_req) + 1980 2 * so->so_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR); 1981 if (mp == NULL) { 1982 /* 1983 * Connect can not fail with ENOBUFS. A signal was 1984 * caught so return EINTR. 1985 */ 1986 error = EINTR; 1987 eprintsoline(so, error); 1988 return (error); 1989 } 1990 1991 mutex_enter(&so->so_lock); 1992 /* 1993 * Make sure there is a preallocated T_unbind_req message 1994 * before any binding. This message is allocated when the 1995 * socket is created. Since another thread can consume 1996 * so_unbind_mp by the time we return from so_lock_single(), 1997 * we should check the availability of so_unbind_mp after 1998 * we return from so_lock_single(). 1999 */ 2000 2001 so_lock_single(so); /* Set SOLOCKED */ 2002 need_unlock = B_TRUE; 2003 2004 if (so->so_unbind_mp == NULL) { 2005 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2006 /* NOTE: holding so_lock while sleeping */ 2007 so->so_unbind_mp = 2008 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR); 2009 if (so->so_unbind_mp == NULL) { 2010 error = EINTR; 2011 goto done; 2012 } 2013 } 2014 2015 /* 2016 * Can't have done a listen before connecting. 2017 */ 2018 if (so->so_state & SS_ACCEPTCONN) { 2019 error = EOPNOTSUPP; 2020 goto done; 2021 } 2022 2023 /* 2024 * Must be bound with the transport 2025 */ 2026 if (!(so->so_state & SS_ISBOUND)) { 2027 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2028 /*CONSTCOND*/ 2029 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2030 /* 2031 * Optimization for AF_INET{,6} transports 2032 * that can handle a T_CONN_REQ without being bound. 2033 */ 2034 so_automatic_bind(so); 2035 } else { 2036 error = sotpi_bind(so, NULL, 0, 2037 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 2038 if (error) 2039 goto done; 2040 } 2041 ASSERT(so->so_state & SS_ISBOUND); 2042 flags |= _SOCONNECT_DID_BIND; 2043 } 2044 2045 /* 2046 * Handle a connect to a name parameter of type AF_UNSPEC like a 2047 * connect to a null address. This is the portable method to 2048 * unconnect a socket. 2049 */ 2050 if ((namelen >= sizeof (sa_family_t)) && 2051 (name->sa_family == AF_UNSPEC)) { 2052 name = NULL; 2053 namelen = 0; 2054 } 2055 2056 /* 2057 * Check that we are not already connected. 2058 * A connection-oriented socket cannot be reconnected. 2059 * A connected connection-less socket can be 2060 * - connected to a different address by a subsequent connect 2061 * - "unconnected" by a connect to the NULL address 2062 */ 2063 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2064 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2065 if (so->so_mode & SM_CONNREQUIRED) { 2066 /* Connection-oriented socket */ 2067 error = so->so_state & SS_ISCONNECTED ? 2068 EISCONN : EALREADY; 2069 goto done; 2070 } 2071 /* Connection-less socket */ 2072 if (name == NULL) { 2073 /* 2074 * Remove the connected state and clear SO_DGRAM_ERRIND 2075 * since it was set when the socket was connected. 2076 * If this is UDP also send down a T_DISCON_REQ. 2077 */ 2078 int val; 2079 2080 if ((so->so_family == AF_INET || 2081 so->so_family == AF_INET6) && 2082 (so->so_type == SOCK_DGRAM || 2083 so->so_type == SOCK_RAW) && 2084 /*CONSTCOND*/ 2085 !soconnect_tpi_udp) { 2086 /* XXX What about implicitly unbinding here? */ 2087 error = sodisconnect(so, -1, 2088 _SODISCONNECT_LOCK_HELD); 2089 } else { 2090 so->so_state &= 2091 ~(SS_ISCONNECTED | SS_ISCONNECTING | 2092 SS_FADDR_VALID); 2093 so->so_faddr_len = 0; 2094 } 2095 2096 so_unlock_single(so, SOLOCKED); 2097 mutex_exit(&so->so_lock); 2098 2099 val = 0; 2100 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2101 &val, (t_uscalar_t)sizeof (val)); 2102 2103 mutex_enter(&so->so_lock); 2104 so_lock_single(so); /* Set SOLOCKED */ 2105 goto done; 2106 } 2107 } 2108 ASSERT(so->so_state & SS_ISBOUND); 2109 2110 if (name == NULL || namelen == 0) { 2111 error = EINVAL; 2112 goto done; 2113 } 2114 /* 2115 * Mark the socket if so_faddr_sa represents the transport level 2116 * address. 2117 */ 2118 if (flags & _SOCONNECT_NOXLATE) { 2119 struct sockaddr_ux *soaddr_ux; 2120 2121 ASSERT(so->so_family == AF_UNIX); 2122 if (namelen != sizeof (struct sockaddr_ux)) { 2123 error = EINVAL; 2124 goto done; 2125 } 2126 soaddr_ux = (struct sockaddr_ux *)name; 2127 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2128 namelen = sizeof (soaddr_ux->sou_addr); 2129 so->so_state |= SS_FADDR_NOXLATE; 2130 } 2131 2132 /* 2133 * Length and family checks. 2134 */ 2135 error = so_addr_verify(so, name, namelen); 2136 if (error) 2137 goto bad; 2138 2139 /* 2140 * Save foreign address. Needed for AF_UNIX as well as 2141 * transport providers that do not support TI_GETPEERNAME. 2142 * Also used for cached foreign address for TCP and UDP. 2143 */ 2144 if (namelen > (t_uscalar_t)so->so_faddr_maxlen) { 2145 error = EINVAL; 2146 goto done; 2147 } 2148 so->so_faddr_len = (socklen_t)namelen; 2149 ASSERT(so->so_faddr_len <= so->so_faddr_maxlen); 2150 bcopy(name, so->so_faddr_sa, namelen); 2151 so->so_state |= SS_FADDR_VALID; 2152 2153 if (so->so_family == AF_UNIX) { 2154 if (so->so_state & SS_FADDR_NOXLATE) { 2155 /* 2156 * Already have a transport internal address. Do not 2157 * pass any (transport internal) source address. 2158 */ 2159 addr = so->so_faddr_sa; 2160 addrlen = (t_uscalar_t)so->so_faddr_len; 2161 src = NULL; 2162 srclen = 0; 2163 } else { 2164 /* 2165 * Pass the sockaddr_un source address as an option 2166 * and translate the remote address. 2167 * Holding so_lock thus so_laddr_sa can not change. 2168 */ 2169 src = so->so_laddr_sa; 2170 srclen = (t_uscalar_t)so->so_laddr_len; 2171 dprintso(so, 1, 2172 ("sotpi_connect UNIX: srclen %d, src %p\n", 2173 srclen, src)); 2174 error = so_ux_addr_xlate(so, 2175 so->so_faddr_sa, (socklen_t)so->so_faddr_len, 2176 (flags & _SOCONNECT_XPG4_2), 2177 &addr, &addrlen); 2178 if (error) 2179 goto bad; 2180 } 2181 } else { 2182 addr = so->so_faddr_sa; 2183 addrlen = (t_uscalar_t)so->so_faddr_len; 2184 src = NULL; 2185 srclen = 0; 2186 } 2187 /* 2188 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2189 * option which asks the transport provider to send T_UDERR_IND 2190 * messages. These T_UDERR_IND messages are used to return connected 2191 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2192 * 2193 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2194 * we send down a T_CONN_REQ. This is needed to let the 2195 * transport assign a local address that is consistent with 2196 * the remote address. Applications depend on a getsockname() 2197 * after a connect() to retrieve the "source" IP address for 2198 * the connected socket. Invalidate the cached local address 2199 * to force getsockname() to enquire of the transport. 2200 */ 2201 if (!(so->so_mode & SM_CONNREQUIRED)) { 2202 /* 2203 * Datagram socket. 2204 */ 2205 int32_t val; 2206 2207 so_unlock_single(so, SOLOCKED); 2208 mutex_exit(&so->so_lock); 2209 2210 val = 1; 2211 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2212 &val, (t_uscalar_t)sizeof (val)); 2213 2214 mutex_enter(&so->so_lock); 2215 so_lock_single(so); /* Set SOLOCKED */ 2216 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2217 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2218 soconnect_tpi_udp) { 2219 soisconnected(so); 2220 goto done; 2221 } 2222 /* 2223 * Send down T_CONN_REQ etc. 2224 * Clear fflag to avoid returning EWOULDBLOCK. 2225 */ 2226 fflag = 0; 2227 ASSERT(so->so_family != AF_UNIX); 2228 so->so_state &= ~SS_LADDR_VALID; 2229 } else if (so->so_laddr_len != 0) { 2230 /* 2231 * If the local address or port was "any" then it may be 2232 * changed by the transport as a result of the 2233 * connect. Invalidate the cached version if we have one. 2234 */ 2235 switch (so->so_family) { 2236 case AF_INET: 2237 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin_t)); 2238 if (((sin_t *)so->so_laddr_sa)->sin_addr.s_addr == 2239 INADDR_ANY || 2240 ((sin_t *)so->so_laddr_sa)->sin_port == 0) 2241 so->so_state &= ~SS_LADDR_VALID; 2242 break; 2243 2244 case AF_INET6: 2245 ASSERT(so->so_laddr_len == (socklen_t)sizeof (sin6_t)); 2246 if (IN6_IS_ADDR_UNSPECIFIED( 2247 &((sin6_t *)so->so_laddr_sa) ->sin6_addr) || 2248 IN6_IS_ADDR_V4MAPPED_ANY( 2249 &((sin6_t *)so->so_laddr_sa)->sin6_addr) || 2250 ((sin6_t *)so->so_laddr_sa)->sin6_port == 0) 2251 so->so_state &= ~SS_LADDR_VALID; 2252 break; 2253 2254 default: 2255 break; 2256 } 2257 } 2258 2259 /* 2260 * Check for failure of an earlier call 2261 */ 2262 if (so->so_error != 0) 2263 goto so_bad; 2264 2265 /* 2266 * Send down T_CONN_REQ. Message was allocated above. 2267 */ 2268 conn_req.PRIM_type = T_CONN_REQ; 2269 conn_req.DEST_length = addrlen; 2270 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2271 if (srclen == 0) { 2272 conn_req.OPT_length = 0; 2273 conn_req.OPT_offset = 0; 2274 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2275 soappendmsg(mp, addr, addrlen); 2276 } else { 2277 /* 2278 * There is a AF_UNIX sockaddr_un to include as a source 2279 * address option. 2280 */ 2281 struct T_opthdr toh; 2282 2283 toh.level = SOL_SOCKET; 2284 toh.name = SO_SRCADDR; 2285 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2286 toh.status = 0; 2287 conn_req.OPT_length = 2288 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2289 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2290 _TPI_ALIGN_TOPT(addrlen)); 2291 2292 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2293 soappendmsg(mp, addr, addrlen); 2294 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2295 soappendmsg(mp, &toh, sizeof (toh)); 2296 soappendmsg(mp, src, srclen); 2297 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2298 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2299 } 2300 /* 2301 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2302 * in order to have the right state when the T_CONN_CON shows up. 2303 */ 2304 soisconnecting(so); 2305 mutex_exit(&so->so_lock); 2306 2307 if (audit_active) 2308 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2309 2310 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2311 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2312 mp = NULL; 2313 mutex_enter(&so->so_lock); 2314 if (error != 0) 2315 goto bad; 2316 2317 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2318 goto bad; 2319 2320 /* Allow other threads to access the socket */ 2321 so_unlock_single(so, SOLOCKED); 2322 need_unlock = B_FALSE; 2323 2324 /* 2325 * Wait until we get a T_CONN_CON or an error 2326 */ 2327 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2328 so_lock_single(so); /* Set SOLOCKED */ 2329 need_unlock = B_TRUE; 2330 } 2331 2332 done: 2333 freemsg(mp); 2334 switch (error) { 2335 case EINPROGRESS: 2336 case EALREADY: 2337 case EISCONN: 2338 case EINTR: 2339 /* Non-fatal errors */ 2340 so->so_state &= ~SS_LADDR_VALID; 2341 /* FALLTHRU */ 2342 case 0: 2343 break; 2344 2345 case EHOSTUNREACH: 2346 if (flags & _SOCONNECT_XPG4_2) { 2347 /* 2348 * X/Open specification contains a requirement that 2349 * ENETUNREACH be returned but does not require 2350 * EHOSTUNREACH. In order to keep the test suite 2351 * happy we mess with the errno here. 2352 */ 2353 error = ENETUNREACH; 2354 } 2355 /* FALLTHRU */ 2356 2357 default: 2358 ASSERT(need_unlock); 2359 /* 2360 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2361 * and invalidate local-address cache 2362 */ 2363 so->so_state &= ~(SS_ISCONNECTING | SS_LADDR_VALID); 2364 /* A discon_ind might have already unbound us */ 2365 if ((flags & _SOCONNECT_DID_BIND) && 2366 (so->so_state & SS_ISBOUND)) { 2367 int err; 2368 2369 err = sotpi_unbind(so, 0); 2370 /* LINTED - statement has no conseq */ 2371 if (err) { 2372 eprintsoline(so, err); 2373 } 2374 } 2375 break; 2376 } 2377 if (need_unlock) 2378 so_unlock_single(so, SOLOCKED); 2379 mutex_exit(&so->so_lock); 2380 return (error); 2381 2382 so_bad: error = sogeterr(so); 2383 bad: eprintsoline(so, error); 2384 goto done; 2385 } 2386 2387 int 2388 sotpi_shutdown(struct sonode *so, int how) 2389 { 2390 struct T_ordrel_req ordrel_req; 2391 mblk_t *mp; 2392 uint_t old_state, state_change; 2393 int error = 0; 2394 2395 dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2396 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2397 2398 mutex_enter(&so->so_lock); 2399 so_lock_single(so); /* Set SOLOCKED */ 2400 2401 /* 2402 * SunOS 4.X has no check for datagram sockets. 2403 * 5.X checks that it is connected (ENOTCONN) 2404 * X/Open requires that we check the connected state. 2405 */ 2406 if (!(so->so_state & SS_ISCONNECTED)) { 2407 if (!xnet_skip_checks) { 2408 error = ENOTCONN; 2409 if (xnet_check_print) { 2410 printf("sockfs: X/Open shutdown check " 2411 "caused ENOTCONN\n"); 2412 } 2413 } 2414 goto done; 2415 } 2416 /* 2417 * Record the current state and then perform any state changes. 2418 * Then use the difference between the old and new states to 2419 * determine which messages need to be sent. 2420 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2421 * duplicate calls to shutdown(). 2422 */ 2423 old_state = so->so_state; 2424 2425 switch (how) { 2426 case 0: 2427 socantrcvmore(so); 2428 break; 2429 case 1: 2430 socantsendmore(so); 2431 break; 2432 case 2: 2433 socantsendmore(so); 2434 socantrcvmore(so); 2435 break; 2436 default: 2437 error = EINVAL; 2438 goto done; 2439 } 2440 2441 /* 2442 * Assumes that the SS_CANT* flags are never cleared in the above code. 2443 */ 2444 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2445 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2446 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2447 2448 switch (state_change) { 2449 case 0: 2450 dprintso(so, 1, 2451 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2452 so->so_state)); 2453 goto done; 2454 2455 case SS_CANTRCVMORE: 2456 mutex_exit(&so->so_lock); 2457 strseteof(SOTOV(so), 1); 2458 /* 2459 * strseteof takes care of read side wakeups, 2460 * pollwakeups, and signals. 2461 */ 2462 /* 2463 * Get the read lock before flushing data to avoid problems 2464 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2465 */ 2466 mutex_enter(&so->so_lock); 2467 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2468 mutex_exit(&so->so_lock); 2469 2470 /* Flush read side queue */ 2471 strflushrq(SOTOV(so), FLUSHALL); 2472 2473 mutex_enter(&so->so_lock); 2474 so_unlock_read(so); /* Clear SOREADLOCKED */ 2475 break; 2476 2477 case SS_CANTSENDMORE: 2478 mutex_exit(&so->so_lock); 2479 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2480 mutex_enter(&so->so_lock); 2481 break; 2482 2483 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2484 mutex_exit(&so->so_lock); 2485 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2486 strseteof(SOTOV(so), 1); 2487 /* 2488 * strseteof takes care of read side wakeups, 2489 * pollwakeups, and signals. 2490 */ 2491 /* 2492 * Get the read lock before flushing data to avoid problems 2493 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2494 */ 2495 mutex_enter(&so->so_lock); 2496 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2497 mutex_exit(&so->so_lock); 2498 2499 /* Flush read side queue */ 2500 strflushrq(SOTOV(so), FLUSHALL); 2501 2502 mutex_enter(&so->so_lock); 2503 so_unlock_read(so); /* Clear SOREADLOCKED */ 2504 break; 2505 } 2506 2507 ASSERT(MUTEX_HELD(&so->so_lock)); 2508 2509 /* 2510 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2511 * was set due to this call and the new state has both of them set: 2512 * Send the AF_UNIX close indication 2513 * For T_COTS send a discon_ind 2514 * 2515 * If cantsend was set due to this call: 2516 * For T_COTSORD send an ordrel_ind 2517 * 2518 * Note that for T_CLTS there is no message sent here. 2519 */ 2520 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2521 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2522 /* 2523 * For SunOS 4.X compatibility we tell the other end 2524 * that we are unable to receive at this point. 2525 */ 2526 if (so->so_family == AF_UNIX && so->so_serv_type != T_CLTS) 2527 so_unix_close(so); 2528 2529 if (so->so_serv_type == T_COTS) 2530 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2531 } 2532 if ((state_change & SS_CANTSENDMORE) && 2533 (so->so_serv_type == T_COTS_ORD)) { 2534 /* Send an orderly release */ 2535 ordrel_req.PRIM_type = T_ORDREL_REQ; 2536 2537 mutex_exit(&so->so_lock); 2538 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2539 0, _ALLOC_SLEEP); 2540 /* 2541 * Send down the T_ORDREL_REQ even if there is flow control. 2542 * This prevents shutdown from blocking. 2543 * Note that there is no T_OK_ACK for ordrel_req. 2544 */ 2545 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2546 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2547 mutex_enter(&so->so_lock); 2548 if (error) { 2549 eprintsoline(so, error); 2550 goto done; 2551 } 2552 } 2553 2554 done: 2555 so_unlock_single(so, SOLOCKED); 2556 mutex_exit(&so->so_lock); 2557 return (error); 2558 } 2559 2560 /* 2561 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2562 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2563 * that we have closed. 2564 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2565 * T_UNITDATA_REQ containing the same option. 2566 * 2567 * For SOCK_DGRAM half-connections (somebody connected to this end 2568 * but this end is not connect) we don't know where to send any 2569 * SO_UNIX_CLOSE. 2570 * 2571 * We have to ignore stream head errors just in case there has been 2572 * a shutdown(output). 2573 * Ignore any flow control to try to get the message more quickly to the peer. 2574 * While locally ignoring flow control solves the problem when there 2575 * is only the loopback transport on the stream it would not provide 2576 * the correct AF_UNIX socket semantics when one or more modules have 2577 * been pushed. 2578 */ 2579 void 2580 so_unix_close(struct sonode *so) 2581 { 2582 int error; 2583 struct T_opthdr toh; 2584 mblk_t *mp; 2585 2586 ASSERT(MUTEX_HELD(&so->so_lock)); 2587 2588 ASSERT(so->so_family == AF_UNIX); 2589 2590 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2591 (SS_ISCONNECTED|SS_ISBOUND)) 2592 return; 2593 2594 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2595 (void *)so, pr_state(so->so_state, so->so_mode))); 2596 2597 toh.level = SOL_SOCKET; 2598 toh.name = SO_UNIX_CLOSE; 2599 2600 /* zero length + header */ 2601 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2602 toh.status = 0; 2603 2604 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2605 struct T_optdata_req tdr; 2606 2607 tdr.PRIM_type = T_OPTDATA_REQ; 2608 tdr.DATA_flag = 0; 2609 2610 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2611 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2612 2613 /* NOTE: holding so_lock while sleeping */ 2614 mp = soallocproto2(&tdr, sizeof (tdr), 2615 &toh, sizeof (toh), 0, _ALLOC_SLEEP); 2616 } else { 2617 struct T_unitdata_req tudr; 2618 void *addr; 2619 socklen_t addrlen; 2620 void *src; 2621 socklen_t srclen; 2622 struct T_opthdr toh2; 2623 t_scalar_t size; 2624 2625 /* Connecteded DGRAM socket */ 2626 2627 /* 2628 * For AF_UNIX the destination address is translated to 2629 * an internal name and the source address is passed as 2630 * an option. 2631 */ 2632 /* 2633 * Length and family checks. 2634 */ 2635 error = so_addr_verify(so, so->so_faddr_sa, 2636 (t_uscalar_t)so->so_faddr_len); 2637 if (error) { 2638 eprintsoline(so, error); 2639 return; 2640 } 2641 if (so->so_state & SS_FADDR_NOXLATE) { 2642 /* 2643 * Already have a transport internal address. Do not 2644 * pass any (transport internal) source address. 2645 */ 2646 addr = so->so_faddr_sa; 2647 addrlen = (t_uscalar_t)so->so_faddr_len; 2648 src = NULL; 2649 srclen = 0; 2650 } else { 2651 /* 2652 * Pass the sockaddr_un source address as an option 2653 * and translate the remote address. 2654 * Holding so_lock thus so_laddr_sa can not change. 2655 */ 2656 src = so->so_laddr_sa; 2657 srclen = (socklen_t)so->so_laddr_len; 2658 dprintso(so, 1, 2659 ("so_ux_close: srclen %d, src %p\n", 2660 srclen, src)); 2661 error = so_ux_addr_xlate(so, 2662 so->so_faddr_sa, 2663 (socklen_t)so->so_faddr_len, 0, 2664 &addr, &addrlen); 2665 if (error) { 2666 eprintsoline(so, error); 2667 return; 2668 } 2669 } 2670 tudr.PRIM_type = T_UNITDATA_REQ; 2671 tudr.DEST_length = addrlen; 2672 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2673 if (srclen == 0) { 2674 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2675 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2676 _TPI_ALIGN_TOPT(addrlen)); 2677 2678 size = tudr.OPT_offset + tudr.OPT_length; 2679 /* NOTE: holding so_lock while sleeping */ 2680 mp = soallocproto2(&tudr, sizeof (tudr), 2681 addr, addrlen, size, _ALLOC_SLEEP); 2682 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2683 soappendmsg(mp, &toh, sizeof (toh)); 2684 } else { 2685 /* 2686 * There is a AF_UNIX sockaddr_un to include as a 2687 * source address option. 2688 */ 2689 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2690 _TPI_ALIGN_TOPT(srclen)); 2691 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2692 _TPI_ALIGN_TOPT(addrlen)); 2693 2694 toh2.level = SOL_SOCKET; 2695 toh2.name = SO_SRCADDR; 2696 toh2.len = (t_uscalar_t)(srclen + 2697 sizeof (struct T_opthdr)); 2698 toh2.status = 0; 2699 2700 size = tudr.OPT_offset + tudr.OPT_length; 2701 2702 /* NOTE: holding so_lock while sleeping */ 2703 mp = soallocproto2(&tudr, sizeof (tudr), 2704 addr, addrlen, size, _ALLOC_SLEEP); 2705 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2706 soappendmsg(mp, &toh, sizeof (toh)); 2707 soappendmsg(mp, &toh2, sizeof (toh2)); 2708 soappendmsg(mp, src, srclen); 2709 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2710 } 2711 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2712 } 2713 mutex_exit(&so->so_lock); 2714 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2715 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2716 mutex_enter(&so->so_lock); 2717 } 2718 2719 /* 2720 * Handle recv* calls that set MSG_OOB or MSG_OOB together with MSG_PEEK. 2721 */ 2722 int 2723 sorecvoob(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, int flags) 2724 { 2725 mblk_t *mp, *nmp; 2726 int error; 2727 2728 dprintso(so, 1, ("sorecvoob(%p, %p, 0x%x)\n", 2729 (void *)so, (void *)msg, flags)); 2730 2731 /* 2732 * There is never any oob data with addresses or control since 2733 * the T_EXDATA_IND does not carry any options. 2734 */ 2735 msg->msg_controllen = 0; 2736 msg->msg_namelen = 0; 2737 2738 mutex_enter(&so->so_lock); 2739 ASSERT(so_verify_oobstate(so)); 2740 if ((so->so_options & SO_OOBINLINE) || 2741 (so->so_state & (SS_OOBPEND|SS_HADOOBDATA)) != SS_OOBPEND) { 2742 dprintso(so, 1, ("sorecvoob: inline or data consumed\n")); 2743 mutex_exit(&so->so_lock); 2744 return (EINVAL); 2745 } 2746 if (!(so->so_state & SS_HAVEOOBDATA)) { 2747 dprintso(so, 1, ("sorecvoob: no data yet\n")); 2748 mutex_exit(&so->so_lock); 2749 return (EWOULDBLOCK); 2750 } 2751 ASSERT(so->so_oobmsg != NULL); 2752 mp = so->so_oobmsg; 2753 if (flags & MSG_PEEK) { 2754 /* 2755 * Since recv* can not return ENOBUFS we can not use dupmsg. 2756 * Instead we revert to the consolidation private 2757 * allocb_wait plus bcopy. 2758 */ 2759 mblk_t *mp1; 2760 2761 mp1 = allocb_wait(msgdsize(mp), BPRI_MED, STR_NOSIG, NULL); 2762 ASSERT(mp1); 2763 2764 while (mp != NULL) { 2765 ssize_t size; 2766 2767 size = MBLKL(mp); 2768 bcopy(mp->b_rptr, mp1->b_wptr, size); 2769 mp1->b_wptr += size; 2770 ASSERT(mp1->b_wptr <= mp1->b_datap->db_lim); 2771 mp = mp->b_cont; 2772 } 2773 mp = mp1; 2774 } else { 2775 /* 2776 * Update the state indicating that the data has been consumed. 2777 * Keep SS_OOBPEND set until data is consumed past the mark. 2778 */ 2779 so->so_oobmsg = NULL; 2780 so->so_state ^= SS_HAVEOOBDATA|SS_HADOOBDATA; 2781 } 2782 dprintso(so, 1, 2783 ("after recvoob(%p): counts %d/%d state %s\n", 2784 (void *)so, so->so_oobsigcnt, 2785 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2786 ASSERT(so_verify_oobstate(so)); 2787 mutex_exit(&so->so_lock); 2788 2789 error = 0; 2790 nmp = mp; 2791 while (nmp != NULL && uiop->uio_resid > 0) { 2792 ssize_t n = MBLKL(nmp); 2793 2794 n = MIN(n, uiop->uio_resid); 2795 if (n > 0) 2796 error = uiomove(nmp->b_rptr, n, 2797 UIO_READ, uiop); 2798 if (error) 2799 break; 2800 nmp = nmp->b_cont; 2801 } 2802 freemsg(mp); 2803 return (error); 2804 } 2805 2806 /* 2807 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2808 * In addition, the caller typically verifies that there is some 2809 * potential state to clear by checking 2810 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2811 * before calling this routine. 2812 * Note that such a check can be made without holding so_lock since 2813 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2814 * decrements so_oobsigcnt. 2815 * 2816 * When data is read *after* the point that all pending 2817 * oob data has been consumed the oob indication is cleared. 2818 * 2819 * This logic keeps select/poll returning POLLRDBAND and 2820 * SIOCATMARK returning true until we have read past 2821 * the mark. 2822 */ 2823 static void 2824 sorecv_update_oobstate(struct sonode *so) 2825 { 2826 mutex_enter(&so->so_lock); 2827 ASSERT(so_verify_oobstate(so)); 2828 dprintso(so, 1, 2829 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2830 so->so_oobsigcnt, 2831 so->so_oobcnt, pr_state(so->so_state, so->so_mode))); 2832 if (so->so_oobsigcnt == 0) { 2833 /* No more pending oob indications */ 2834 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2835 freemsg(so->so_oobmsg); 2836 so->so_oobmsg = NULL; 2837 } 2838 ASSERT(so_verify_oobstate(so)); 2839 mutex_exit(&so->so_lock); 2840 } 2841 2842 /* 2843 * Handle recv* calls for an so which has NL7C saved recv mblk_t(s). 2844 */ 2845 static int 2846 nl7c_sorecv(struct sonode *so, mblk_t **rmp, uio_t *uiop, rval_t *rp) 2847 { 2848 int error = 0; 2849 mblk_t *tmp = NULL; 2850 mblk_t *pmp = NULL; 2851 mblk_t *nmp = so->so_nl7c_rcv_mp; 2852 2853 ASSERT(nmp != NULL); 2854 2855 while (nmp != NULL && uiop->uio_resid > 0) { 2856 ssize_t n; 2857 2858 if (DB_TYPE(nmp) == M_DATA) { 2859 /* 2860 * We have some data, uiomove up to resid bytes. 2861 */ 2862 n = MIN(MBLKL(nmp), uiop->uio_resid); 2863 if (n > 0) 2864 error = uiomove(nmp->b_rptr, n, UIO_READ, uiop); 2865 nmp->b_rptr += n; 2866 if (nmp->b_rptr == nmp->b_wptr) { 2867 pmp = nmp; 2868 nmp = nmp->b_cont; 2869 } 2870 if (error) 2871 break; 2872 } else { 2873 /* 2874 * We only handle data, save for caller to handle. 2875 */ 2876 if (pmp != NULL) { 2877 pmp->b_cont = nmp->b_cont; 2878 } 2879 nmp->b_cont = NULL; 2880 if (*rmp == NULL) { 2881 *rmp = nmp; 2882 } else { 2883 tmp->b_cont = nmp; 2884 } 2885 nmp = nmp->b_cont; 2886 tmp = nmp; 2887 } 2888 } 2889 if (pmp != NULL) { 2890 /* Free any mblk_t(s) which we have consumed */ 2891 pmp->b_cont = NULL; 2892 freemsg(so->so_nl7c_rcv_mp); 2893 } 2894 if ((so->so_nl7c_rcv_mp = nmp) == NULL) { 2895 /* Last mblk_t so return the saved kstrgetmsg() rval/error */ 2896 if (error == 0) { 2897 rval_t *p = (rval_t *)&so->so_nl7c_rcv_rval; 2898 2899 error = p->r_v.r_v2; 2900 p->r_v.r_v2 = 0; 2901 } 2902 rp->r_vals = so->so_nl7c_rcv_rval; 2903 so->so_nl7c_rcv_rval = 0; 2904 } else { 2905 /* More mblk_t(s) to process so no rval to return */ 2906 rp->r_vals = 0; 2907 } 2908 return (error); 2909 } 2910 2911 /* 2912 * Receive the next message on the queue. 2913 * If msg_controllen is non-zero when called the caller is interested in 2914 * any received control info (options). 2915 * If msg_namelen is non-zero when called the caller is interested in 2916 * any received source address. 2917 * The routine returns with msg_control and msg_name pointing to 2918 * kmem_alloc'ed memory which the caller has to free. 2919 */ 2920 int 2921 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 2922 { 2923 union T_primitives *tpr; 2924 mblk_t *mp; 2925 uchar_t pri; 2926 int pflag, opflag; 2927 void *control; 2928 t_uscalar_t controllen; 2929 t_uscalar_t namelen; 2930 int so_state = so->so_state; /* Snapshot */ 2931 ssize_t saved_resid; 2932 rval_t rval; 2933 int flags; 2934 clock_t timout; 2935 int first; 2936 int error = 0; 2937 struct uio *suiop = NULL; 2938 sodirect_t *sodp = so->so_direct; 2939 2940 flags = msg->msg_flags; 2941 msg->msg_flags = 0; 2942 2943 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 2944 (void *)so, (void *)msg, flags, 2945 pr_state(so->so_state, so->so_mode), so->so_error)); 2946 2947 /* 2948 * If we are not connected because we have never been connected 2949 * we return ENOTCONN. If we have been connected (but are no longer 2950 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 2951 * the EOF. 2952 * 2953 * An alternative would be to post an ENOTCONN error in stream head 2954 * (read+write) and clear it when we're connected. However, that error 2955 * would cause incorrect poll/select behavior! 2956 */ 2957 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 2958 (so->so_mode & SM_CONNREQUIRED)) { 2959 return (ENOTCONN); 2960 } 2961 2962 /* 2963 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 2964 * after checking that the read queue is empty) and returns zero. 2965 * This implementation will sleep (in kstrgetmsg) even if uio_resid 2966 * is zero. 2967 */ 2968 2969 if (flags & MSG_OOB) { 2970 /* Check that the transport supports OOB */ 2971 if (!(so->so_mode & SM_EXDATA)) 2972 return (EOPNOTSUPP); 2973 return (sorecvoob(so, msg, uiop, flags)); 2974 } 2975 2976 /* 2977 * Set msg_controllen and msg_namelen to zero here to make it 2978 * simpler in the cases that no control or name is returned. 2979 */ 2980 controllen = msg->msg_controllen; 2981 namelen = msg->msg_namelen; 2982 msg->msg_controllen = 0; 2983 msg->msg_namelen = 0; 2984 2985 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 2986 namelen, controllen)); 2987 2988 mutex_enter(&so->so_lock); 2989 /* 2990 * If an NL7C enabled socket and not waiting for write data. 2991 */ 2992 if ((so->so_nl7c_flags & (NL7C_ENABLED | NL7C_WAITWRITE)) == 2993 NL7C_ENABLED) { 2994 if (so->so_nl7c_uri) { 2995 /* Close uri processing for a previous request */ 2996 nl7c_close(so); 2997 } 2998 if ((so_state & SS_CANTRCVMORE) && so->so_nl7c_rcv_mp == NULL) { 2999 /* Nothing to process, EOF */ 3000 mutex_exit(&so->so_lock); 3001 return (0); 3002 } else if (so->so_nl7c_flags & NL7C_SOPERSIST) { 3003 /* Persistent NL7C socket, try to process request */ 3004 boolean_t ret; 3005 3006 ret = nl7c_process(so, 3007 (so->so_state & (SS_NONBLOCK|SS_NDELAY))); 3008 rval.r_vals = so->so_nl7c_rcv_rval; 3009 error = rval.r_v.r_v2; 3010 if (error) { 3011 /* Error of some sort, return it */ 3012 mutex_exit(&so->so_lock); 3013 return (error); 3014 } 3015 if (so->so_nl7c_flags && 3016 ! (so->so_nl7c_flags & NL7C_WAITWRITE)) { 3017 /* 3018 * Still an NL7C socket and no data 3019 * to pass up to the caller. 3020 */ 3021 mutex_exit(&so->so_lock); 3022 if (ret) { 3023 /* EOF */ 3024 return (0); 3025 } else { 3026 /* Need more data */ 3027 return (EAGAIN); 3028 } 3029 } 3030 } else { 3031 /* 3032 * Not persistent so no further NL7C processing. 3033 */ 3034 so->so_nl7c_flags = 0; 3035 } 3036 } 3037 /* 3038 * Only one reader is allowed at any given time. This is needed 3039 * for T_EXDATA handling and, in the future, MSG_WAITALL. 3040 * 3041 * This is slightly different that BSD behavior in that it fails with 3042 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3043 * is single-threaded using sblock(), which is dropped while waiting 3044 * for data to appear. The difference shows up e.g. if one 3045 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3046 * does use nonblocking io and different threads are reading each 3047 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3048 * in this case as long as the read queue doesn't get empty. 3049 * In this implementation the thread using nonblocking io can 3050 * get an EWOULDBLOCK error due to the blocking thread executing 3051 * e.g. in the uiomove in kstrgetmsg. 3052 * This difference is not believed to be significant. 3053 */ 3054 /* Set SOREADLOCKED */ 3055 error = so_lock_read_intr(so, 3056 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 3057 mutex_exit(&so->so_lock); 3058 if (error) 3059 return (error); 3060 3061 /* 3062 * Tell kstrgetmsg to not inspect the stream head errors until all 3063 * queued data has been consumed. 3064 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3065 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3066 * 3067 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3068 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3069 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3070 */ 3071 pflag = MSG_ANY | MSG_DELAYERROR; 3072 if (flags & MSG_PEEK) { 3073 pflag |= MSG_IPEEK; 3074 flags &= ~MSG_WAITALL; 3075 } 3076 if (so->so_mode & SM_ATOMIC) 3077 pflag |= MSG_DISCARDTAIL; 3078 3079 if (flags & MSG_DONTWAIT) 3080 timout = 0; 3081 else 3082 timout = -1; 3083 opflag = pflag; 3084 first = 1; 3085 3086 if (uiop->uio_resid >= uioasync.mincnt && 3087 sodp != NULL && (sodp->sod_state & SOD_ENABLED) && 3088 uioasync.enabled && !(flags & MSG_PEEK) && 3089 !(so_state & SS_CANTRCVMORE)) { 3090 /* 3091 * Big enough I/O for uioa min setup and an sodirect socket 3092 * and sodirect enabled and uioa enabled and I/O will be done 3093 * and not EOF so initialize the sodirect_t uioa_t with "uiop". 3094 */ 3095 mutex_enter(sodp->sod_lockp); 3096 if (!uioainit(uiop, &sodp->sod_uioa)) { 3097 /* 3098 * Successful uioainit() so the uio_t part of the 3099 * uioa_t will be used for all uio_t work to follow, 3100 * we save the original "uiop" in "suiop". 3101 */ 3102 suiop = uiop; 3103 uiop = (uio_t *)&sodp->sod_uioa; 3104 /* 3105 * Before returning to the caller the passed in uio_t 3106 * "uiop" will be updated via a call to uioafini() 3107 * below. 3108 * 3109 * Note, the uioa.uioa_state isn't set to UIOA_ENABLED 3110 * here as first we have to uioamove() any currently 3111 * queued M_DATA mblk_t(s) so it will be done in 3112 * kstrgetmsg(). 3113 */ 3114 } 3115 /* 3116 * In either uioainit() success or not case note the number 3117 * of uio bytes the caller wants for sod framework and/or 3118 * transport (e.g. TCP) strategy. 3119 */ 3120 sodp->sod_want = uiop->uio_resid; 3121 mutex_exit(sodp->sod_lockp); 3122 } else if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) { 3123 /* 3124 * No uioa but still using sodirect so note the number of 3125 * uio bytes the caller wants for sodirect framework and/or 3126 * transport (e.g. TCP) strategy. 3127 * 3128 * Note, sod_lockp not held, only writer is in this function 3129 * and only one thread at a time so not needed just to init. 3130 */ 3131 sodp->sod_want = uiop->uio_resid; 3132 } 3133 retry: 3134 saved_resid = uiop->uio_resid; 3135 pri = 0; 3136 mp = NULL; 3137 if (so->so_nl7c_rcv_mp != NULL) { 3138 /* Already kstrgetmsg()ed saved mblk(s) from NL7C */ 3139 error = nl7c_sorecv(so, &mp, uiop, &rval); 3140 } else { 3141 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3142 timout, &rval); 3143 } 3144 if (error) { 3145 switch (error) { 3146 case EINTR: 3147 case EWOULDBLOCK: 3148 if (!first) 3149 error = 0; 3150 break; 3151 case ETIME: 3152 /* Returned from kstrgetmsg when timeout expires */ 3153 if (!first) 3154 error = 0; 3155 else 3156 error = EWOULDBLOCK; 3157 break; 3158 default: 3159 eprintsoline(so, error); 3160 break; 3161 } 3162 goto out; 3163 } 3164 /* 3165 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3166 * For non-datagrams MOREDATA is used to set MSG_EOR. 3167 */ 3168 ASSERT(!(rval.r_val1 & MORECTL)); 3169 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3170 msg->msg_flags |= MSG_TRUNC; 3171 3172 if (mp == NULL) { 3173 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3174 /* 3175 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3176 * The draft Posix socket spec states that the mark should 3177 * not be cleared when peeking. We follow the latter. 3178 */ 3179 if ((so->so_state & 3180 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3181 (uiop->uio_resid != saved_resid) && 3182 !(flags & MSG_PEEK)) { 3183 sorecv_update_oobstate(so); 3184 } 3185 3186 mutex_enter(&so->so_lock); 3187 /* Set MSG_EOR based on MOREDATA */ 3188 if (!(rval.r_val1 & MOREDATA)) { 3189 if (so->so_state & SS_SAVEDEOR) { 3190 msg->msg_flags |= MSG_EOR; 3191 so->so_state &= ~SS_SAVEDEOR; 3192 } 3193 } 3194 /* 3195 * If some data was received (i.e. not EOF) and the 3196 * read/recv* has not been satisfied wait for some more. 3197 */ 3198 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3199 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3200 mutex_exit(&so->so_lock); 3201 first = 0; 3202 pflag = opflag | MSG_NOMARK; 3203 goto retry; 3204 } 3205 goto out_locked; 3206 } 3207 3208 /* strsock_proto has already verified length and alignment */ 3209 tpr = (union T_primitives *)mp->b_rptr; 3210 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3211 3212 switch (tpr->type) { 3213 case T_DATA_IND: { 3214 if ((so->so_state & 3215 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3216 (uiop->uio_resid != saved_resid) && 3217 !(flags & MSG_PEEK)) { 3218 sorecv_update_oobstate(so); 3219 } 3220 3221 /* 3222 * Set msg_flags to MSG_EOR based on 3223 * MORE_flag and MOREDATA. 3224 */ 3225 mutex_enter(&so->so_lock); 3226 so->so_state &= ~SS_SAVEDEOR; 3227 if (!(tpr->data_ind.MORE_flag & 1)) { 3228 if (!(rval.r_val1 & MOREDATA)) 3229 msg->msg_flags |= MSG_EOR; 3230 else 3231 so->so_state |= SS_SAVEDEOR; 3232 } 3233 freemsg(mp); 3234 /* 3235 * If some data was received (i.e. not EOF) and the 3236 * read/recv* has not been satisfied wait for some more. 3237 */ 3238 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3239 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3240 mutex_exit(&so->so_lock); 3241 first = 0; 3242 pflag = opflag | MSG_NOMARK; 3243 goto retry; 3244 } 3245 goto out_locked; 3246 } 3247 case T_UNITDATA_IND: { 3248 void *addr; 3249 t_uscalar_t addrlen; 3250 void *abuf; 3251 t_uscalar_t optlen; 3252 void *opt; 3253 3254 if ((so->so_state & 3255 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3256 (uiop->uio_resid != saved_resid) && 3257 !(flags & MSG_PEEK)) { 3258 sorecv_update_oobstate(so); 3259 } 3260 3261 if (namelen != 0) { 3262 /* Caller wants source address */ 3263 addrlen = tpr->unitdata_ind.SRC_length; 3264 addr = sogetoff(mp, 3265 tpr->unitdata_ind.SRC_offset, 3266 addrlen, 1); 3267 if (addr == NULL) { 3268 freemsg(mp); 3269 error = EPROTO; 3270 eprintsoline(so, error); 3271 goto out; 3272 } 3273 if (so->so_family == AF_UNIX) { 3274 /* 3275 * Can not use the transport level address. 3276 * If there is a SO_SRCADDR option carrying 3277 * the socket level address it will be 3278 * extracted below. 3279 */ 3280 addr = NULL; 3281 addrlen = 0; 3282 } 3283 } 3284 optlen = tpr->unitdata_ind.OPT_length; 3285 if (optlen != 0) { 3286 t_uscalar_t ncontrollen; 3287 3288 /* 3289 * Extract any source address option. 3290 * Determine how large cmsg buffer is needed. 3291 */ 3292 opt = sogetoff(mp, 3293 tpr->unitdata_ind.OPT_offset, 3294 optlen, __TPI_ALIGN_SIZE); 3295 3296 if (opt == NULL) { 3297 freemsg(mp); 3298 error = EPROTO; 3299 eprintsoline(so, error); 3300 goto out; 3301 } 3302 if (so->so_family == AF_UNIX) 3303 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3304 ncontrollen = so_cmsglen(mp, opt, optlen, 3305 !(flags & MSG_XPG4_2)); 3306 if (controllen != 0) 3307 controllen = ncontrollen; 3308 else if (ncontrollen != 0) 3309 msg->msg_flags |= MSG_CTRUNC; 3310 } else { 3311 controllen = 0; 3312 } 3313 3314 if (namelen != 0) { 3315 /* 3316 * Return address to caller. 3317 * Caller handles truncation if length 3318 * exceeds msg_namelen. 3319 * NOTE: AF_UNIX NUL termination is ensured by 3320 * the sender's copyin_name(). 3321 */ 3322 abuf = kmem_alloc(addrlen, KM_SLEEP); 3323 3324 bcopy(addr, abuf, addrlen); 3325 msg->msg_name = abuf; 3326 msg->msg_namelen = addrlen; 3327 } 3328 3329 if (controllen != 0) { 3330 /* 3331 * Return control msg to caller. 3332 * Caller handles truncation if length 3333 * exceeds msg_controllen. 3334 */ 3335 control = kmem_zalloc(controllen, KM_SLEEP); 3336 3337 error = so_opt2cmsg(mp, opt, optlen, 3338 !(flags & MSG_XPG4_2), 3339 control, controllen); 3340 if (error) { 3341 freemsg(mp); 3342 if (msg->msg_namelen != 0) 3343 kmem_free(msg->msg_name, 3344 msg->msg_namelen); 3345 kmem_free(control, controllen); 3346 eprintsoline(so, error); 3347 goto out; 3348 } 3349 msg->msg_control = control; 3350 msg->msg_controllen = controllen; 3351 } 3352 3353 freemsg(mp); 3354 goto out; 3355 } 3356 case T_OPTDATA_IND: { 3357 struct T_optdata_req *tdr; 3358 void *opt; 3359 t_uscalar_t optlen; 3360 3361 if ((so->so_state & 3362 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3363 (uiop->uio_resid != saved_resid) && 3364 !(flags & MSG_PEEK)) { 3365 sorecv_update_oobstate(so); 3366 } 3367 3368 tdr = (struct T_optdata_req *)mp->b_rptr; 3369 optlen = tdr->OPT_length; 3370 if (optlen != 0) { 3371 t_uscalar_t ncontrollen; 3372 /* 3373 * Determine how large cmsg buffer is needed. 3374 */ 3375 opt = sogetoff(mp, 3376 tpr->optdata_ind.OPT_offset, 3377 optlen, __TPI_ALIGN_SIZE); 3378 3379 if (opt == NULL) { 3380 freemsg(mp); 3381 error = EPROTO; 3382 eprintsoline(so, error); 3383 goto out; 3384 } 3385 3386 ncontrollen = so_cmsglen(mp, opt, optlen, 3387 !(flags & MSG_XPG4_2)); 3388 if (controllen != 0) 3389 controllen = ncontrollen; 3390 else if (ncontrollen != 0) 3391 msg->msg_flags |= MSG_CTRUNC; 3392 } else { 3393 controllen = 0; 3394 } 3395 3396 if (controllen != 0) { 3397 /* 3398 * Return control msg to caller. 3399 * Caller handles truncation if length 3400 * exceeds msg_controllen. 3401 */ 3402 control = kmem_zalloc(controllen, KM_SLEEP); 3403 3404 error = so_opt2cmsg(mp, opt, optlen, 3405 !(flags & MSG_XPG4_2), 3406 control, controllen); 3407 if (error) { 3408 freemsg(mp); 3409 kmem_free(control, controllen); 3410 eprintsoline(so, error); 3411 goto out; 3412 } 3413 msg->msg_control = control; 3414 msg->msg_controllen = controllen; 3415 } 3416 3417 /* 3418 * Set msg_flags to MSG_EOR based on 3419 * DATA_flag and MOREDATA. 3420 */ 3421 mutex_enter(&so->so_lock); 3422 so->so_state &= ~SS_SAVEDEOR; 3423 if (!(tpr->data_ind.MORE_flag & 1)) { 3424 if (!(rval.r_val1 & MOREDATA)) 3425 msg->msg_flags |= MSG_EOR; 3426 else 3427 so->so_state |= SS_SAVEDEOR; 3428 } 3429 freemsg(mp); 3430 /* 3431 * If some data was received (i.e. not EOF) and the 3432 * read/recv* has not been satisfied wait for some more. 3433 * Not possible to wait if control info was received. 3434 */ 3435 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3436 controllen == 0 && 3437 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3438 mutex_exit(&so->so_lock); 3439 first = 0; 3440 pflag = opflag | MSG_NOMARK; 3441 goto retry; 3442 } 3443 goto out_locked; 3444 } 3445 case T_EXDATA_IND: { 3446 dprintso(so, 1, 3447 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3448 "state %s\n", 3449 so->so_oobsigcnt, so->so_oobcnt, 3450 saved_resid - uiop->uio_resid, 3451 pr_state(so->so_state, so->so_mode))); 3452 /* 3453 * kstrgetmsg handles MSGMARK so there is nothing to 3454 * inspect in the T_EXDATA_IND. 3455 * strsock_proto makes the stream head queue the T_EXDATA_IND 3456 * as a separate message with no M_DATA component. Furthermore, 3457 * the stream head does not consolidate M_DATA messages onto 3458 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3459 * remains a message by itself. This is needed since MSGMARK 3460 * marks both the whole message as well as the last byte 3461 * of the message. 3462 */ 3463 freemsg(mp); 3464 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3465 if (flags & MSG_PEEK) { 3466 /* 3467 * Even though we are peeking we consume the 3468 * T_EXDATA_IND thereby moving the mark information 3469 * to SS_RCVATMARK. Then the oob code below will 3470 * retry the peeking kstrgetmsg. 3471 * Note that the stream head read queue is 3472 * never flushed without holding SOREADLOCKED 3473 * thus the T_EXDATA_IND can not disappear 3474 * underneath us. 3475 */ 3476 dprintso(so, 1, 3477 ("sotpi_recvmsg: consume EXDATA_IND " 3478 "counts %d/%d state %s\n", 3479 so->so_oobsigcnt, 3480 so->so_oobcnt, 3481 pr_state(so->so_state, so->so_mode))); 3482 3483 pflag = MSG_ANY | MSG_DELAYERROR; 3484 if (so->so_mode & SM_ATOMIC) 3485 pflag |= MSG_DISCARDTAIL; 3486 3487 pri = 0; 3488 mp = NULL; 3489 3490 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3491 &pri, &pflag, (clock_t)-1, &rval); 3492 ASSERT(uiop->uio_resid == saved_resid); 3493 3494 if (error) { 3495 #ifdef SOCK_DEBUG 3496 if (error != EWOULDBLOCK && error != EINTR) { 3497 eprintsoline(so, error); 3498 } 3499 #endif /* SOCK_DEBUG */ 3500 goto out; 3501 } 3502 ASSERT(mp); 3503 tpr = (union T_primitives *)mp->b_rptr; 3504 ASSERT(tpr->type == T_EXDATA_IND); 3505 freemsg(mp); 3506 } /* end "if (flags & MSG_PEEK)" */ 3507 3508 /* 3509 * Decrement the number of queued and pending oob. 3510 * 3511 * SS_RCVATMARK is cleared when we read past a mark. 3512 * SS_HAVEOOBDATA is cleared when we've read past the 3513 * last mark. 3514 * SS_OOBPEND is cleared if we've read past the last 3515 * mark and no (new) SIGURG has been posted. 3516 */ 3517 mutex_enter(&so->so_lock); 3518 ASSERT(so_verify_oobstate(so)); 3519 ASSERT(so->so_oobsigcnt >= so->so_oobcnt); 3520 ASSERT(so->so_oobsigcnt > 0); 3521 so->so_oobsigcnt--; 3522 ASSERT(so->so_oobcnt > 0); 3523 so->so_oobcnt--; 3524 /* 3525 * Since the T_EXDATA_IND has been removed from the stream 3526 * head, but we have not read data past the mark, 3527 * sockfs needs to track that the socket is still at the mark. 3528 * 3529 * Since no data was received call kstrgetmsg again to wait 3530 * for data. 3531 */ 3532 so->so_state |= SS_RCVATMARK; 3533 mutex_exit(&so->so_lock); 3534 dprintso(so, 1, 3535 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3536 so->so_oobsigcnt, so->so_oobcnt, 3537 pr_state(so->so_state, so->so_mode))); 3538 pflag = opflag; 3539 goto retry; 3540 } 3541 default: 3542 ASSERT(0); 3543 freemsg(mp); 3544 error = EPROTO; 3545 eprintsoline(so, error); 3546 goto out; 3547 } 3548 /* NOTREACHED */ 3549 out: 3550 mutex_enter(&so->so_lock); 3551 out_locked: 3552 if (sodp != NULL) { 3553 /* Finish any sodirect and uioa processing */ 3554 mutex_enter(sodp->sod_lockp); 3555 if (suiop != NULL) { 3556 /* Finish any uioa_t processing */ 3557 int ret; 3558 3559 ASSERT(uiop == (uio_t *)&sodp->sod_uioa); 3560 ret = uioafini(suiop, (uioa_t *)uiop); 3561 if (error == 0 && ret != 0) { 3562 /* If no error yet, set it */ 3563 error = ret; 3564 } 3565 if ((mp = sodp->sod_uioafh) != NULL) { 3566 sodp->sod_uioafh = NULL; 3567 sodp->sod_uioaft = NULL; 3568 freemsg(mp); 3569 } 3570 } 3571 ASSERT(sodp->sod_uioafh == NULL); 3572 if (!(sodp->sod_state & SOD_WAKE_NOT)) { 3573 /* Awoke */ 3574 sodp->sod_state &= SOD_WAKE_CLR; 3575 sodp->sod_state |= SOD_WAKE_NOT; 3576 } 3577 /* Last, clear sod_want value */ 3578 sodp->sod_want = 0; 3579 mutex_exit(sodp->sod_lockp); 3580 } 3581 so_unlock_read(so); /* Clear SOREADLOCKED */ 3582 mutex_exit(&so->so_lock); 3583 return (error); 3584 } 3585 3586 /* 3587 * Sending data with options on a datagram socket. 3588 * Assumes caller has verified that SS_ISBOUND etc. are set. 3589 */ 3590 static int 3591 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3592 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3593 { 3594 struct T_unitdata_req tudr; 3595 mblk_t *mp; 3596 int error; 3597 void *addr; 3598 socklen_t addrlen; 3599 void *src; 3600 socklen_t srclen; 3601 ssize_t len; 3602 int size; 3603 struct T_opthdr toh; 3604 struct fdbuf *fdbuf; 3605 t_uscalar_t optlen; 3606 void *fds; 3607 int fdlen; 3608 3609 ASSERT(name && namelen); 3610 ASSERT(control && controllen); 3611 3612 len = uiop->uio_resid; 3613 if (len > (ssize_t)so->so_tidu_size) { 3614 return (EMSGSIZE); 3615 } 3616 3617 /* 3618 * For AF_UNIX the destination address is translated to an internal 3619 * name and the source address is passed as an option. 3620 * Also, file descriptors are passed as file pointers in an 3621 * option. 3622 */ 3623 3624 /* 3625 * Length and family checks. 3626 */ 3627 error = so_addr_verify(so, name, namelen); 3628 if (error) { 3629 eprintsoline(so, error); 3630 return (error); 3631 } 3632 if (so->so_family == AF_UNIX) { 3633 if (so->so_state & SS_FADDR_NOXLATE) { 3634 /* 3635 * Already have a transport internal address. Do not 3636 * pass any (transport internal) source address. 3637 */ 3638 addr = name; 3639 addrlen = namelen; 3640 src = NULL; 3641 srclen = 0; 3642 } else { 3643 /* 3644 * Pass the sockaddr_un source address as an option 3645 * and translate the remote address. 3646 * 3647 * Note that this code does not prevent so_laddr_sa 3648 * from changing while it is being used. Thus 3649 * if an unbind+bind occurs concurrently with this 3650 * send the peer might see a partially new and a 3651 * partially old "from" address. 3652 */ 3653 src = so->so_laddr_sa; 3654 srclen = (t_uscalar_t)so->so_laddr_len; 3655 dprintso(so, 1, 3656 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3657 srclen, src)); 3658 error = so_ux_addr_xlate(so, name, namelen, 3659 (flags & MSG_XPG4_2), 3660 &addr, &addrlen); 3661 if (error) { 3662 eprintsoline(so, error); 3663 return (error); 3664 } 3665 } 3666 } else { 3667 addr = name; 3668 addrlen = namelen; 3669 src = NULL; 3670 srclen = 0; 3671 } 3672 optlen = so_optlen(control, controllen, 3673 !(flags & MSG_XPG4_2)); 3674 tudr.PRIM_type = T_UNITDATA_REQ; 3675 tudr.DEST_length = addrlen; 3676 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3677 if (srclen != 0) 3678 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3679 _TPI_ALIGN_TOPT(srclen)); 3680 else 3681 tudr.OPT_length = optlen; 3682 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3683 _TPI_ALIGN_TOPT(addrlen)); 3684 3685 size = tudr.OPT_offset + tudr.OPT_length; 3686 3687 /* 3688 * File descriptors only when SM_FDPASSING set. 3689 */ 3690 error = so_getfdopt(control, controllen, 3691 !(flags & MSG_XPG4_2), &fds, &fdlen); 3692 if (error) 3693 return (error); 3694 if (fdlen != -1) { 3695 if (!(so->so_mode & SM_FDPASSING)) 3696 return (EOPNOTSUPP); 3697 3698 error = fdbuf_create(fds, fdlen, &fdbuf); 3699 if (error) 3700 return (error); 3701 mp = fdbuf_allocmsg(size, fdbuf); 3702 } else { 3703 mp = soallocproto(size, _ALLOC_INTR); 3704 if (mp == NULL) { 3705 /* 3706 * Caught a signal waiting for memory. 3707 * Let send* return EINTR. 3708 */ 3709 return (EINTR); 3710 } 3711 } 3712 soappendmsg(mp, &tudr, sizeof (tudr)); 3713 soappendmsg(mp, addr, addrlen); 3714 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3715 3716 if (fdlen != -1) { 3717 ASSERT(fdbuf != NULL); 3718 toh.level = SOL_SOCKET; 3719 toh.name = SO_FILEP; 3720 toh.len = fdbuf->fd_size + 3721 (t_uscalar_t)sizeof (struct T_opthdr); 3722 toh.status = 0; 3723 soappendmsg(mp, &toh, sizeof (toh)); 3724 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3725 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3726 } 3727 if (srclen != 0) { 3728 /* 3729 * There is a AF_UNIX sockaddr_un to include as a source 3730 * address option. 3731 */ 3732 toh.level = SOL_SOCKET; 3733 toh.name = SO_SRCADDR; 3734 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3735 toh.status = 0; 3736 soappendmsg(mp, &toh, sizeof (toh)); 3737 soappendmsg(mp, src, srclen); 3738 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3739 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3740 } 3741 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3742 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3743 /* At most 3 bytes left in the message */ 3744 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3745 ASSERT(MBLKL(mp) <= (ssize_t)size); 3746 3747 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3748 if (audit_active) 3749 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3750 3751 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3752 #ifdef SOCK_DEBUG 3753 if (error) { 3754 eprintsoline(so, error); 3755 } 3756 #endif /* SOCK_DEBUG */ 3757 return (error); 3758 } 3759 3760 /* 3761 * Sending data with options on a connected stream socket. 3762 * Assumes caller has verified that SS_ISCONNECTED is set. 3763 */ 3764 static int 3765 sosend_svccmsg(struct sonode *so, 3766 struct uio *uiop, 3767 int more, 3768 void *control, 3769 t_uscalar_t controllen, 3770 int flags) 3771 { 3772 struct T_optdata_req tdr; 3773 mblk_t *mp; 3774 int error; 3775 ssize_t iosize; 3776 int first = 1; 3777 int size; 3778 struct fdbuf *fdbuf; 3779 t_uscalar_t optlen; 3780 void *fds; 3781 int fdlen; 3782 struct T_opthdr toh; 3783 3784 dprintso(so, 1, 3785 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3786 3787 /* 3788 * Has to be bound and connected. However, since no locks are 3789 * held the state could have changed after sotpi_sendmsg checked it 3790 * thus it is not possible to ASSERT on the state. 3791 */ 3792 3793 /* Options on connection-oriented only when SM_OPTDATA set. */ 3794 if (!(so->so_mode & SM_OPTDATA)) 3795 return (EOPNOTSUPP); 3796 3797 do { 3798 /* 3799 * Set the MORE flag if uio_resid does not fit in this 3800 * message or if the caller passed in "more". 3801 * Error for transports with zero tidu_size. 3802 */ 3803 tdr.PRIM_type = T_OPTDATA_REQ; 3804 iosize = so->so_tidu_size; 3805 if (iosize <= 0) 3806 return (EMSGSIZE); 3807 if (uiop->uio_resid > iosize) { 3808 tdr.DATA_flag = 1; 3809 } else { 3810 if (more) 3811 tdr.DATA_flag = 1; 3812 else 3813 tdr.DATA_flag = 0; 3814 iosize = uiop->uio_resid; 3815 } 3816 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3817 tdr.DATA_flag, iosize)); 3818 3819 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3820 tdr.OPT_length = optlen; 3821 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3822 3823 size = (int)sizeof (tdr) + optlen; 3824 /* 3825 * File descriptors only when SM_FDPASSING set. 3826 */ 3827 error = so_getfdopt(control, controllen, 3828 !(flags & MSG_XPG4_2), &fds, &fdlen); 3829 if (error) 3830 return (error); 3831 if (fdlen != -1) { 3832 if (!(so->so_mode & SM_FDPASSING)) 3833 return (EOPNOTSUPP); 3834 3835 error = fdbuf_create(fds, fdlen, &fdbuf); 3836 if (error) 3837 return (error); 3838 mp = fdbuf_allocmsg(size, fdbuf); 3839 } else { 3840 mp = soallocproto(size, _ALLOC_INTR); 3841 if (mp == NULL) { 3842 /* 3843 * Caught a signal waiting for memory. 3844 * Let send* return EINTR. 3845 */ 3846 return (first ? EINTR : 0); 3847 } 3848 } 3849 soappendmsg(mp, &tdr, sizeof (tdr)); 3850 3851 if (fdlen != -1) { 3852 ASSERT(fdbuf != NULL); 3853 toh.level = SOL_SOCKET; 3854 toh.name = SO_FILEP; 3855 toh.len = fdbuf->fd_size + 3856 (t_uscalar_t)sizeof (struct T_opthdr); 3857 toh.status = 0; 3858 soappendmsg(mp, &toh, sizeof (toh)); 3859 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3860 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3861 } 3862 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3863 /* At most 3 bytes left in the message */ 3864 ASSERT(MBLKL(mp) > (ssize_t)(size - __TPI_ALIGN_SIZE)); 3865 ASSERT(MBLKL(mp) <= (ssize_t)size); 3866 3867 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3868 3869 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3870 0, MSG_BAND, 0); 3871 if (error) { 3872 if (!first && error == EWOULDBLOCK) 3873 return (0); 3874 eprintsoline(so, error); 3875 return (error); 3876 } 3877 control = NULL; 3878 first = 0; 3879 if (uiop->uio_resid > 0) { 3880 /* 3881 * Recheck for fatal errors. Fail write even though 3882 * some data have been written. This is consistent 3883 * with strwrite semantics and BSD sockets semantics. 3884 */ 3885 if (so->so_state & SS_CANTSENDMORE) { 3886 tsignal(curthread, SIGPIPE); 3887 eprintsoline(so, error); 3888 return (EPIPE); 3889 } 3890 if (so->so_error != 0) { 3891 mutex_enter(&so->so_lock); 3892 error = sogeterr(so); 3893 mutex_exit(&so->so_lock); 3894 if (error != 0) { 3895 eprintsoline(so, error); 3896 return (error); 3897 } 3898 } 3899 } 3900 } while (uiop->uio_resid > 0); 3901 return (0); 3902 } 3903 3904 /* 3905 * Sending data on a datagram socket. 3906 * Assumes caller has verified that SS_ISBOUND etc. are set. 3907 * 3908 * For AF_UNIX the destination address is translated to an internal 3909 * name and the source address is passed as an option. 3910 */ 3911 int 3912 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3913 struct uio *uiop, int flags) 3914 { 3915 struct T_unitdata_req tudr; 3916 mblk_t *mp; 3917 int error; 3918 void *addr; 3919 socklen_t addrlen; 3920 void *src; 3921 socklen_t srclen; 3922 ssize_t len; 3923 3924 ASSERT(name != NULL && namelen != 0); 3925 3926 len = uiop->uio_resid; 3927 if (len > so->so_tidu_size) { 3928 error = EMSGSIZE; 3929 goto done; 3930 } 3931 3932 /* Length and family checks */ 3933 error = so_addr_verify(so, name, namelen); 3934 if (error != 0) 3935 goto done; 3936 3937 if (so->so_state & SS_DIRECT) 3938 return (sodgram_direct(so, name, namelen, uiop, flags)); 3939 3940 if (so->so_family == AF_UNIX) { 3941 if (so->so_state & SS_FADDR_NOXLATE) { 3942 /* 3943 * Already have a transport internal address. Do not 3944 * pass any (transport internal) source address. 3945 */ 3946 addr = name; 3947 addrlen = namelen; 3948 src = NULL; 3949 srclen = 0; 3950 } else { 3951 /* 3952 * Pass the sockaddr_un source address as an option 3953 * and translate the remote address. 3954 * 3955 * Note that this code does not prevent so_laddr_sa 3956 * from changing while it is being used. Thus 3957 * if an unbind+bind occurs concurrently with this 3958 * send the peer might see a partially new and a 3959 * partially old "from" address. 3960 */ 3961 src = so->so_laddr_sa; 3962 srclen = (socklen_t)so->so_laddr_len; 3963 dprintso(so, 1, 3964 ("sosend_dgram UNIX: srclen %d, src %p\n", 3965 srclen, src)); 3966 error = so_ux_addr_xlate(so, name, namelen, 3967 (flags & MSG_XPG4_2), 3968 &addr, &addrlen); 3969 if (error) { 3970 eprintsoline(so, error); 3971 goto done; 3972 } 3973 } 3974 } else { 3975 addr = name; 3976 addrlen = namelen; 3977 src = NULL; 3978 srclen = 0; 3979 } 3980 tudr.PRIM_type = T_UNITDATA_REQ; 3981 tudr.DEST_length = addrlen; 3982 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3983 if (srclen == 0) { 3984 tudr.OPT_length = 0; 3985 tudr.OPT_offset = 0; 3986 3987 mp = soallocproto2(&tudr, sizeof (tudr), 3988 addr, addrlen, 0, _ALLOC_INTR); 3989 if (mp == NULL) { 3990 /* 3991 * Caught a signal waiting for memory. 3992 * Let send* return EINTR. 3993 */ 3994 error = EINTR; 3995 goto done; 3996 } 3997 } else { 3998 /* 3999 * There is a AF_UNIX sockaddr_un to include as a source 4000 * address option. 4001 */ 4002 struct T_opthdr toh; 4003 ssize_t size; 4004 4005 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 4006 _TPI_ALIGN_TOPT(srclen)); 4007 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 4008 _TPI_ALIGN_TOPT(addrlen)); 4009 4010 toh.level = SOL_SOCKET; 4011 toh.name = SO_SRCADDR; 4012 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 4013 toh.status = 0; 4014 4015 size = tudr.OPT_offset + tudr.OPT_length; 4016 mp = soallocproto2(&tudr, sizeof (tudr), 4017 addr, addrlen, size, _ALLOC_INTR); 4018 if (mp == NULL) { 4019 /* 4020 * Caught a signal waiting for memory. 4021 * Let send* return EINTR. 4022 */ 4023 error = EINTR; 4024 goto done; 4025 } 4026 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 4027 soappendmsg(mp, &toh, sizeof (toh)); 4028 soappendmsg(mp, src, srclen); 4029 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 4030 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 4031 } 4032 4033 if (audit_active) 4034 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4035 4036 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4037 done: 4038 #ifdef SOCK_DEBUG 4039 if (error) { 4040 eprintsoline(so, error); 4041 } 4042 #endif /* SOCK_DEBUG */ 4043 return (error); 4044 } 4045 4046 /* 4047 * Sending data on a connected stream socket. 4048 * Assumes caller has verified that SS_ISCONNECTED is set. 4049 */ 4050 int 4051 sosend_svc(struct sonode *so, 4052 struct uio *uiop, 4053 t_scalar_t prim, 4054 int more, 4055 int sflag) 4056 { 4057 struct T_data_req tdr; 4058 mblk_t *mp; 4059 int error; 4060 ssize_t iosize; 4061 int first = 1; 4062 4063 dprintso(so, 1, 4064 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 4065 (void *)so, uiop->uio_resid, prim, sflag)); 4066 4067 /* 4068 * Has to be bound and connected. However, since no locks are 4069 * held the state could have changed after sotpi_sendmsg checked it 4070 * thus it is not possible to ASSERT on the state. 4071 */ 4072 4073 do { 4074 /* 4075 * Set the MORE flag if uio_resid does not fit in this 4076 * message or if the caller passed in "more". 4077 * Error for transports with zero tidu_size. 4078 */ 4079 tdr.PRIM_type = prim; 4080 iosize = so->so_tidu_size; 4081 if (iosize <= 0) 4082 return (EMSGSIZE); 4083 if (uiop->uio_resid > iosize) { 4084 tdr.MORE_flag = 1; 4085 } else { 4086 if (more) 4087 tdr.MORE_flag = 1; 4088 else 4089 tdr.MORE_flag = 0; 4090 iosize = uiop->uio_resid; 4091 } 4092 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4093 prim, tdr.MORE_flag, iosize)); 4094 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR); 4095 if (mp == NULL) { 4096 /* 4097 * Caught a signal waiting for memory. 4098 * Let send* return EINTR. 4099 */ 4100 if (first) 4101 return (EINTR); 4102 else 4103 return (0); 4104 } 4105 4106 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4107 0, sflag | MSG_BAND, 0); 4108 if (error) { 4109 if (!first && error == EWOULDBLOCK) 4110 return (0); 4111 eprintsoline(so, error); 4112 return (error); 4113 } 4114 first = 0; 4115 if (uiop->uio_resid > 0) { 4116 /* 4117 * Recheck for fatal errors. Fail write even though 4118 * some data have been written. This is consistent 4119 * with strwrite semantics and BSD sockets semantics. 4120 */ 4121 if (so->so_state & SS_CANTSENDMORE) { 4122 tsignal(curthread, SIGPIPE); 4123 eprintsoline(so, error); 4124 return (EPIPE); 4125 } 4126 if (so->so_error != 0) { 4127 mutex_enter(&so->so_lock); 4128 error = sogeterr(so); 4129 mutex_exit(&so->so_lock); 4130 if (error != 0) { 4131 eprintsoline(so, error); 4132 return (error); 4133 } 4134 } 4135 } 4136 } while (uiop->uio_resid > 0); 4137 return (0); 4138 } 4139 4140 /* 4141 * Check the state for errors and call the appropriate send function. 4142 * 4143 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4144 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4145 * after sending the message. 4146 */ 4147 static int 4148 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop) 4149 { 4150 int so_state; 4151 int so_mode; 4152 int error; 4153 struct sockaddr *name; 4154 t_uscalar_t namelen; 4155 int dontroute; 4156 int flags; 4157 4158 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4159 (void *)so, (void *)msg, msg->msg_flags, 4160 pr_state(so->so_state, so->so_mode), so->so_error)); 4161 4162 mutex_enter(&so->so_lock); 4163 so_state = so->so_state; 4164 4165 if (so_state & SS_CANTSENDMORE) { 4166 mutex_exit(&so->so_lock); 4167 tsignal(curthread, SIGPIPE); 4168 return (EPIPE); 4169 } 4170 4171 if (so->so_error != 0) { 4172 error = sogeterr(so); 4173 if (error != 0) { 4174 mutex_exit(&so->so_lock); 4175 return (error); 4176 } 4177 } 4178 4179 name = (struct sockaddr *)msg->msg_name; 4180 namelen = msg->msg_namelen; 4181 4182 so_mode = so->so_mode; 4183 4184 if (name == NULL) { 4185 if (!(so_state & SS_ISCONNECTED)) { 4186 mutex_exit(&so->so_lock); 4187 if (so_mode & SM_CONNREQUIRED) 4188 return (ENOTCONN); 4189 else 4190 return (EDESTADDRREQ); 4191 } 4192 if (so_mode & SM_CONNREQUIRED) { 4193 name = NULL; 4194 namelen = 0; 4195 } else { 4196 /* 4197 * Note that this code does not prevent so_faddr_sa 4198 * from changing while it is being used. Thus 4199 * if an "unconnect"+connect occurs concurrently with 4200 * this send the datagram might be delivered to a 4201 * garbaled address. 4202 */ 4203 ASSERT(so->so_faddr_sa); 4204 name = so->so_faddr_sa; 4205 namelen = (t_uscalar_t)so->so_faddr_len; 4206 } 4207 } else { 4208 if (!(so_state & SS_ISCONNECTED) && 4209 (so_mode & SM_CONNREQUIRED)) { 4210 /* Required but not connected */ 4211 mutex_exit(&so->so_lock); 4212 return (ENOTCONN); 4213 } 4214 /* 4215 * Ignore the address on connection-oriented sockets. 4216 * Just like BSD this code does not generate an error for 4217 * TCP (a CONNREQUIRED socket) when sending to an address 4218 * passed in with sendto/sendmsg. Instead the data is 4219 * delivered on the connection as if no address had been 4220 * supplied. 4221 */ 4222 if ((so_state & SS_ISCONNECTED) && 4223 !(so_mode & SM_CONNREQUIRED)) { 4224 mutex_exit(&so->so_lock); 4225 return (EISCONN); 4226 } 4227 if (!(so_state & SS_ISBOUND)) { 4228 so_lock_single(so); /* Set SOLOCKED */ 4229 error = sotpi_bind(so, NULL, 0, 4230 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD); 4231 so_unlock_single(so, SOLOCKED); 4232 if (error) { 4233 mutex_exit(&so->so_lock); 4234 eprintsoline(so, error); 4235 return (error); 4236 } 4237 } 4238 /* 4239 * Handle delayed datagram errors. These are only queued 4240 * when the application sets SO_DGRAM_ERRIND. 4241 * Return the error if we are sending to the address 4242 * that was returned in the last T_UDERROR_IND. 4243 * If sending to some other address discard the delayed 4244 * error indication. 4245 */ 4246 if (so->so_delayed_error) { 4247 struct T_uderror_ind *tudi; 4248 void *addr; 4249 t_uscalar_t addrlen; 4250 boolean_t match = B_FALSE; 4251 4252 ASSERT(so->so_eaddr_mp); 4253 error = so->so_delayed_error; 4254 so->so_delayed_error = 0; 4255 tudi = (struct T_uderror_ind *)so->so_eaddr_mp->b_rptr; 4256 addrlen = tudi->DEST_length; 4257 addr = sogetoff(so->so_eaddr_mp, 4258 tudi->DEST_offset, 4259 addrlen, 1); 4260 ASSERT(addr); /* Checked by strsock_proto */ 4261 switch (so->so_family) { 4262 case AF_INET: { 4263 /* Compare just IP address and port */ 4264 sin_t *sin1 = (sin_t *)name; 4265 sin_t *sin2 = (sin_t *)addr; 4266 4267 if (addrlen == sizeof (sin_t) && 4268 namelen == addrlen && 4269 sin1->sin_port == sin2->sin_port && 4270 sin1->sin_addr.s_addr == 4271 sin2->sin_addr.s_addr) 4272 match = B_TRUE; 4273 break; 4274 } 4275 case AF_INET6: { 4276 /* Compare just IP address and port. Not flow */ 4277 sin6_t *sin1 = (sin6_t *)name; 4278 sin6_t *sin2 = (sin6_t *)addr; 4279 4280 if (addrlen == sizeof (sin6_t) && 4281 namelen == addrlen && 4282 sin1->sin6_port == sin2->sin6_port && 4283 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4284 &sin2->sin6_addr)) 4285 match = B_TRUE; 4286 break; 4287 } 4288 case AF_UNIX: 4289 default: 4290 if (namelen == addrlen && 4291 bcmp(name, addr, namelen) == 0) 4292 match = B_TRUE; 4293 } 4294 if (match) { 4295 freemsg(so->so_eaddr_mp); 4296 so->so_eaddr_mp = NULL; 4297 mutex_exit(&so->so_lock); 4298 #ifdef DEBUG 4299 dprintso(so, 0, 4300 ("sockfs delayed error %d for %s\n", 4301 error, 4302 pr_addr(so->so_family, name, namelen))); 4303 #endif /* DEBUG */ 4304 return (error); 4305 } 4306 freemsg(so->so_eaddr_mp); 4307 so->so_eaddr_mp = NULL; 4308 } 4309 } 4310 mutex_exit(&so->so_lock); 4311 4312 flags = msg->msg_flags; 4313 dontroute = 0; 4314 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4315 uint32_t val; 4316 4317 val = 1; 4318 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4319 &val, (t_uscalar_t)sizeof (val)); 4320 if (error) 4321 return (error); 4322 dontroute = 1; 4323 } 4324 4325 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4326 error = EOPNOTSUPP; 4327 goto done; 4328 } 4329 if (msg->msg_controllen != 0) { 4330 if (!(so_mode & SM_CONNREQUIRED)) { 4331 error = sosend_dgramcmsg(so, name, namelen, uiop, 4332 msg->msg_control, msg->msg_controllen, flags); 4333 } else { 4334 if (flags & MSG_OOB) { 4335 /* Can't generate T_EXDATA_REQ with options */ 4336 error = EOPNOTSUPP; 4337 goto done; 4338 } 4339 error = sosend_svccmsg(so, uiop, 4340 !(flags & MSG_EOR), 4341 msg->msg_control, msg->msg_controllen, 4342 flags); 4343 } 4344 goto done; 4345 } 4346 4347 if (!(so_mode & SM_CONNREQUIRED)) { 4348 /* 4349 * If there is no SO_DONTROUTE to turn off return immediately 4350 * from send_dgram. This can allow tail-call optimizations. 4351 */ 4352 if (!dontroute) { 4353 return (sosend_dgram(so, name, namelen, uiop, flags)); 4354 } 4355 error = sosend_dgram(so, name, namelen, uiop, flags); 4356 } else { 4357 t_scalar_t prim; 4358 int sflag; 4359 4360 /* Ignore msg_name in the connected state */ 4361 if (flags & MSG_OOB) { 4362 prim = T_EXDATA_REQ; 4363 /* 4364 * Send down T_EXDATA_REQ even if there is flow 4365 * control for data. 4366 */ 4367 sflag = MSG_IGNFLOW; 4368 } else { 4369 if (so_mode & SM_BYTESTREAM) { 4370 /* Byte stream transport - use write */ 4371 4372 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4373 /* 4374 * If there is no SO_DONTROUTE to turn off, 4375 * SS_DIRECT is on, and there is no flow 4376 * control, we can take the fast path. 4377 */ 4378 if (!dontroute && 4379 (so_state & SS_DIRECT) && 4380 canputnext(SOTOV(so)->v_stream->sd_wrq)) { 4381 return (sostream_direct(so, uiop, 4382 NULL, CRED())); 4383 } 4384 error = strwrite(SOTOV(so), uiop, CRED()); 4385 goto done; 4386 } 4387 prim = T_DATA_REQ; 4388 sflag = 0; 4389 } 4390 /* 4391 * If there is no SO_DONTROUTE to turn off return immediately 4392 * from sosend_svc. This can allow tail-call optimizations. 4393 */ 4394 if (!dontroute) 4395 return (sosend_svc(so, uiop, prim, 4396 !(flags & MSG_EOR), sflag)); 4397 error = sosend_svc(so, uiop, prim, 4398 !(flags & MSG_EOR), sflag); 4399 } 4400 ASSERT(dontroute); 4401 done: 4402 if (dontroute) { 4403 uint32_t val; 4404 4405 val = 0; 4406 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4407 &val, (t_uscalar_t)sizeof (val)); 4408 } 4409 return (error); 4410 } 4411 4412 /* 4413 * Sending data on a datagram socket. 4414 * Assumes caller has verified that SS_ISBOUND etc. are set. 4415 */ 4416 /* ARGSUSED */ 4417 static int 4418 sodgram_direct(struct sonode *so, struct sockaddr *name, 4419 socklen_t namelen, struct uio *uiop, int flags) 4420 { 4421 struct T_unitdata_req tudr; 4422 mblk_t *mp = NULL; 4423 int error = 0; 4424 void *addr; 4425 socklen_t addrlen; 4426 ssize_t len; 4427 struct stdata *stp = SOTOV(so)->v_stream; 4428 int so_state; 4429 queue_t *udp_wq; 4430 boolean_t connected; 4431 mblk_t *mpdata = NULL; 4432 4433 ASSERT(name != NULL && namelen != 0); 4434 ASSERT(!(so->so_mode & SM_CONNREQUIRED)); 4435 ASSERT(!(so->so_mode & SM_EXDATA)); 4436 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 4437 ASSERT(SOTOV(so)->v_type == VSOCK); 4438 4439 /* Caller checked for proper length */ 4440 len = uiop->uio_resid; 4441 ASSERT(len <= so->so_tidu_size); 4442 4443 /* Length and family checks have been done by caller */ 4444 ASSERT(name->sa_family == so->so_family); 4445 ASSERT(so->so_family == AF_INET || 4446 (namelen == (socklen_t)sizeof (struct sockaddr_in6))); 4447 ASSERT(so->so_family == AF_INET6 || 4448 (namelen == (socklen_t)sizeof (struct sockaddr_in))); 4449 4450 addr = name; 4451 addrlen = namelen; 4452 4453 if (stp->sd_sidp != NULL && 4454 (error = straccess(stp, JCWRITE)) != 0) 4455 goto done; 4456 4457 so_state = so->so_state; 4458 4459 connected = so_state & SS_ISCONNECTED; 4460 if (!connected) { 4461 tudr.PRIM_type = T_UNITDATA_REQ; 4462 tudr.DEST_length = addrlen; 4463 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 4464 tudr.OPT_length = 0; 4465 tudr.OPT_offset = 0; 4466 4467 mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0, 4468 _ALLOC_INTR); 4469 if (mp == NULL) { 4470 /* 4471 * Caught a signal waiting for memory. 4472 * Let send* return EINTR. 4473 */ 4474 error = EINTR; 4475 goto done; 4476 } 4477 } 4478 4479 /* 4480 * For UDP we don't break up the copyin into smaller pieces 4481 * as in the TCP case. That means if ENOMEM is returned by 4482 * mcopyinuio() then the uio vector has not been modified at 4483 * all and we fallback to either strwrite() or kstrputmsg() 4484 * below. Note also that we never generate priority messages 4485 * from here. 4486 */ 4487 udp_wq = stp->sd_wrq->q_next; 4488 if (canput(udp_wq) && 4489 (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) { 4490 ASSERT(DB_TYPE(mpdata) == M_DATA); 4491 ASSERT(uiop->uio_resid == 0); 4492 if (!connected) 4493 linkb(mp, mpdata); 4494 else 4495 mp = mpdata; 4496 if (audit_active) 4497 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4498 4499 udp_wput(udp_wq, mp); 4500 return (0); 4501 } 4502 4503 ASSERT(mpdata == NULL); 4504 if (error != 0 && error != ENOMEM) { 4505 freemsg(mp); 4506 return (error); 4507 } 4508 4509 /* 4510 * For connected, let strwrite() handle the blocking case. 4511 * Otherwise we fall thru and use kstrputmsg(). 4512 */ 4513 if (connected) 4514 return (strwrite(SOTOV(so), uiop, CRED())); 4515 4516 if (audit_active) 4517 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 4518 4519 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 4520 done: 4521 #ifdef SOCK_DEBUG 4522 if (error != 0) { 4523 eprintsoline(so, error); 4524 } 4525 #endif /* SOCK_DEBUG */ 4526 return (error); 4527 } 4528 4529 int 4530 sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr) 4531 { 4532 struct stdata *stp = SOTOV(so)->v_stream; 4533 ssize_t iosize, rmax, maxblk; 4534 queue_t *tcp_wq = stp->sd_wrq->q_next; 4535 mblk_t *newmp; 4536 int error = 0, wflag = 0; 4537 4538 ASSERT(so->so_mode & SM_BYTESTREAM); 4539 ASSERT(SOTOV(so)->v_type == VSOCK); 4540 4541 if (stp->sd_sidp != NULL && 4542 (error = straccess(stp, JCWRITE)) != 0) 4543 return (error); 4544 4545 if (uiop == NULL) { 4546 /* 4547 * kstrwritemp() should have checked sd_flag and 4548 * flow-control before coming here. If we end up 4549 * here it means that we can simply pass down the 4550 * data to tcp. 4551 */ 4552 ASSERT(mp != NULL); 4553 if (stp->sd_wputdatafunc != NULL) { 4554 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4555 NULL, NULL, NULL); 4556 if (newmp == NULL) { 4557 /* The caller will free mp */ 4558 return (ECOMM); 4559 } 4560 mp = newmp; 4561 } 4562 tcp_wput(tcp_wq, mp); 4563 return (0); 4564 } 4565 4566 /* Fallback to strwrite() to do proper error handling */ 4567 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY)) 4568 return (strwrite(SOTOV(so), uiop, cr)); 4569 4570 rmax = stp->sd_qn_maxpsz; 4571 ASSERT(rmax >= 0 || rmax == INFPSZ); 4572 if (rmax == 0 || uiop->uio_resid <= 0) 4573 return (0); 4574 4575 if (rmax == INFPSZ) 4576 rmax = uiop->uio_resid; 4577 4578 maxblk = stp->sd_maxblk; 4579 4580 for (;;) { 4581 iosize = MIN(uiop->uio_resid, rmax); 4582 4583 mp = mcopyinuio(stp, uiop, iosize, maxblk, &error); 4584 if (mp == NULL) { 4585 /* 4586 * Fallback to strwrite() for ENOMEM; if this 4587 * is our first time in this routine and the uio 4588 * vector has not been modified, we will end up 4589 * calling strwrite() without any flag set. 4590 */ 4591 if (error == ENOMEM) 4592 goto slow_send; 4593 else 4594 return (error); 4595 } 4596 ASSERT(uiop->uio_resid >= 0); 4597 /* 4598 * If mp is non-NULL and ENOMEM is set, it means that 4599 * mcopyinuio() was able to break down some of the user 4600 * data into one or more mblks. Send the partial data 4601 * to tcp and let the rest be handled in strwrite(). 4602 */ 4603 ASSERT(error == 0 || error == ENOMEM); 4604 if (stp->sd_wputdatafunc != NULL) { 4605 newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL, 4606 NULL, NULL, NULL); 4607 if (newmp == NULL) { 4608 /* The caller will free mp */ 4609 return (ECOMM); 4610 } 4611 mp = newmp; 4612 } 4613 tcp_wput(tcp_wq, mp); 4614 4615 wflag |= NOINTR; 4616 4617 if (uiop->uio_resid == 0) { /* No more data; we're done */ 4618 ASSERT(error == 0); 4619 break; 4620 } else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag & 4621 (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) { 4622 slow_send: 4623 /* 4624 * We were able to send down partial data using 4625 * the direct call interface, but are now relying 4626 * on strwrite() to handle the non-fastpath cases. 4627 * If the socket is blocking we will sleep in 4628 * strwaitq() until write is permitted, otherwise, 4629 * we will need to return the amount of bytes 4630 * written so far back to the app. This is the 4631 * reason why we pass NOINTR flag to strwrite() 4632 * for non-blocking socket, because we don't want 4633 * to return EAGAIN when portion of the user data 4634 * has actually been sent down. 4635 */ 4636 return (strwrite_common(SOTOV(so), uiop, cr, wflag)); 4637 } 4638 } 4639 return (0); 4640 } 4641 4642 /* 4643 * Update so_faddr by asking the transport (unless AF_UNIX). 4644 */ 4645 int 4646 sotpi_getpeername(struct sonode *so) 4647 { 4648 struct strbuf strbuf; 4649 int error = 0, res; 4650 void *addr; 4651 t_uscalar_t addrlen; 4652 k_sigset_t smask; 4653 4654 dprintso(so, 1, ("sotpi_getpeername(%p) %s\n", 4655 (void *)so, pr_state(so->so_state, so->so_mode))); 4656 4657 mutex_enter(&so->so_lock); 4658 so_lock_single(so); /* Set SOLOCKED */ 4659 if (!(so->so_state & SS_ISCONNECTED)) { 4660 error = ENOTCONN; 4661 goto done; 4662 } 4663 /* Added this check for X/Open */ 4664 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 4665 error = EINVAL; 4666 if (xnet_check_print) { 4667 printf("sockfs: X/Open getpeername check => EINVAL\n"); 4668 } 4669 goto done; 4670 } 4671 #ifdef DEBUG 4672 dprintso(so, 1, ("sotpi_getpeername (local): %s\n", 4673 pr_addr(so->so_family, so->so_faddr_sa, 4674 (t_uscalar_t)so->so_faddr_len))); 4675 #endif /* DEBUG */ 4676 4677 if (so->so_family == AF_UNIX) { 4678 /* Transport has different name space - return local info */ 4679 error = 0; 4680 goto done; 4681 } 4682 4683 ASSERT(so->so_faddr_sa); 4684 /* Allocate local buffer to use with ioctl */ 4685 addrlen = (t_uscalar_t)so->so_faddr_maxlen; 4686 mutex_exit(&so->so_lock); 4687 addr = kmem_alloc(addrlen, KM_SLEEP); 4688 4689 /* 4690 * Issue TI_GETPEERNAME with signals masked. 4691 * Put the result in so_faddr_sa so that getpeername works after 4692 * a shutdown(output). 4693 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4694 * back to the socket. 4695 */ 4696 strbuf.buf = addr; 4697 strbuf.maxlen = addrlen; 4698 strbuf.len = 0; 4699 4700 sigintr(&smask, 0); 4701 res = 0; 4702 ASSERT(CRED()); 4703 error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf, 4704 0, K_TO_K, CRED(), &res); 4705 sigunintr(&smask); 4706 4707 mutex_enter(&so->so_lock); 4708 /* 4709 * If there is an error record the error in so_error put don't fail 4710 * the getpeername. Instead fallback on the recorded 4711 * so->so_faddr_sa. 4712 */ 4713 if (error) { 4714 /* 4715 * Various stream head errors can be returned to the ioctl. 4716 * However, it is impossible to determine which ones of 4717 * these are really socket level errors that were incorrectly 4718 * consumed by the ioctl. Thus this code silently ignores the 4719 * error - to code explicitly does not reinstate the error 4720 * using soseterror(). 4721 * Experiments have shows that at least this set of 4722 * errors are reported and should not be reinstated on the 4723 * socket: 4724 * EINVAL E.g. if an I_LINK was in effect when 4725 * getpeername was called. 4726 * EPIPE The ioctl error semantics prefer the write 4727 * side error over the read side error. 4728 * ENOTCONN The transport just got disconnected but 4729 * sockfs had not yet seen the T_DISCON_IND 4730 * when issuing the ioctl. 4731 */ 4732 error = 0; 4733 } else if (res == 0 && strbuf.len > 0 && 4734 (so->so_state & SS_ISCONNECTED)) { 4735 ASSERT(strbuf.len <= (int)so->so_faddr_maxlen); 4736 so->so_faddr_len = (socklen_t)strbuf.len; 4737 bcopy(addr, so->so_faddr_sa, so->so_faddr_len); 4738 so->so_state |= SS_FADDR_VALID; 4739 } 4740 kmem_free(addr, addrlen); 4741 #ifdef DEBUG 4742 dprintso(so, 1, ("sotpi_getpeername (tp): %s\n", 4743 pr_addr(so->so_family, so->so_faddr_sa, 4744 (t_uscalar_t)so->so_faddr_len))); 4745 #endif /* DEBUG */ 4746 done: 4747 so_unlock_single(so, SOLOCKED); 4748 mutex_exit(&so->so_lock); 4749 return (error); 4750 } 4751 4752 /* 4753 * Update so_laddr by asking the transport (unless AF_UNIX). 4754 */ 4755 int 4756 sotpi_getsockname(struct sonode *so) 4757 { 4758 struct strbuf strbuf; 4759 int error = 0, res; 4760 void *addr; 4761 t_uscalar_t addrlen; 4762 k_sigset_t smask; 4763 4764 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4765 (void *)so, pr_state(so->so_state, so->so_mode))); 4766 4767 mutex_enter(&so->so_lock); 4768 so_lock_single(so); /* Set SOLOCKED */ 4769 if (!(so->so_state & SS_ISBOUND) && so->so_family != AF_UNIX) { 4770 /* Return an all zero address except for the family */ 4771 if (so->so_family == AF_INET) 4772 so->so_laddr_len = (socklen_t)sizeof (sin_t); 4773 else if (so->so_family == AF_INET6) 4774 so->so_laddr_len = (socklen_t)sizeof (sin6_t); 4775 ASSERT(so->so_laddr_len <= so->so_laddr_maxlen); 4776 bzero(so->so_laddr_sa, so->so_laddr_len); 4777 /* 4778 * Can not assume there is a sa_family for all 4779 * protocol families. 4780 */ 4781 if (so->so_family == AF_INET || so->so_family == AF_INET6) 4782 so->so_laddr_sa->sa_family = so->so_family; 4783 } 4784 #ifdef DEBUG 4785 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4786 pr_addr(so->so_family, so->so_laddr_sa, 4787 (t_uscalar_t)so->so_laddr_len))); 4788 #endif /* DEBUG */ 4789 if (so->so_family == AF_UNIX) { 4790 /* Transport has different name space - return local info */ 4791 error = 0; 4792 goto done; 4793 } 4794 if (!(so->so_state & SS_ISBOUND)) { 4795 /* If not bound, then nothing to return. */ 4796 error = 0; 4797 goto done; 4798 } 4799 /* Allocate local buffer to use with ioctl */ 4800 addrlen = (t_uscalar_t)so->so_laddr_maxlen; 4801 mutex_exit(&so->so_lock); 4802 addr = kmem_alloc(addrlen, KM_SLEEP); 4803 4804 /* 4805 * Issue TI_GETMYNAME with signals masked. 4806 * Put the result in so_laddr_sa so that getsockname works after 4807 * a shutdown(output). 4808 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4809 * back to the socket. 4810 */ 4811 strbuf.buf = addr; 4812 strbuf.maxlen = addrlen; 4813 strbuf.len = 0; 4814 4815 sigintr(&smask, 0); 4816 res = 0; 4817 ASSERT(CRED()); 4818 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4819 0, K_TO_K, CRED(), &res); 4820 sigunintr(&smask); 4821 4822 mutex_enter(&so->so_lock); 4823 /* 4824 * If there is an error record the error in so_error put don't fail 4825 * the getsockname. Instead fallback on the recorded 4826 * so->so_laddr_sa. 4827 */ 4828 if (error) { 4829 /* 4830 * Various stream head errors can be returned to the ioctl. 4831 * However, it is impossible to determine which ones of 4832 * these are really socket level errors that were incorrectly 4833 * consumed by the ioctl. Thus this code silently ignores the 4834 * error - to code explicitly does not reinstate the error 4835 * using soseterror(). 4836 * Experiments have shows that at least this set of 4837 * errors are reported and should not be reinstated on the 4838 * socket: 4839 * EINVAL E.g. if an I_LINK was in effect when 4840 * getsockname was called. 4841 * EPIPE The ioctl error semantics prefer the write 4842 * side error over the read side error. 4843 */ 4844 error = 0; 4845 } else if (res == 0 && strbuf.len > 0 && 4846 (so->so_state & SS_ISBOUND)) { 4847 ASSERT(strbuf.len <= (int)so->so_laddr_maxlen); 4848 so->so_laddr_len = (socklen_t)strbuf.len; 4849 bcopy(addr, so->so_laddr_sa, so->so_laddr_len); 4850 so->so_state |= SS_LADDR_VALID; 4851 } 4852 kmem_free(addr, addrlen); 4853 #ifdef DEBUG 4854 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 4855 pr_addr(so->so_family, so->so_laddr_sa, 4856 (t_uscalar_t)so->so_laddr_len))); 4857 #endif /* DEBUG */ 4858 done: 4859 so_unlock_single(so, SOLOCKED); 4860 mutex_exit(&so->so_lock); 4861 return (error); 4862 } 4863 4864 /* 4865 * Get socket options. For SOL_SOCKET options some options are handled 4866 * by the sockfs while others use the value recorded in the sonode as a 4867 * fallback should the T_SVR4_OPTMGMT_REQ fail. 4868 * 4869 * On the return most *optlenp bytes are copied to optval. 4870 */ 4871 int 4872 sotpi_getsockopt(struct sonode *so, int level, int option_name, 4873 void *optval, socklen_t *optlenp, int flags) 4874 { 4875 struct T_optmgmt_req optmgmt_req; 4876 struct T_optmgmt_ack *optmgmt_ack; 4877 struct opthdr oh; 4878 struct opthdr *opt_res; 4879 mblk_t *mp = NULL; 4880 int error = 0; 4881 void *option = NULL; /* Set if fallback value */ 4882 t_uscalar_t maxlen = *optlenp; 4883 t_uscalar_t len; 4884 uint32_t value; 4885 4886 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 4887 (void *)so, level, option_name, optval, (void *)optlenp, 4888 pr_state(so->so_state, so->so_mode))); 4889 4890 mutex_enter(&so->so_lock); 4891 so_lock_single(so); /* Set SOLOCKED */ 4892 4893 /* 4894 * Check for SOL_SOCKET options. 4895 * Certain SOL_SOCKET options are returned directly whereas 4896 * others only provide a default (fallback) value should 4897 * the T_SVR4_OPTMGMT_REQ fail. 4898 */ 4899 if (level == SOL_SOCKET) { 4900 /* Check parameters */ 4901 switch (option_name) { 4902 case SO_TYPE: 4903 case SO_ERROR: 4904 case SO_DEBUG: 4905 case SO_ACCEPTCONN: 4906 case SO_REUSEADDR: 4907 case SO_KEEPALIVE: 4908 case SO_DONTROUTE: 4909 case SO_BROADCAST: 4910 case SO_USELOOPBACK: 4911 case SO_OOBINLINE: 4912 case SO_SNDBUF: 4913 case SO_RCVBUF: 4914 #ifdef notyet 4915 case SO_SNDLOWAT: 4916 case SO_RCVLOWAT: 4917 case SO_SNDTIMEO: 4918 case SO_RCVTIMEO: 4919 #endif /* notyet */ 4920 case SO_DOMAIN: 4921 case SO_DGRAM_ERRIND: 4922 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 4923 error = EINVAL; 4924 eprintsoline(so, error); 4925 goto done2; 4926 } 4927 break; 4928 case SO_LINGER: 4929 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 4930 error = EINVAL; 4931 eprintsoline(so, error); 4932 goto done2; 4933 } 4934 break; 4935 } 4936 4937 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 4938 4939 switch (option_name) { 4940 case SO_TYPE: 4941 value = so->so_type; 4942 option = &value; 4943 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4944 4945 case SO_ERROR: 4946 value = sogeterr(so); 4947 option = &value; 4948 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4949 4950 case SO_ACCEPTCONN: 4951 if (so->so_state & SS_ACCEPTCONN) 4952 value = SO_ACCEPTCONN; 4953 else 4954 value = 0; 4955 #ifdef DEBUG 4956 if (value) { 4957 dprintso(so, 1, 4958 ("sotpi_getsockopt: 0x%x is set\n", 4959 option_name)); 4960 } else { 4961 dprintso(so, 1, 4962 ("sotpi_getsockopt: 0x%x not set\n", 4963 option_name)); 4964 } 4965 #endif /* DEBUG */ 4966 option = &value; 4967 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4968 4969 case SO_DEBUG: 4970 case SO_REUSEADDR: 4971 case SO_KEEPALIVE: 4972 case SO_DONTROUTE: 4973 case SO_BROADCAST: 4974 case SO_USELOOPBACK: 4975 case SO_OOBINLINE: 4976 case SO_DGRAM_ERRIND: 4977 value = (so->so_options & option_name); 4978 #ifdef DEBUG 4979 if (value) { 4980 dprintso(so, 1, 4981 ("sotpi_getsockopt: 0x%x is set\n", 4982 option_name)); 4983 } else { 4984 dprintso(so, 1, 4985 ("sotpi_getsockopt: 0x%x not set\n", 4986 option_name)); 4987 } 4988 #endif /* DEBUG */ 4989 option = &value; 4990 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 4991 4992 /* 4993 * The following options are only returned by sockfs when the 4994 * T_SVR4_OPTMGMT_REQ fails. 4995 */ 4996 case SO_LINGER: 4997 option = &so->so_linger; 4998 len = (t_uscalar_t)sizeof (struct linger); 4999 break; 5000 case SO_SNDBUF: { 5001 ssize_t lvalue; 5002 5003 /* 5004 * If the option has not been set then get a default 5005 * value from the read queue. This value is 5006 * returned if the transport fails 5007 * the T_SVR4_OPTMGMT_REQ. 5008 */ 5009 lvalue = so->so_sndbuf; 5010 if (lvalue == 0) { 5011 mutex_exit(&so->so_lock); 5012 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5013 QHIWAT, 0, &lvalue); 5014 mutex_enter(&so->so_lock); 5015 dprintso(so, 1, 5016 ("got SO_SNDBUF %ld from q\n", lvalue)); 5017 } 5018 value = (int)lvalue; 5019 option = &value; 5020 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5021 break; 5022 } 5023 case SO_RCVBUF: { 5024 ssize_t lvalue; 5025 5026 /* 5027 * If the option has not been set then get a default 5028 * value from the read queue. This value is 5029 * returned if the transport fails 5030 * the T_SVR4_OPTMGMT_REQ. 5031 * 5032 * XXX If SO_RCVBUF has been set and this is an 5033 * XPG 4.2 application then do not ask the transport 5034 * since the transport might adjust the value and not 5035 * return exactly what was set by the application. 5036 * For non-XPG 4.2 application we return the value 5037 * that the transport is actually using. 5038 */ 5039 lvalue = so->so_rcvbuf; 5040 if (lvalue == 0) { 5041 mutex_exit(&so->so_lock); 5042 (void) strqget(RD(strvp2wq(SOTOV(so))), 5043 QHIWAT, 0, &lvalue); 5044 mutex_enter(&so->so_lock); 5045 dprintso(so, 1, 5046 ("got SO_RCVBUF %ld from q\n", lvalue)); 5047 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5048 value = (int)lvalue; 5049 option = &value; 5050 goto copyout; /* skip asking transport */ 5051 } 5052 value = (int)lvalue; 5053 option = &value; 5054 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5055 break; 5056 } 5057 case SO_DOMAIN: 5058 value = so->so_family; 5059 option = &value; 5060 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5061 5062 #ifdef notyet 5063 /* 5064 * We do not implement the semantics of these options 5065 * thus we shouldn't implement the options either. 5066 */ 5067 case SO_SNDLOWAT: 5068 value = so->so_sndlowat; 5069 option = &value; 5070 break; 5071 case SO_RCVLOWAT: 5072 value = so->so_rcvlowat; 5073 option = &value; 5074 break; 5075 case SO_SNDTIMEO: 5076 value = so->so_sndtimeo; 5077 option = &value; 5078 break; 5079 case SO_RCVTIMEO: 5080 value = so->so_rcvtimeo; 5081 option = &value; 5082 break; 5083 #endif /* notyet */ 5084 } 5085 } 5086 5087 mutex_exit(&so->so_lock); 5088 5089 /* Send request */ 5090 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5091 optmgmt_req.MGMT_flags = T_CHECK; 5092 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5093 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5094 5095 oh.level = level; 5096 oh.name = option_name; 5097 oh.len = maxlen; 5098 5099 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5100 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP); 5101 /* Let option management work in the presence of data flow control */ 5102 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5103 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5104 mp = NULL; 5105 mutex_enter(&so->so_lock); 5106 if (error) { 5107 eprintsoline(so, error); 5108 goto done2; 5109 } 5110 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5111 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5112 if (error) { 5113 if (option != NULL) { 5114 /* We have a fallback value */ 5115 error = 0; 5116 goto copyout; 5117 } 5118 eprintsoline(so, error); 5119 goto done2; 5120 } 5121 ASSERT(mp); 5122 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5123 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5124 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5125 if (opt_res == NULL) { 5126 if (option != NULL) { 5127 /* We have a fallback value */ 5128 error = 0; 5129 goto copyout; 5130 } 5131 error = EPROTO; 5132 eprintsoline(so, error); 5133 goto done; 5134 } 5135 option = &opt_res[1]; 5136 5137 /* check to ensure that the option is within bounds */ 5138 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5139 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5140 if (option != NULL) { 5141 /* We have a fallback value */ 5142 error = 0; 5143 goto copyout; 5144 } 5145 error = EPROTO; 5146 eprintsoline(so, error); 5147 goto done; 5148 } 5149 5150 len = opt_res->len; 5151 5152 copyout: { 5153 t_uscalar_t size = MIN(len, maxlen); 5154 bcopy(option, optval, size); 5155 bcopy(&size, optlenp, sizeof (size)); 5156 } 5157 done: 5158 freemsg(mp); 5159 done2: 5160 so_unlock_single(so, SOLOCKED); 5161 mutex_exit(&so->so_lock); 5162 return (error); 5163 } 5164 5165 /* 5166 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5167 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5168 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5169 * setsockopt has to work even if the transport does not support the option. 5170 */ 5171 int 5172 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5173 const void *optval, t_uscalar_t optlen) 5174 { 5175 struct T_optmgmt_req optmgmt_req; 5176 struct opthdr oh; 5177 mblk_t *mp; 5178 int error = 0; 5179 boolean_t handled = B_FALSE; 5180 5181 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5182 (void *)so, level, option_name, optval, optlen, 5183 pr_state(so->so_state, so->so_mode))); 5184 5185 5186 /* X/Open requires this check */ 5187 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5188 if (xnet_check_print) 5189 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5190 return (EINVAL); 5191 } 5192 5193 /* Caller allocates aligned optval, or passes null */ 5194 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 5195 /* If optval is null optlen is 0, and vice-versa */ 5196 ASSERT(optval != NULL || optlen == 0); 5197 ASSERT(optlen != 0 || optval == NULL); 5198 5199 mutex_enter(&so->so_lock); 5200 so_lock_single(so); /* Set SOLOCKED */ 5201 mutex_exit(&so->so_lock); 5202 5203 /* 5204 * For SOCKET or TCP level options, try to set it here itself 5205 * provided socket has not been popped and we know the tcp 5206 * structure (stored in so_priv). 5207 */ 5208 if ((level == SOL_SOCKET || level == IPPROTO_TCP) && 5209 (so->so_family == AF_INET || so->so_family == AF_INET6) && 5210 (so->so_version == SOV_SOCKSTREAM) && (so->so_priv != NULL)) { 5211 tcp_t *tcp = so->so_priv; 5212 boolean_t onoff; 5213 5214 #define intvalue (*(int32_t *)optval) 5215 5216 switch (level) { 5217 case SOL_SOCKET: 5218 switch (option_name) { /* Check length param */ 5219 case SO_DEBUG: 5220 case SO_REUSEADDR: 5221 case SO_DONTROUTE: 5222 case SO_BROADCAST: 5223 case SO_USELOOPBACK: 5224 case SO_OOBINLINE: 5225 case SO_DGRAM_ERRIND: 5226 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5227 error = EINVAL; 5228 eprintsoline(so, error); 5229 mutex_enter(&so->so_lock); 5230 goto done2; 5231 } 5232 ASSERT(optval); 5233 onoff = intvalue != 0; 5234 handled = B_TRUE; 5235 break; 5236 case SO_LINGER: 5237 if (optlen != 5238 (t_uscalar_t)sizeof (struct linger)) { 5239 error = EINVAL; 5240 eprintsoline(so, error); 5241 mutex_enter(&so->so_lock); 5242 goto done2; 5243 } 5244 ASSERT(optval); 5245 handled = B_TRUE; 5246 break; 5247 } 5248 5249 switch (option_name) { /* Do actions */ 5250 case SO_LINGER: { 5251 struct linger *lgr = (struct linger *)optval; 5252 5253 if (lgr->l_onoff) { 5254 tcp->tcp_linger = 1; 5255 tcp->tcp_lingertime = lgr->l_linger; 5256 so->so_linger.l_onoff = SO_LINGER; 5257 so->so_options |= SO_LINGER; 5258 } else { 5259 tcp->tcp_linger = 0; 5260 tcp->tcp_lingertime = 0; 5261 so->so_linger.l_onoff = 0; 5262 so->so_options &= ~SO_LINGER; 5263 } 5264 so->so_linger.l_linger = lgr->l_linger; 5265 handled = B_TRUE; 5266 break; 5267 } 5268 case SO_DEBUG: 5269 tcp->tcp_debug = onoff; 5270 #ifdef SOCK_TEST 5271 if (intvalue & 2) 5272 sock_test_timelimit = 10 * hz; 5273 else 5274 sock_test_timelimit = 0; 5275 5276 if (intvalue & 4) 5277 do_useracc = 0; 5278 else 5279 do_useracc = 1; 5280 #endif /* SOCK_TEST */ 5281 break; 5282 case SO_DONTROUTE: 5283 /* 5284 * SO_DONTROUTE, SO_USELOOPBACK and 5285 * SO_BROADCAST are only of interest to IP. 5286 * We track them here only so 5287 * that we can report their current value. 5288 */ 5289 tcp->tcp_dontroute = onoff; 5290 if (onoff) 5291 so->so_options |= option_name; 5292 else 5293 so->so_options &= ~option_name; 5294 break; 5295 case SO_USELOOPBACK: 5296 tcp->tcp_useloopback = onoff; 5297 if (onoff) 5298 so->so_options |= option_name; 5299 else 5300 so->so_options &= ~option_name; 5301 break; 5302 case SO_BROADCAST: 5303 tcp->tcp_broadcast = onoff; 5304 if (onoff) 5305 so->so_options |= option_name; 5306 else 5307 so->so_options &= ~option_name; 5308 break; 5309 case SO_REUSEADDR: 5310 tcp->tcp_reuseaddr = onoff; 5311 if (onoff) 5312 so->so_options |= option_name; 5313 else 5314 so->so_options &= ~option_name; 5315 break; 5316 case SO_OOBINLINE: 5317 tcp->tcp_oobinline = onoff; 5318 if (onoff) 5319 so->so_options |= option_name; 5320 else 5321 so->so_options &= ~option_name; 5322 break; 5323 case SO_DGRAM_ERRIND: 5324 tcp->tcp_dgram_errind = onoff; 5325 if (onoff) 5326 so->so_options |= option_name; 5327 else 5328 so->so_options &= ~option_name; 5329 break; 5330 } 5331 break; 5332 case IPPROTO_TCP: 5333 switch (option_name) { 5334 case TCP_NODELAY: 5335 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5336 error = EINVAL; 5337 eprintsoline(so, error); 5338 mutex_enter(&so->so_lock); 5339 goto done2; 5340 } 5341 ASSERT(optval); 5342 tcp->tcp_naglim = intvalue ? 1 : tcp->tcp_mss; 5343 handled = B_TRUE; 5344 break; 5345 } 5346 break; 5347 default: 5348 handled = B_FALSE; 5349 break; 5350 } 5351 } 5352 5353 if (handled) { 5354 mutex_enter(&so->so_lock); 5355 goto done2; 5356 } 5357 5358 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5359 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5360 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5361 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5362 5363 oh.level = level; 5364 oh.name = option_name; 5365 oh.len = optlen; 5366 5367 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5368 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP); 5369 /* Let option management work in the presence of data flow control */ 5370 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5371 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5372 mp = NULL; 5373 mutex_enter(&so->so_lock); 5374 if (error) { 5375 eprintsoline(so, error); 5376 goto done; 5377 } 5378 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5379 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5380 if (error) { 5381 eprintsoline(so, error); 5382 goto done; 5383 } 5384 ASSERT(mp); 5385 /* No need to verify T_optmgmt_ack */ 5386 freemsg(mp); 5387 done: 5388 /* 5389 * Check for SOL_SOCKET options and record their values. 5390 * If we know about a SOL_SOCKET parameter and the transport 5391 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5392 * EPROTO) we let the setsockopt succeed. 5393 */ 5394 if (level == SOL_SOCKET) { 5395 /* Check parameters */ 5396 switch (option_name) { 5397 case SO_DEBUG: 5398 case SO_REUSEADDR: 5399 case SO_KEEPALIVE: 5400 case SO_DONTROUTE: 5401 case SO_BROADCAST: 5402 case SO_USELOOPBACK: 5403 case SO_OOBINLINE: 5404 case SO_SNDBUF: 5405 case SO_RCVBUF: 5406 #ifdef notyet 5407 case SO_SNDLOWAT: 5408 case SO_RCVLOWAT: 5409 case SO_SNDTIMEO: 5410 case SO_RCVTIMEO: 5411 #endif /* notyet */ 5412 case SO_DGRAM_ERRIND: 5413 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5414 error = EINVAL; 5415 eprintsoline(so, error); 5416 goto done2; 5417 } 5418 ASSERT(optval); 5419 handled = B_TRUE; 5420 break; 5421 case SO_LINGER: 5422 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5423 error = EINVAL; 5424 eprintsoline(so, error); 5425 goto done2; 5426 } 5427 ASSERT(optval); 5428 handled = B_TRUE; 5429 break; 5430 } 5431 5432 #define intvalue (*(int32_t *)optval) 5433 5434 switch (option_name) { 5435 case SO_TYPE: 5436 case SO_ERROR: 5437 case SO_ACCEPTCONN: 5438 /* Can't be set */ 5439 error = ENOPROTOOPT; 5440 goto done2; 5441 case SO_LINGER: { 5442 struct linger *l = (struct linger *)optval; 5443 5444 so->so_linger.l_linger = l->l_linger; 5445 if (l->l_onoff) { 5446 so->so_linger.l_onoff = SO_LINGER; 5447 so->so_options |= SO_LINGER; 5448 } else { 5449 so->so_linger.l_onoff = 0; 5450 so->so_options &= ~SO_LINGER; 5451 } 5452 break; 5453 } 5454 5455 case SO_DEBUG: 5456 #ifdef SOCK_TEST 5457 if (intvalue & 2) 5458 sock_test_timelimit = 10 * hz; 5459 else 5460 sock_test_timelimit = 0; 5461 5462 if (intvalue & 4) 5463 do_useracc = 0; 5464 else 5465 do_useracc = 1; 5466 #endif /* SOCK_TEST */ 5467 /* FALLTHRU */ 5468 case SO_REUSEADDR: 5469 case SO_KEEPALIVE: 5470 case SO_DONTROUTE: 5471 case SO_BROADCAST: 5472 case SO_USELOOPBACK: 5473 case SO_OOBINLINE: 5474 case SO_DGRAM_ERRIND: 5475 if (intvalue != 0) { 5476 dprintso(so, 1, 5477 ("sotpi_setsockopt: setting 0x%x\n", 5478 option_name)); 5479 so->so_options |= option_name; 5480 } else { 5481 dprintso(so, 1, 5482 ("sotpi_setsockopt: clearing 0x%x\n", 5483 option_name)); 5484 so->so_options &= ~option_name; 5485 } 5486 break; 5487 /* 5488 * The following options are only returned by us when the 5489 * T_SVR4_OPTMGMT_REQ fails. 5490 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5491 * since the transport might adjust the value and not 5492 * return exactly what was set by the application. 5493 */ 5494 case SO_SNDBUF: 5495 so->so_sndbuf = intvalue; 5496 break; 5497 case SO_RCVBUF: 5498 so->so_rcvbuf = intvalue; 5499 break; 5500 #ifdef notyet 5501 /* 5502 * We do not implement the semantics of these options 5503 * thus we shouldn't implement the options either. 5504 */ 5505 case SO_SNDLOWAT: 5506 so->so_sndlowat = intvalue; 5507 break; 5508 case SO_RCVLOWAT: 5509 so->so_rcvlowat = intvalue; 5510 break; 5511 case SO_SNDTIMEO: 5512 so->so_sndtimeo = intvalue; 5513 break; 5514 case SO_RCVTIMEO: 5515 so->so_rcvtimeo = intvalue; 5516 break; 5517 #endif /* notyet */ 5518 } 5519 #undef intvalue 5520 5521 if (error) { 5522 if ((error == ENOPROTOOPT || error == EPROTO || 5523 error == EINVAL) && handled) { 5524 dprintso(so, 1, 5525 ("setsockopt: ignoring error %d for 0x%x\n", 5526 error, option_name)); 5527 error = 0; 5528 } 5529 } 5530 } 5531 done2: 5532 ret: 5533 so_unlock_single(so, SOLOCKED); 5534 mutex_exit(&so->so_lock); 5535 return (error); 5536 } 5537